From ad6068c7568db380994dd50f6610bd4251b8d7a2 Mon Sep 17 00:00:00 2001 From: "Josh M. London" Date: Thu, 25 Jun 2026 15:32:39 -0700 Subject: [PATCH 1/2] demo bucket access from workstations - initial commit --- R/bucket_access.R | 71 +++++++++++++++++++++++ docs/bucket-access-demo.md | 116 +++++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 R/bucket_access.R create mode 100644 docs/bucket-access-demo.md diff --git a/R/bucket_access.R b/R/bucket_access.R new file mode 100644 index 0000000..a6eb277 --- /dev/null +++ b/R/bucket_access.R @@ -0,0 +1,71 @@ +# Demo: access a Google Cloud Storage bucket from R running in a +# Google Cloud Workstation. +# +# This example shows how to: +# 1. authenticate with Application Default Credentials (ADC), +# 2. confirm access to a bucket, +# 3. upload a small example file, and +# 4. download that file back into R. +# +# Before running this script, authenticate once from the workstation terminal: +# gcloud auth application-default login --no-browser +# +# That command starts a copy/paste authentication flow: +# 1. copy the long URL from the workstation terminal, +# 2. open it in a browser on your local machine, +# 3. sign in and approve access, +# 4. copy the resulting code back into the workstation terminal. +# +# Alternative interactive approach: +# You can use `credentials_user_oauth2(scopes = scope)` instead of +# `token_fetch()` if you prefer to initiate authentication from R. +# +# Packages: +# install.packages(c("googleCloudStorageR", "gargle", "readr")) + +library(gargle) +library(googleCloudStorageR) +library(readr) + +# Bucket can live in a different project as long as the user or service account +# has permission on that bucket. +bucket_name <- "josh-london" + +# Full Cloud Platform scope is convenient for demos, but narrower scopes are +# preferable in production workflows when possible. 
+scope <- "https://www.googleapis.com/auth/cloud-platform" + +# Retrieve Application Default Credentials and use them for GCS requests. +token <- token_fetch(scopes = scope) +gcs_auth(token = token) + +# Confirm access to the bucket. +gcs_list_objects(bucket = bucket_name) + +# Set a default bucket so later calls can omit `bucket =`. +gcs_global_bucket(bucket_name) + +# Write a small example file locally, then upload it to the bucket. +local_file <- tempfile(fileext = ".csv") +write_csv(mtcars, local_file) + +gcs_upload( + file = local_file, + name = "testing/mtcars.csv" +) + +# Verify that the object now exists in the bucket. +gcs_list_objects(bucket = bucket_name) + +# Download the uploaded object. +downloaded_file <- tempfile(fileext = ".csv") +gcs_get_object( + object_name = "testing/mtcars.csv", + bucket = bucket_name, + saveToDisk = downloaded_file, + overwrite = TRUE +) + +# Read the downloaded file back into R. +mtcars_from_bucket <- read_csv(downloaded_file, show_col_types = FALSE) +head(mtcars_from_bucket) diff --git a/docs/bucket-access-demo.md b/docs/bucket-access-demo.md new file mode 100644 index 0000000..b6113bb --- /dev/null +++ b/docs/bucket-access-demo.md @@ -0,0 +1,116 @@ +# Bucket access demo from an R Workstation + +This guide explains how to use [R/bucket_access.R](../R/bucket_access.R) to test read and write access from a Google Cloud Workstation to a Google Cloud Storage bucket, including a bucket that lives in a different GCP project. + +## What this demo does + +The script is intentionally small. It demonstrates four steps: + +1. authenticate from R using Application Default Credentials (ADC), +2. confirm that the authenticated identity can access the target bucket, +3. upload a small example CSV file to the bucket, and +4. download that file back into R. + +This is a useful smoke test when you want to verify that a workstation can interact with a bucket before building a larger workflow. 
+ +## What you need before running it + +You should have: + +- access to a Google Cloud Workstation, +- the ability to run `gcloud` from the workstation terminal, +- permission to access the target bucket, and +- the required R packages installed. + +Install the R packages if needed: + +```r +install.packages(c("googleCloudStorageR", "gargle", "readr")) +``` + +## One-time authentication setup + +From the workstation terminal, run: + +```bash +gcloud auth application-default login --no-browser +``` + +This starts a manual browser-based flow. In practice, the steps are: + +1. copy the long authentication URL from the workstation terminal, +2. open that URL in a browser on your local machine, +3. sign in with the account that has bucket access, +4. approve the request, +5. copy the returned code, and +6. paste it back into the workstation terminal. + +This creates Application Default Credentials that R can use through `gargle::token_fetch()`. + +## How to configure the script + +Open [R/bucket_access.R](../R/bucket_access.R) and update this value: + +```r +bucket_name <- "your-bucket-name" +``` + +The bucket can live in a different project from the workstation as long as the authenticated identity has the necessary permissions on that bucket. + +## How to run the demo + +Run the script in R from the workstation. + +The script will: + +- fetch ADC credentials, +- authenticate with Google Cloud Storage, +- list objects in `bucket_name`, +- upload `mtcars` as `testing/mtcars.csv`, +- download the same object to a temporary local file, and +- read the downloaded CSV back into R. + +A successful run is a good indication that both authentication and bucket permissions are working. + +## Common permission requirements + +Exact IAM roles vary, but the authenticated identity generally needs permission to: + +- access the target bucket, +- create objects in the bucket to test upload, and +- get objects from the bucket to test download. 
+ +For this demo, bucket-level permissions matter more than project-level visibility. A user can successfully read from and write to a known bucket in another project without being able to list all buckets in that project. + +## Typical failure points + +### Authentication fails + +If `token_fetch()` fails, the workstation may not have valid Application Default Credentials yet. Re-run: + +```bash +gcloud auth application-default login --no-browser +``` + +### Bucket listing, upload, or download fails + +If calls involving `bucket_name` fail, verify: + +- the bucket name is correct, +- the bucket exists, +- your identity has read and/or write permissions, and +- any organization or project policy allows cross-project access. + +## Why the script writes a temporary file + +`googleCloudStorageR::gcs_upload()` is clearest when you upload a file from disk. The script writes `mtcars` to a temporary CSV first, then uploads that file. This makes the demo easier to understand and mirrors many real workflows where outputs are written locally and then copied to cloud storage. + +## Suggested next step for real workflows + +For production code, it is usually worth adapting this demo in three ways: + +1. move `bucket_name` and object paths into a small config section, +2. add explicit error handling and messages around authentication and uploads, and +3. avoid broad scopes when a narrower scope will do. + +If you want a more reusable version, a natural next step is to turn this into a small helper function or parameterized script. From a1cc34fe98db3e5a90276c7c847fc0b8741ab06e Mon Sep 17 00:00:00 2001 From: "Josh M. 
London" Date: Fri, 26 Jun 2026 12:37:38 -0700 Subject: [PATCH 2/2] updated documentation --- ...et-access-demo.md => bucket-access-demo.md | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) rename docs/bucket-access-demo.md => bucket-access-demo.md (78%) diff --git a/docs/bucket-access-demo.md b/bucket-access-demo.md similarity index 78% rename from docs/bucket-access-demo.md rename to bucket-access-demo.md index b6113bb..439c68f 100644 --- a/docs/bucket-access-demo.md +++ b/bucket-access-demo.md @@ -1,8 +1,10 @@ -# Bucket access demo from an R Workstation +# GCS Bucket access demo from Google Cloud Workstation (RStudio or Positron) -This guide explains how to use [R/bucket_access.R](../R/bucket_access.R) to test read and write access from a Google Cloud Workstation to a Google Cloud Storage bucket, including a bucket that lives in a different GCP project. +This guide explains [R/bucket_access.R](R/bucket_access.R) as a demonstration of +read and write access from a Google Cloud Workstation to a Google Cloud Storage Bucket, +including a bucket that lives in a different GCP project. -## What this demo does +## What the demo script does The script is intentionally small. It demonstrates four steps: @@ -11,7 +13,8 @@ The script is intentionally small. It demonstrates four steps: 3. upload a small example CSV file to the bucket, and 4. download that file back into R. -This is a useful smoke test when you want to verify that a workstation can interact with a bucket before building a larger workflow. +This is a simple test process to verify that a workstation can interact with a bucket. +Users can use this as a starting point for implementing their own workflows. ## What you need before running it @@ -30,6 +33,11 @@ install.packages(c("googleCloudStorageR", "gargle", "readr")) ## One-time authentication setup +The recommended best practice for authentication is to use Application Default +Credentials (ADC). 
This creates a persistent authentication on the workstation +after a one-time process. The `gcloud` CLI application is installed by default +on all Google Cloud Workstations. + From the workstation terminal, run: ```bash @@ -45,11 +53,12 @@ This starts a manual browser-based flow. In practice, the steps are: 5. copy the returned code, and 6. paste it back into the workstation terminal. -This creates Application Default Credentials that R can use through `gargle::token_fetch()`. +This creates Application Default Credentials that R can use +through `gargle::token_fetch()`. ## How to configure the script -Open [R/bucket_access.R](../R/bucket_access.R) and update this value: +Open [R/bucket_access.R](R/bucket_access.R) and update this value: ```r bucket_name <- "your-bucket-name" @@ -112,5 +121,3 @@ For production code, it is usually worth adapting this demo in three ways: 1. move `bucket_name` and object paths into a small config section, 2. add explicit error handling and messages around authentication and uploads, and 3. avoid broad scopes when a narrower scope will do. - -If you want a more reusable version, a natural next step is to turn this into a small helper function or parameterized script.