From d5012cb2843522bb90d3c953d198223cf0a510d0 Mon Sep 17 00:00:00 2001 From: bhandarivijay Date: Wed, 4 Mar 2026 17:54:30 +0000 Subject: [PATCH 01/20] chore: Migrate gsutil usage to gcloud storage --- README.md | 2 +- alluxio/alluxio.sh | 2 +- beam/README.md | 2 +- beam/beam.sh | 2 +- conda/README.md | 10 +++++----- connectors/connectors.sh | 2 +- dask/README.md | 2 +- hbase/hbase.sh | 10 +++++----- hive-llap/llap.sh | 2 +- mlvm/mlvm.sh | 8 ++++---- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9e143d414..541a872f8 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ from upstream in the cluster: ```bash BUCKET= CLUSTER= -gsutil cp presto/presto.sh gs://${BUCKET}/ +gcloud storage cp presto/presto.sh gs://${BUCKET}/ gcloud dataproc clusters create ${CLUSTER} --initialization-actions gs://${BUCKET}/presto.sh ``` diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index 9d41820d4..3775c6dbf 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -41,7 +41,7 @@ download_file() { local -r uri="$1" if [[ "${uri}" == gs://* ]]; then - gsutil cp "${uri}" ./ + gcloud storage cp "${uri}" ./ else # TODO Add metadata header tag to the wget for filtering out in download metrics. wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}" diff --git a/beam/README.md b/beam/README.md index e03de8c27..0aecdd1a7 100644 --- a/beam/README.md +++ b/beam/README.md @@ -62,7 +62,7 @@ Then, upload the jar to a Cloud Storage path that clusters can access during initialization. ```bash -gsutil cp \ +gcloud storage cp \ ./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \ /beam-runners-flink_2.11-job-server-latest-SNAPSHOT.jar ``` diff --git a/beam/beam.sh b/beam/beam.sh index 2ce1640bf..f3fe96145 100755 --- a/beam/beam.sh +++ b/beam/beam.sh @@ -40,7 +40,7 @@ function download_snapshot() { readonly snapshot_url="${1}" readonly protocol="$(echo "${snapshot_url}" | head -c5)" if [ "${protocol}" = "gs://" ]; then - gsutil cp "${snapshot_url}" "${LOCAL_JAR_NAME}" + gcloud storage cp "${snapshot_url}" "${LOCAL_JAR_NAME}" else curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}" fi diff --git a/conda/README.md b/conda/README.md index a4227dc65..59ec32b36 100644 --- a/conda/README.md +++ b/conda/README.md @@ -77,8 +77,8 @@ Where `create-my-cluster.sh` specifies a list of conda and/or pip packages to in ``` #!/usr/bin/env bash -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh @@ -100,9 +100,9 @@ Similarly, one can also specify a [conda environment yml file](https://github.co CONDA_ENV_YAML_GSC_LOC="gs://my-bucket/path/to/conda-environment.yml" CONDA_ENV_YAML_PATH="/root/conda-environment.yml" echo "Downloading conda environment at $CONDA_ENV_YAML_GSC_LOC to $CONDA_ENV_YAML_PATH ... " -gsutil -m cp -r $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . +gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh diff --git a/connectors/connectors.sh b/connectors/connectors.sh index 22157dafa..ee985e27f 100755 --- a/connectors/connectors.sh +++ b/connectors/connectors.sh @@ -128,7 +128,7 @@ update_connector_url() { find "${vm_connectors_dir}/" -name "${pattern}" -delete - gsutil cp -P "${url}" "${vm_connectors_dir}/" + gcloud storage cp --preserve-posix "${url}" "${vm_connectors_dir}/" local -r jar_name=${url##*/} diff --git a/dask/README.md b/dask/README.md index 69d70738b..ec0f6909b 100644 --- a/dask/README.md +++ b/dask/README.md @@ -136,7 +136,7 @@ You can also `ssh` into the cluster and execute Dask jobs from Python files. To run jobs, you can either `scp` a file onto your cluster or use `gsutil` on the cluster to download the Python file. -`gcloud compute ssh --command="gsutil cp gs://path/to/file.py .; +`gcloud compute ssh --command="gcloud storage cp gs://path/to/file.py .; python file.py` ### Accessing Web UIs diff --git a/hbase/hbase.sh b/hbase/hbase.sh index 10724dab9..7325b17fd 100755 --- a/hbase/hbase.sh +++ b/hbase/hbase.sh @@ -223,7 +223,7 @@ EOF kadmin.local -q "addprinc -randkey hbase/${m}.${DOMAIN}@${REALM}" echo "Generating hbase keytab..." kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${m}.keytab hbase/${m}.${DOMAIN}" - gsutil cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ + gcloud storage cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${m}.keytab" done @@ -232,17 +232,17 @@ EOF kadmin.local -q "addprinc -randkey hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" echo "Generating hbase keytab..." kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" - gsutil cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \ + gcloud storage cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \ "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab" done touch /tmp/_success - gsutil cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + gcloud storage cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" fi success=1 while [[ $success == "1" ]]; do sleep 1 success=$( - gsutil -q stat "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" echo $? ) done @@ -255,7 +255,7 @@ EOF fi # Copy keytab to machine - gsutil cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path + gcloud storage cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path # Change owner of keytab to hbase with read only permissions if [ -f $hbase_keytab_path ]; then diff --git a/hive-llap/llap.sh b/hive-llap/llap.sh index 5009fb92a..b3af46a91 100644 --- a/hive-llap/llap.sh +++ b/hive-llap/llap.sh @@ -69,7 +69,7 @@ function download_init_actions() { # Download initialization actions locally. This will download the start_llap.sh file to the cluster for execution Check if metadata is supplied echo "downalod init actions supplied as metadata..." mkdir -p "${INIT_ACTIONS_DIR}" - gsutil cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" + gcloud storage cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" chmod 700 "${INIT_ACTIONS_DIR}/start_llap.sh" } diff --git a/mlvm/mlvm.sh b/mlvm/mlvm.sh index 320edfdc3..3227ac102 100644 --- a/mlvm/mlvm.sh +++ b/mlvm/mlvm.sh @@ -97,9 +97,9 @@ function download_init_actions() { # Download initialization actions locally. mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask} - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \; } @@ -167,7 +167,7 @@ function install_spark_nlp() { function install_connectors() { local -r url="gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${SPARK_BIGQUERY_VERSION}.jar" - gsutil cp "${url}" "${CONNECTORS_DIR}/" + gcloud storage cp "${url}" "${CONNECTORS_DIR}/" local -r jar_name=${url##*/} From 8c4f71aeea073935d058f0d95e5441df06ca2292 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 13:50:01 +0530 Subject: [PATCH 02/20] Update README.md --- conda/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/README.md b/conda/README.md index 59ec32b36..f63dccaa1 100644 --- a/conda/README.md +++ b/conda/README.md @@ -77,8 +77,8 @@ Where `create-my-cluster.sh` specifies a list of conda and/or pip packages to in ``` #!/usr/bin/env bash -gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . +gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh From 9797e7680c0efd161513d493ceced2064ace03a3 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 13:50:54 +0530 Subject: [PATCH 03/20] Update README.md --- conda/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/README.md b/conda/README.md index f63dccaa1..984762f97 100644 --- a/conda/README.md +++ b/conda/README.md @@ -100,9 +100,9 @@ Similarly, one can also specify a [conda environment yml file](https://github.co CONDA_ENV_YAML_GSC_LOC="gs://my-bucket/path/to/conda-environment.yml" CONDA_ENV_YAML_PATH="/root/conda-environment.yml" echo "Downloading conda environment at $CONDA_ENV_YAML_GSC_LOC to $CONDA_ENV_YAML_PATH ... " -gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH -gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . +gcloud storage cp $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH +gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh From 50cacfce883df503833aa0f0507b0cf0af42ba2a Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 15:38:30 +0530 Subject: [PATCH 04/20] Update alluxio.sh --- alluxio/alluxio.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index 3775c6dbf..b51febeed 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -33,6 +33,15 @@ readonly ALLUXIO_HOME=/opt/alluxio readonly ALLUXIO_SITE_PROPERTIES=${ALLUXIO_HOME}/conf/alluxio-site.properties readonly ALLUXIO_DOWNLOAD_URL=https://downloads.alluxio.io/downloads/files/${ALLUXIO_VERSION}/alluxio-${ALLUXIO_VERSION}-bin.tar.gz +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL_CP="gcloud storage cp" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL_CP="gsutil cp" +fi + # Downloads a file to the local machine from a remote HTTP(S) or GCS URI into the cwd # # Args: @@ -41,7 +50,7 @@ download_file() { local -r uri="$1" if [[ "${uri}" == gs://* ]]; then - gcloud storage cp "${uri}" ./ + ${GSUTIL_CP} "${uri}" ./ else # TODO Add metadata header tag to the wget for filtering out in download metrics. wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}" From 766d01a6931b7d538590b1a383cee7a6baa6ae04 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 15:39:39 +0530 Subject: [PATCH 05/20] Update beam.sh --- beam/beam.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/beam/beam.sh b/beam/beam.sh index f3fe96145..8ed64f6cb 100755 --- a/beam/beam.sh +++ b/beam/beam.sh @@ -22,6 +22,15 @@ readonly START_FLINK_YARN_SESSION_METADATA_KEY='flink-start-yarn-session' # Set this to true to start a flink yarn session at initialization time. readonly START_FLINK_YARN_SESSION_DEFAULT=true +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL_CP="gcloud storage cp" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL_CP="gsutil cp" +fi + function is_master() { local role="$(/usr/share/google/get_metadata_value attributes/dataproc-role)" if [[ "$role" == 'Master' ]]; then @@ -40,7 +49,7 @@ function download_snapshot() { readonly snapshot_url="${1}" readonly protocol="$(echo "${snapshot_url}" | head -c5)" if [ "${protocol}" = "gs://" ]; then - gcloud storage cp "${snapshot_url}" "${LOCAL_JAR_NAME}" + ${GSUTIL_CP} "${snapshot_url}" "${LOCAL_JAR_NAME}" else curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}" fi From fa154a0de42ce40ce017b85acb427e32f6559db6 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 15:45:54 +0530 Subject: [PATCH 06/20] Update connectors.sh --- connectors/connectors.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/connectors/connectors.sh b/connectors/connectors.sh index ee985e27f..1738a6e6f 100755 --- a/connectors/connectors.sh +++ b/connectors/connectors.sh @@ -27,6 +27,15 @@ readonly BIGQUERY_CONNECTOR_URL=$(/usr/share/google/get_metadata_value attribute readonly SPARK_BIGQUERY_CONNECTOR_URL=$(/usr/share/google/get_metadata_value attributes/spark-bigquery-connector-url || true) readonly HIVE_BIGQUERY_CONNECTOR_URL=$(/usr/share/google/get_metadata_value attributes/hive-bigquery-connector-url || true) +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL_CP="gcloud storage cp" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL_CP="gsutil cp" +fi + min_version() { echo -e "$1\n$2" | sort -r -t'.' -n -k1,1 -k2,2 -k3,3 | tail -n1 } @@ -128,7 +137,7 @@ update_connector_url() { find "${vm_connectors_dir}/" -name "${pattern}" -delete - gcloud storage cp --preserve-posix "${url}" "${vm_connectors_dir}/" + ${GSUTIL_CP} --preserve-posix "${url}" "${vm_connectors_dir}/" local -r jar_name=${url##*/} From 71371879e40cb2547459d9900dd457a899c3d714 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:01:37 +0530 Subject: [PATCH 07/20] Update hbase.sh --- hbase/hbase.sh | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/hbase/hbase.sh b/hbase/hbase.sh index 7325b17fd..ae8cb649f 100755 --- a/hbase/hbase.sh +++ b/hbase/hbase.sh @@ -34,6 +34,15 @@ readonly REALM=$(echo "${DOMAIN}" | awk '{print toupper($0)}') readonly ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role) readonly FQDN=$(hostname -f) +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL_CP="gcloud storage" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL_CP="gsutil" +fi + function retry_command() { cmd="$1" for ((i = 0; i < 10; i++)); do @@ -223,7 +232,7 @@ EOF kadmin.local -q "addprinc -randkey hbase/${m}.${DOMAIN}@${REALM}" echo "Generating hbase keytab..." kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${m}.keytab hbase/${m}.${DOMAIN}" - gcloud storage cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ + ${GSUTIL} cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${m}.keytab" done @@ -232,17 +241,17 @@ EOF kadmin.local -q "addprinc -randkey hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" echo "Generating hbase keytab..." kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" - gcloud storage cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \ + ${GSUTIL} cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \ "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab" done touch /tmp/_success - gcloud storage cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + ${GSUTIL_CP} cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" fi success=1 while [[ $success == "1" ]]; do sleep 1 success=$( - gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + ${GSUTIL} objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" echo $? ) done @@ -255,7 +264,7 @@ EOF fi # Copy keytab to machine - gcloud storage cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path + ${GSUTIL} cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path # Change owner of keytab to hbase with read only permissions if [ -f $hbase_keytab_path ]; then From 34213aed53591e70b8dd16f5d9d2f6bc748623a9 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:04:39 +0530 Subject: [PATCH 08/20] Update hbase.sh --- hbase/hbase.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hbase/hbase.sh b/hbase/hbase.sh index ae8cb649f..9d3ab2c94 100755 --- a/hbase/hbase.sh +++ b/hbase/hbase.sh @@ -38,9 +38,9 @@ function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" -GSUTIL_CP="gcloud storage" +GSUTIL="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL_CP="gsutil" + GSUTIL="gsutil" fi function retry_command() { @@ -245,7 +245,7 @@ EOF "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab" done touch /tmp/_success - ${GSUTIL_CP} cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + ${GSUTIL} cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" fi success=1 while [[ $success == "1" ]]; do From 5bf2dcecbfbef97ac480646b9de02989152e2bd6 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:05:32 +0530 Subject: [PATCH 09/20] Update llap.sh --- hive-llap/llap.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hive-llap/llap.sh b/hive-llap/llap.sh index b3af46a91..88b44a1ef 100644 --- a/hive-llap/llap.sh +++ b/hive-llap/llap.sh @@ -39,6 +39,15 @@ readonly INIT_ACTIONS_REPO="$(/usr/share/google/get_metadata_value attributes/in # directory files ingestied will reside readonly INIT_ACTIONS_DIR='/usr/lib/hive-llap' +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL="gcloud storage" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL="gsutil" +fi + function pre_flight_checks(){ # check for bad configurations if [[ "${NUM_LLAP_NODES}" -ge "${WORKER_NODE_COUNT}" ]]; then @@ -69,7 +78,7 @@ function download_init_actions() { # Download initialization actions locally. This will download the start_llap.sh file to the cluster for execution Check if metadata is supplied echo "downalod init actions supplied as metadata..." mkdir -p "${INIT_ACTIONS_DIR}" - gcloud storage cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" + ${GSUTIL} cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" chmod 700 "${INIT_ACTIONS_DIR}/start_llap.sh" } From 9571314b457783a6b4574272946c31b894dcadbb Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:07:33 +0530 Subject: [PATCH 10/20] Update mlvm.sh --- mlvm/mlvm.sh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mlvm/mlvm.sh b/mlvm/mlvm.sh index 3227ac102..bc12e6e63 100644 --- a/mlvm/mlvm.sh +++ b/mlvm/mlvm.sh @@ -37,6 +37,15 @@ R_VERSION="$(R --version | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/ readonly R_VERSION readonly SPARK_NLP_VERSION="3.2.1" # Must include subminor version here +function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; } +function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } + +GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" +GSUTIL="gcloud storage" +if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then + GSUTIL="gsutil" +fi + CONDA_PACKAGES=( "r-dplyr=1.0" "r-essentials=${R_VERSION}" @@ -97,9 +106,9 @@ function download_init_actions() { # Download initialization actions locally. mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask} - gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" - gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" - gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" + ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" + ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" + ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \; } @@ -167,7 +176,7 @@ function install_spark_nlp() { function install_connectors() { local -r url="gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${SPARK_BIGQUERY_VERSION}.jar" - gcloud storage cp "${url}" "${CONNECTORS_DIR}/" + ${GSUTIL} cp "${url}" "${CONNECTORS_DIR}/" local -r jar_name=${url##*/} From 1f6f5748ccb4e6cb0ea46ae64eda767e645cdf58 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:08:16 +0530 Subject: [PATCH 11/20] Update alluxio.sh --- alluxio/alluxio.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index b51febeed..cf4f3462e 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -37,9 +37,9 @@ function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" -GSUTIL_CP="gcloud storage cp" +GSUTIL_CP="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL_CP="gsutil cp" + GSUTIL="gsutil" fi # Downloads a file to the local machine from a remote HTTP(S) or GCS URI into the cwd @@ -50,7 +50,7 @@ download_file() { local -r uri="$1" if [[ "${uri}" == gs://* ]]; then - ${GSUTIL_CP} "${uri}" ./ + ${GSUTIL} "${uri}" ./ else # TODO Add metadata header tag to the wget for filtering out in download metrics. wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}" From 506586bd11d9279f6344b2f5d9d8d0f4e3ef63ef Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:08:54 +0530 Subject: [PATCH 12/20] Update beam.sh --- beam/beam.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beam/beam.sh b/beam/beam.sh index 8ed64f6cb..ef1fb972a 100755 --- a/beam/beam.sh +++ b/beam/beam.sh @@ -26,9 +26,9 @@ function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" -GSUTIL_CP="gcloud storage cp" +GSUTIL="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL_CP="gsutil cp" + GSUTIL="gsutil" fi function is_master() { @@ -49,7 +49,7 @@ function download_snapshot() { readonly snapshot_url="${1}" readonly protocol="$(echo "${snapshot_url}" | head -c5)" if [ "${protocol}" = "gs://" ]; then - ${GSUTIL_CP} "${snapshot_url}" "${LOCAL_JAR_NAME}" + ${GSUTIL} "${snapshot_url}" "${LOCAL_JAR_NAME}" else curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}" fi From 1541c52a247b3b9b0754b0e512954f527115b88a Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:10:33 +0530 Subject: [PATCH 13/20] Update connectors.sh --- connectors/connectors.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/connectors/connectors.sh b/connectors/connectors.sh index 1738a6e6f..7457dc6a3 100755 --- a/connectors/connectors.sh +++ b/connectors/connectors.sh @@ -31,9 +31,9 @@ function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" -GSUTIL_CP="gcloud storage cp" +GSUTIL="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL_CP="gsutil cp" + GSUTIL="gsutil" fi min_version() { @@ -137,7 +137,7 @@ update_connector_url() { find "${vm_connectors_dir}/" -name "${pattern}" -delete - ${GSUTIL_CP} --preserve-posix "${url}" "${vm_connectors_dir}/" + ${GSUTIL} cp --preserve-posix "${url}" "${vm_connectors_dir}/" local -r jar_name=${url##*/} From 86ba4a39c4d8a959fdf0f09671a019c8dd251b90 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:11:06 +0530 Subject: [PATCH 14/20] Update alluxio.sh --- alluxio/alluxio.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index cf4f3462e..9e58bc81e 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -50,7 +50,7 @@ download_file() { local -r uri="$1" if [[ "${uri}" == gs://* ]]; then - ${GSUTIL} "${uri}" ./ + ${GSUTIL} cp "${uri}" ./ else # TODO Add metadata header tag to the wget for filtering out in download metrics. wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}" From a242dddc5c44172d2a743b1eae7c6a0c2f56ea9c Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 16:11:29 +0530 Subject: [PATCH 15/20] Update beam.sh --- beam/beam.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beam/beam.sh b/beam/beam.sh index ef1fb972a..2fed3841e 100755 --- a/beam/beam.sh +++ b/beam/beam.sh @@ -49,7 +49,7 @@ function download_snapshot() { readonly snapshot_url="${1}" readonly protocol="$(echo "${snapshot_url}" | head -c5)" if [ "${protocol}" = "gs://" ]; then - ${GSUTIL} "${snapshot_url}" "${LOCAL_JAR_NAME}" + ${GSUTIL} cp "${snapshot_url}" "${LOCAL_JAR_NAME}" else curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}" fi From dcad455b5f674ba7593ff38b77c8684d822b0da6 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 20:26:09 +0530 Subject: [PATCH 16/20] Update alluxio.sh --- alluxio/alluxio.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index 9e58bc81e..336581278 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -37,7 +37,7 @@ function version_le() { [[ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]]; function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" -GSUTIL_CP="gcloud storage" +GSUTIL="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then GSUTIL="gsutil" fi From e7ada01a4eb2329bd459a9d8a8fe4b5657b0fe14 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 20:27:44 +0530 Subject: [PATCH 17/20] Update connectors.sh --- connectors/connectors.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connectors/connectors.sh b/connectors/connectors.sh index 7457dc6a3..84305dfd0 100755 --- a/connectors/connectors.sh +++ b/connectors/connectors.sh @@ -137,7 +137,7 @@ update_connector_url() { find "${vm_connectors_dir}/" -name "${pattern}" -delete - ${GSUTIL} cp --preserve-posix "${url}" "${vm_connectors_dir}/" + ${GSUTIL} cp -P "${url}" "${vm_connectors_dir}/" local -r jar_name=${url##*/} From d609ab85c6b97d55c0b48055e33702c9660d9cf1 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 20:30:04 +0530 Subject: [PATCH 18/20] Update hbase.sh --- hbase/hbase.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hbase/hbase.sh b/hbase/hbase.sh index 9d3ab2c94..89010727c 100755 --- a/hbase/hbase.sh +++ b/hbase/hbase.sh @@ -39,8 +39,10 @@ function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" GSUTIL="gcloud storage" +GSUTIL_STAT="gcloud storage objects list --stat --fetch-encrypted-object-hashes" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then GSUTIL="gsutil" + GSUTIL_STAT="gsutil -q stat" fi function retry_command() { @@ -251,7 +253,7 @@ EOF while [[ $success == "1" ]]; do sleep 1 success=$( - ${GSUTIL} objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" + ${GSUTIL_STAT} "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" echo $? ) done From 4e30d7b1130a5331c2a04a90b24cd8042b6e1888 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 20:38:16 +0530 Subject: [PATCH 19/20] Update mlvm.sh --- mlvm/mlvm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlvm/mlvm.sh b/mlvm/mlvm.sh index bc12e6e63..57794e705 100644 --- a/mlvm/mlvm.sh +++ b/mlvm/mlvm.sh @@ -43,7 +43,7 @@ function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" GSUTIL="gcloud storage" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL="gsutil" + GSUTIL="gsutil -m" fi CONDA_PACKAGES=( From 3ae22597b21a8a7f15bda3e8fbaed76a90723add Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 18 May 2026 20:43:50 +0530 Subject: [PATCH 20/20] Update mlvm.sh --- mlvm/mlvm.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mlvm/mlvm.sh b/mlvm/mlvm.sh index 57794e705..dcf231924 100644 --- a/mlvm/mlvm.sh +++ b/mlvm/mlvm.sh @@ -42,8 +42,10 @@ function version_lt() { [[ "$1" = "$2" ]] && return 1 || version_le "$1" "$2"; } GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')" GSUTIL="gcloud storage" +GSUTIL_OPTS="" if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then - GSUTIL="gsutil -m" + GSUTIL="gsutil" + GSUTIL_OPTS="-m" fi CONDA_PACKAGES=( @@ -106,9 +108,9 @@ function download_init_actions() { # Download initialization actions locally. mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask} - ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" - ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" - ${GSUTIL} rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" + ${GSUTIL} ${GSUTIL_OPTS} rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" + ${GSUTIL} ${GSUTIL_OPTS} rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" + ${GSUTIL} ${GSUTIL_OPTS} rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \; }