Skip to content
Open
11 changes: 10 additions & 1 deletion beam/util/build-beam-artifacts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,18 @@ readonly BEAM_JOB_SERVICE_DESTINATION="$1"
readonly BEAM_CONTAINER_IMAGE_DESTINATION="$2"
readonly BEAM_SOURCE_VERSION="${3:-master}"

# Succeeds (status 0) when version string $1 sorts at or before version
# string $2 under `sort -V` ordering; fails otherwise.
# printf is used instead of `echo -e` so that backslash sequences inside the
# arguments are compared literally rather than being expanded.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}
Copy link
Copy Markdown
Contributor

@vinayakumarb vinayakumarb May 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this function is not used, it can be removed. This applies to all files.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's being used in version_lt.

# Succeeds (status 0) when version $1 is strictly lower than version $2.
# Identical strings are rejected first; every other case is decided by
# version_le.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
# The version is the text following "SDK " on the "Google Cloud SDK" line of
# `gcloud --version`.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

# Builds the Flink job-server shadow jar with Gradle, then copies the
# resulting SNAPSHOT jar to ${BEAM_JOB_SERVICE_DESTINATION}, renaming it after
# ${BEAM_SOURCE_VERSION}.
function build_job_service() {
  ./gradlew :beam-runners-flink_2.11-job-server:shadowJar
  # ${GSUTIL} is deliberately unquoted: it may hold "gcloud storage", which has
  # to split into command + subcommand. The source path keeps its glob unquoted
  # so the shell expands it; the destination is quoted to survive odd characters.
  ${GSUTIL} cp \
    ./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \
    "${BEAM_JOB_SERVICE_DESTINATION}/beam-runners-flink_2.11-job-server-${BEAM_SOURCE_VERSION}-SNAPSHOT.jar"
}
Expand Down
12 changes: 11 additions & 1 deletion cloudbuild/run-presubmit-on-k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ readonly DATAPROC_IMAGE_VERSION=$3

readonly POD_NAME=presubmit-${DATAPROC_IMAGE_VERSION//./-}-${BUILD_ID//_/-}

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi


gcloud container clusters get-credentials "${CLOUDSDK_CONTAINER_CLUSTER}"

LOGS_SINCE_TIME=$(date --iso-8601=seconds)
Expand Down Expand Up @@ -52,7 +62,7 @@ if [[ ${exit_code} != 0 ]]; then
LOG_GCS_PATH="gs://${BUCKET}/${BUILD_ID}/logs/${POD_NAME}.log"

echo "Attempting to upload logs to ${LOG_GCS_PATH}"
if kubectl logs "${POD_NAME}" | gsutil cp - "${LOG_GCS_PATH}"; then
if kubectl logs "${POD_NAME}" | ${GSUTIL} cp - "${LOG_GCS_PATH}"; then
echo "Logs for failed pod ${POD_NAME} uploaded to: ${LOG_GCS_PATH}"
else
echo "Log upload to ${LOG_GCS_PATH} failed."
Expand Down
11 changes: 10 additions & 1 deletion datasketches/datasketches.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ readonly DS_LIBPATH="/usr/lib/datasketches"
readonly SPARK_VERSION=$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)
readonly SPARK_JAVA_EXAMPLE_JAR="gs://spark-lib/datasketches/spark-java-thetasketches-1.0-SNAPSHOT.jar"

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

function download_libraries()
{
mkdir -p ${DS_LIBPATH}
Expand All @@ -57,7 +66,7 @@ function download_libraries()
function download_example_jar()
{
if [[ "${SPARK_VERSION}" < "3.5" ]]; then
gsutil cp "${SPARK_JAVA_EXAMPLE_JAR}" "${DS_LIBPATH}"
${GSUTIL} cp "${SPARK_JAVA_EXAMPLE_JAR}" "${DS_LIBPATH}"
if [ $? -eq 0 ]; then
echo "Downloaded "${SPARK_JAVA_EXAMPLE_JAR}" successfully"
else
Expand Down
11 changes: 10 additions & 1 deletion gobblin/gobblin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ readonly HADOOP_LIB="/usr/lib/hadoop/lib"

readonly JAR_NAME_CANONICALIZER="s/([-a-zA-Z0-9]+?)[-]([0-9][0-9.]+?)([-.].*?)?.jar/\1/"

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

function maybe_symlink() {
local jar=$1
if [[ ! -f "${HADOOP_LIB}/${jar}" ]]; then
Expand Down Expand Up @@ -91,7 +100,7 @@ EOF
function install_package() {
# Download binary.
local temp=$(mktemp -d)
gsutil cp "${PACKAGE_URL}" "${temp}/package.tar.gz"
${GSUTIL} cp "${PACKAGE_URL}" "${temp}/package.tar.gz"
tar -xf "${temp}/package.tar.gz" -C "${temp}"

# Setup package.
Expand Down
13 changes: 12 additions & 1 deletion gpu/manual-test-runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ export BUCKET="$(jq -r .BUCKET env.json)"

gcs_log_dir="gs://${BUCKET}/${BUILD_ID}/logs"

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI and its leading options. SDK versions below
# 402.0.0 get `gsutil` with `-m`; anything else gets `gcloud storage` with no
# extra options.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
  GSUTIL_OPTS="-m"
else
  GSUTIL="gcloud storage"
  GSUTIL_OPTS=""
fi

function exit_handler() {
RED='\\e[0;31m'
GREEN='\\e[0;32m'
Expand All @@ -48,7 +59,7 @@ function exit_handler() {
# TODO: remove any test related resources in the project

echo 'Uploading local logs to GCS bucket.'
gsutil -m rsync -r "${log_dir}/" "${gcs_log_dir}/"
${GSUTIL} ${GSUTIL_OPTS} rsync -r "${log_dir}/" "${gcs_log_dir}/"

if [[ -f "${tmp_dir}/tests_success" ]]; then
echo -e "${GREEN}Workflow succeeded${NC}, check logs at ${log_dir}/ or ${gcs_log_dir}/"
Expand Down
4 changes: 2 additions & 2 deletions hbase/test_hbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ def setUp(self):
super().setUp()
self.GCS_BUCKET = "test-hbase-{}-{}".format(self.datetime_str(),
self.random_str())
self.assert_command('gsutil mb -c regional -l {} gs://{}'.format(
self.assert_command('gcloud storage buckets create --default-storage-class=regional --location {} gs://{}'.format(
self.REGION, self.GCS_BUCKET))
Comment on lines +18 to 19
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This command string is getting long. To improve readability, you could break it into multiple lines and assign it to a variable before passing it to self.assert_command.

Suggested change
self.assert_command('gcloud storage buckets create --default-storage-class=regional --location {} gs://{}'.format(
self.REGION, self.GCS_BUCKET))
create_bucket_cmd = (
'gcloud storage buckets create '
'--default-storage-class=regional '
'--location {} gs://{}'
).format(self.REGION, self.GCS_BUCKET)
self.assert_command(create_bucket_cmd)


def tearDown(self):
self.assert_command('gsutil -m rm -rf gs://{}'.format(self.GCS_BUCKET))
self.assert_command('gcloud storage rm --recursive --continue-on-error gs://{}'.format(self.GCS_BUCKET))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This command string is a bit long. For better readability, consider assigning it to a variable on a separate line and breaking it up for clarity.

        rm_bucket_cmd = (
            'gcloud storage rm --recursive --continue-on-error gs://{}'
        ).format(self.GCS_BUCKET)
        self.assert_command(rm_bucket_cmd)

super().tearDown()

def verify_instance(self, name):
Expand Down
11 changes: 10 additions & 1 deletion hive-lineage/hive-lineage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,18 @@ function set_hive_lineage_conf() {
done
}

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

function install_jars() {
echo "Installing openlineage-hive hook"
gsutil cp -P "$INSTALLATION_SOURCE/hive-openlineage-hook-$HIVE_OL_HOOK_VERSION.jar" "$HIVE_LIB_DIR/hive-openlineage-hook.jar"
${GSUTIL} cp -P "$INSTALLATION_SOURCE/hive-openlineage-hook-$HIVE_OL_HOOK_VERSION.jar" "$HIVE_LIB_DIR/hive-openlineage-hook.jar"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure whether -P works in the gcloud storage case.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it works; backward compatibility is there.

}

function restart_hive_server2_master() {
Expand Down
11 changes: 10 additions & 1 deletion knox/verify_knox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,19 @@ function test_installation() {

}

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

# to test update, we will upload a new topology to gs bucket, and check whether it appears
# we assume that knox initialization action is the very first one, /etc/google-dataproc/startup-scripts/dataproc-initialization-script-0
function test_update_new_topology() {
gsutil cp /etc/knox/conf/topologies/example-hive-pii.xml "${KNOX_GW_CONFIG_GCS}/topologies/update_topology.xml"
${GSUTIL} cp /etc/knox/conf/topologies/example-hive-pii.xml "${KNOX_GW_CONFIG_GCS}/topologies/update_topology.xml"
sudo /bin/bash /etc/google-dataproc/startup-scripts/dataproc-initialization-script-0 update
test_installation update_topology
[[ $? == 1 ]] && return 1
Expand Down
11 changes: 10 additions & 1 deletion push-to-gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ readonly HEAD="$1"
readonly MODULE="$2"
readonly GCS_FOLDER=gs://dataproc-initialization-actions/${MODULE}/

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI: `gsutil` when the installed Google Cloud SDK
# reports a version below 402.0.0, `gcloud storage` otherwise.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
else
  GSUTIL="gcloud storage"
fi

[[ -n "${HEAD}" && -n "${MODULE}" ]]

# Verify the repo has no uncommitted changes.
Expand Down Expand Up @@ -51,6 +60,6 @@ for file in "${MODULE}/"*.sh; do
fi
done

gsutil -m rsync -R -x "__pycache__/.*" "${MODULE}/" "${GCS_FOLDER}"
${GSUTIL} rsync -r -x "__pycache__/.*" "${MODULE}/" "${GCS_FOLDER}"

echo "Pushed ${MODULE}/ to ${GCS_FOLDER}."
14 changes: 12 additions & 2 deletions rapids/manual-test-runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ export BUCKET="$(jq -r .BUCKET env.json)"

gcs_log_dir="gs://${BUCKET}/${BUILD_ID}/logs"

# Succeeds when version $1 sorts at or before version $2 under `sort -V`.
# printf (not `echo -e`) keeps backslash sequences in the arguments literal.
function version_le() {
  local lowest
  lowest="$(printf '%s\n' "$1" "$2" | sort -V | head -n1)"
  [[ "$1" == "${lowest}" ]]
}

# Succeeds when version $1 is strictly lower than version $2.
function version_lt() {
  if [[ "$1" == "$2" ]]; then
    return 1
  fi
  version_le "$1" "$2"
}

# Choose the Cloud Storage CLI and its leading options. SDK versions below
# 402.0.0 get `gsutil` with `-m`; anything else gets `gcloud storage` with no
# extra options.
GCLOUD_SDK_VERSION="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
if version_lt "${GCLOUD_SDK_VERSION}" "402.0.0"; then
  GSUTIL="gsutil"
  GSUTIL_OPTS="-m"
else
  GSUTIL="gcloud storage"
  GSUTIL_OPTS=""
fi

function exit_handler() {
RED='\\e[0;31m'
GREEN='\\e[0;32m'
Expand All @@ -47,7 +58,7 @@ function exit_handler() {
# TODO: remove any test related resources in the project

echo 'Uploading local logs to GCS bucket.'
gsutil -m rsync -r "${log_dir}/" "${gcs_log_dir}/"
${GSUTIL} ${GSUTIL_OPTS} rsync -r "${log_dir}/" "${gcs_log_dir}/"

if [[ -f "${tmp_dir}/tests_success" ]]; then
echo -e "${GREEN}Workflow succeeded, check logs at ${log_dir}/ or ${gcs_log_dir}/${NC}"
Expand All @@ -74,4 +85,3 @@ export INTERNAL_IP_SSH="true"
screen -US "${session_name}" -c rapids/bazel.screenrc