Skip to content

Commit 14e9335

Browse files
Internal change
PiperOrigin-RevId: 922850859
1 parent 76b61ae commit 14e9335

4 files changed

Lines changed: 227 additions & 7 deletions

File tree

.bazelrc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,7 @@ build:windows --google_default_credentials=true
3232
build:macos --remote_cache=https://storage.googleapis.com/macos-cel-python-remote-cache
3333
build:macos --google_default_credentials=true
3434

35+
# Silence deprecation warnings from external dependencies (Linux and macOS)
36+
build:linux --cxxopt=-Wno-deprecated-declarations
37+
build:macos --cxxopt=-Wno-deprecated-declarations
38+

release/kokoro/release_linux.cfg

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,8 @@
33

44
build_file: "cel-python/release/kokoro/release_linux.sh"
55
timeout_mins: 120
6+
7+
container_properties {
8+
docker_image: "us-central1-docker.pkg.dev/kokoro-container-bakery/kokoro/ubuntu/ubuntu2204/ktcb:current"
9+
docker_sibling_containers: true
10+
}

release/kokoro/release_linux.sh

Lines changed: 207 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,104 @@
11
#!/bin/bash
22
set -e
33

4+
5+
# Avoid virtualenv/pip trying to download/upgrade tools from PyPI on host
6+
export VIRTUALENV_NO_DOWNLOAD=1
7+
export PIP_DISABLE_PIP_VERSION_CHECK=1
8+
9+
# Pass these environment variables to the cibuildwheel Docker container
10+
export CIBW_ENVIRONMENT="VIRTUALENV_NO_DOWNLOAD=1 PIP_DISABLE_PIP_VERSION_CHECK=1"
11+
export CIBW_DEPENDENCY_VERSIONS="latest"
12+
413
# If running locally (not on Kokoro), authenticate with gcloud.
514
if [ -z "${KOKORO_BUILD_ID}" ]; then
615
if ! gcloud auth application-default print-access-token --quiet > /dev/null; then
716
gcloud auth application-default login
817
fi
918
fi
1019

11-
pip install -U keyring keyrings.google-artifactregistry-auth twine cibuildwheel
20+
# We use --no-cache-dir to force pip to download packages fresh and bypass the local
21+
# cache. In Kokoro/RBE sandboxed environments, writing to the default cache directory
22+
# (~/.cache/pip) can encounter permission/sandbox restrictions or lead to stale
23+
# dependency resolution. Disabling the cache ensures a reliable, reproducible install.
24+
pip install --no-cache-dir -U keyring keyrings.google-artifactregistry-auth twine cibuildwheel
25+
26+
# Patch cibuildwheel at runtime to bypass the RBE stdout buffering deadlock.
27+
# The RBE proxy buffers the persistent container bash stdout. By appending a 4KB
28+
# padding line to the end of every command output, we force the proxy to flush the
29+
# buffer immediately. We then read and discard this padding to keep the stream clean.
30+
OCI_PATH=$(python3 -c "import cibuildwheel.oci_container; print(cibuildwheel.oci_container.__file__)")
31+
echo "Patching cibuildwheel at $OCI_PATH..."
32+
33+
cat << 'EOF' > patch_oci.py
34+
import sys
35+
import re
36+
37+
path = sys.argv[1]
38+
with open(path, 'r') as f:
39+
content = f.read()
40+
41+
# 1. Force a 4KB flush at the end of every command execution
42+
target_write = 'printf "%04d%s\\n" $? {end_of_message}'
43+
replacement_write = 'printf "%04d%s\\n%4096s\\n" $? {end_of_message} " "'
44+
if target_write in content:
45+
content = content.replace(target_write, replacement_write)
46+
print("Patched write loop.")
47+
48+
# 2. Read and discard the 4KB padding to keep the stream clean
49+
target_read = """ # add the last line to output, without the footer
50+
output_io.write(line[0:footer_offset])
51+
output_io.flush()
52+
break"""
53+
54+
replacement_read = """ # add the last line to output, without the footer
55+
output_io.write(line[0:footer_offset])
56+
output_io.flush()
57+
# Read and discard the 4KB padding line to clear the stream!
58+
self.bash_stdout.readline()
59+
break"""
60+
61+
if target_read in content:
62+
content = content.replace(target_read, replacement_read)
63+
print("Patched read loop.")
64+
65+
# 3. Patch copy_into else block using regex
66+
pattern = re.compile(r' else:.*? def copy_out', re.DOTALL)
67+
68+
replacement_copy = """ else:
69+
self.call(["mkdir", "-p", to_path.parent])
70+
# Use native docker cp to copy the file, avoiding stdin EOF deadlocks in RBE
71+
subprocess.run(
72+
[
73+
self.engine.name,
74+
"cp",
75+
str(from_path),
76+
f"{self.name}:{to_path}",
77+
],
78+
check=True,
79+
)
80+
81+
def copy_out"""
82+
83+
if pattern.search(content):
84+
content = pattern.sub(replacement_copy, content)
85+
print("Patched copy_into using regex.")
86+
else:
87+
print("Error: copy_into pattern not found!")
88+
sys.exit(1)
89+
90+
with open(path, 'w') as f:
91+
f.write(content)
92+
93+
print("Successfully patched oci_container.py!")
94+
EOF
95+
96+
python3 patch_oci.py "$OCI_PATH"
97+
rm patch_oci.py
98+
99+
# Verify that the patched file is syntactically valid Python
100+
echo "Verifying patched oci_container.py syntax..."
101+
python3 -m py_compile "$OCI_PATH" || { echo "ERROR: Patched oci_container.py is corrupted!"; exit 1; }
12102

13103
REPO_DIR=$(mktemp -d)
14104
echo "Created temporary directory: ${REPO_DIR}"
@@ -43,26 +133,139 @@ echo "Building release for version: ${VERSION}"
43133
TMP_DIR=$(mktemp -d)
44134
echo "Build directory: ${TMP_DIR}"
45135

46-
# Add trap cleanup for TMP_DIR as well
47-
trap 'echo "Cleaning up temporary directories: ${REPO_DIR} ${TMP_DIR}"; rm -rf "${REPO_DIR}" "${TMP_DIR}"' EXIT
136+
# Define a cleanup function, run on every exit, that dumps the tail of the cibuildwheel log (if present) before removing the temporary directories
137+
cleanup() {
138+
echo "=== CLEANUP TRIGGERED ==="
139+
if [ -f cibuildwheel.log ]; then
140+
echo "=== LAST 200 LINES OF CIBUILDWHEEL LOG ==="
141+
tail -n 200 cibuildwheel.log
142+
fi
143+
echo "Cleaning up temporary directories: ${REPO_DIR} ${TMP_DIR}"
144+
rm -rf "${REPO_DIR}" "${TMP_DIR}"
145+
}
146+
trap cleanup EXIT
48147

49148
pushd "${TMP_DIR}"
50149

51150
cp -r "${SRC_DIR}"/{*,.*} . 2>/dev/null || true
52151
cp -r "${SRC_DIR}"/release/* . 2>/dev/null || true
53152
rm -rf cel_expr_python/*_test.py
54153

154+
echo "Downloading bazelisk on host..."
155+
curl -LO https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64
156+
chmod +x bazelisk-linux-amd64
157+
55158
# Check if pyproject.toml exists before running sed
56159
if [ -f pyproject.toml ]; then
57160
sed -i "" "s/\$VERSION/${VERSION}/g" pyproject.toml || sed -i "s/\$VERSION/${VERSION}/g" pyproject.toml
58161
fi
59162

163+
export CIBW_CONTAINER_ENGINE_EXTRA_ARGS="--network=host"
164+
60165
echo "Running cibuildwheel: ${CIBWHEEL_BIN}"
61166
# Default CIBWHEEL_BIN if not set
62167
if [ -z "${CIBWHEEL_BIN}" ]; then
63168
CIBWHEEL_BIN="python3 -m cibuildwheel"
64169
fi
65-
${CIBWHEEL_BIN} --platform linux --output-dir dist
170+
171+
echo "Installing diagnostic tools (psmisc, strace) on host..."
172+
# We try to install them, but don't fail the build if we can't (e.g. if no sudo or apt)
173+
sudo apt-get update && sudo apt-get install -y psmisc strace || echo "Failed to install diagnostic tools, proceeding anyway..."
174+
175+
echo "Running cibuildwheel in background..."
176+
${CIBWHEEL_BIN} --platform linux --output-dir dist > cibuildwheel.log 2>&1 &
177+
CIBW_PID=$!
178+
179+
echo "Started cibuildwheel in background with PID $CIBW_PID"
180+
181+
# Poll the log file waiting for the hang
182+
# We look for the "mkdir -p" line followed by no activity for 60 seconds.
183+
TIMEOUT=900 # 15 minutes total timeout
184+
ELAPSED=0
185+
LAST_SIZE=0
186+
STUCK_COUNT=0
187+
HANG_DETECTED=false
188+
189+
while kill -0 $CIBW_PID 2>/dev/null; do
190+
if [ -f cibuildwheel.log ]; then
191+
# Check if the log contains the test setup line
192+
if grep -q "mkdir -p" cibuildwheel.log; then
193+
CURRENT_SIZE=$(stat -c%s cibuildwheel.log)
194+
if [ "$CURRENT_SIZE" -eq "$LAST_SIZE" ]; then
195+
# Log size hasn't changed. If this persists for 60 seconds, we assume it is stuck.
196+
STUCK_COUNT=$((STUCK_COUNT + 10))
197+
echo "Log size unchanged for ${STUCK_COUNT}s at mkdir -p..."
198+
if [ $STUCK_COUNT -ge 60 ]; then
199+
HANG_DETECTED=true
200+
break
201+
fi
202+
else
203+
STUCK_COUNT=0
204+
LAST_SIZE=$CURRENT_SIZE
205+
fi
206+
fi
207+
fi
208+
209+
sleep 10
210+
ELAPSED=$((ELAPSED + 10))
211+
if [ $ELAPSED -ge $TIMEOUT ]; then
212+
echo "Timeout waiting for build to complete."
213+
break
214+
fi
215+
done
216+
217+
if [ "$HANG_DETECTED" = "true" ]; then
218+
echo "===================================================="
219+
echo "!!! DETECTED HANG AT mkdir -p !!! STARTING DIAGNOSTICS"
220+
echo "===================================================="
221+
222+
echo "=== HOST PROCESSES ==="
223+
ps aux
224+
225+
echo "=== PROCESS TREE ==="
226+
pstree -p -a || echo "pstree not available"
227+
228+
echo "=== DOCKER CONTAINERS ==="
229+
docker ps -a
230+
231+
CONTAINER_ID=$(docker ps -q | head -n 1)
232+
if [ -n "$CONTAINER_ID" ]; then
233+
echo "=== CONTAINER PROCESSES ($CONTAINER_ID) ==="
234+
docker exec "$CONTAINER_ID" ps aux
235+
236+
echo "=== CONTAINER LSOF ==="
237+
docker exec "$CONTAINER_ID" lsof || echo "lsof not available"
238+
239+
echo "=== CONTAINER DOCKER INSPECT ==="
240+
docker inspect "$CONTAINER_ID"
241+
242+
echo "=== STRACE DOCKER PROCESSES ==="
243+
DOCKER_PID=$(pgrep -f "docker start|docker exec" | head -n 1)
244+
if [ -n "$DOCKER_PID" ]; then
245+
echo "Stracing host docker process $DOCKER_PID for 15 seconds..."
246+
timeout 15 strace -p "$DOCKER_PID" -f || true
247+
fi
248+
else
249+
echo "No active docker container found!"
250+
fi
251+
252+
echo "=== LAST 100 LINES OF CIBUILDWHEEL LOG ==="
253+
tail -n 100 cibuildwheel.log
254+
255+
echo "Diagnostics complete. Killing cibuildwheel."
256+
kill -9 $CIBW_PID
257+
exit 99
258+
fi
259+
260+
# If no hang was detected (normal completion or the 15-minute polling timeout), wait for it to finish and print the log
261+
wait $CIBW_PID
262+
RC=$?
263+
echo "=== CIBUILDWHEEL LOG ==="
264+
cat cibuildwheel.log
265+
if [ $RC -ne 0 ]; then
266+
echo "cibuildwheel failed with exit code $RC"
267+
exit $RC
268+
fi
66269

67270
if [ "${DRY_RUN}" = "true" ]; then
68271
echo "[DRY RUN] Skipping upload to PyPI exit gate."

release/pyproject.toml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,21 @@ where = ["."]
3838
exclude = ["codelab*", "conformance*", "custom_ext*", "release*", "testing*", "wheelhouse*"]
3939

4040
[tool.cibuildwheel]
41-
build = "cp311-* cp312-* cp313-* cp314-*"
42-
skip = "*musllinux* *win32*"
41+
build = "cp311-*"
42+
#build = "cp311-* cp312-* cp313-* cp314-*"
43+
skip = "*musllinux* *win32* *i686*"
4344
test-command = "python {project}/cel_basic_test.py"
4445
build-verbosity = 1
4546

4647
[tool.cibuildwheel.linux]
47-
before-all = "echo 'Installing bazelisk'; curl -LO https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 /usr/local/bin/bazel"
48+
manylinux-x86_64-image = "manylinux_2_28"
49+
container-engine = "docker; disable_host_mount: True"
50+
# Google's internal Kokoro/RBE network uses a secure MITM proxy that re-signs HTTPS
51+
# traffic with an internal Google CA. Since the public manylinux container does not
52+
# trust this CA, git fetches for external dependencies (like @cel-cpp) will fail
53+
# with SSL certificate errors. We disable http.sslVerify inside the container to
54+
# bypass this and allow Bazel to fetch SCM dependencies through the proxy.
55+
before-all = "git config --global http.sslVerify false && echo 'Installing bazelisk' && cp {project}/bazelisk-linux-amd64 /usr/local/bin/bazel"
4856

4957
[tool.cibuildwheel.macos]
5058
before-all = "echo 'Installing bazelisk'; brew install bazelisk"

0 commit comments

Comments
 (0)