Skip to content

Commit 84f85e1

Browse files
authored
Improve conda wheel builds and macOS Bazel compatibility (#277)
* build(bazel): fix C++17 host toolchain and macOS symbol export handling - add host_cxxopt C++17 to align host/tool target compilation - pin abseil via release tarball and add explicit zlib archive - split pybind exported symbol handling for macOS vs Linux to avoid missing-symbol linker issues on Apple toolchains * build(macOS): add local_config_cc wrapper patch utility and Bazel target - add patch_wrapped_clang script to rewrite wrapped clang wrappers - patch libtool duplicate-object check handling - expose patch tool through sh_binary/genrule for local CI recovery * build(python): set setuptools build_base to _build - isolate build intermediates under _build for cleaner wheel builds and avoid a conflict with the BUILD file on macOS systems * ci(conda): add micromamba-based wheel build workflow for Linux and macOS - test Python 3.9/3.10/3.11 build matrix - install Bazelisk and build wheels via python -m build - upload wheel artifacts per OS and Python version - document optional Apple toolchain patch step for LC_UUID failures
1 parent 59bb2c8 commit 84f85e1

7 files changed

Lines changed: 272 additions & 8 deletions

File tree

.bazelrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Zetasql is removed.
33
# This is a candidate for removal
44
build --cxxopt="-std=c++17"
5+
build --host_cxxopt="-std=c++17"
56

67
# Needed to avoid zetasql proto error.
78
# Zetasql is removed.

.github/workflows/conda-build.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: Build data-validation with Conda
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
release:
11+
types: [published]
12+
workflow_dispatch:
13+
14+
jobs:
15+
build:
16+
runs-on: ${{ matrix.os }}
17+
strategy:
18+
fail-fast: false
19+
matrix:
20+
os: [ubuntu-latest, macos-latest]
21+
python-version: ["3.9", "3.10", "3.11"]
22+
23+
steps:
24+
- name: Checkout
25+
uses: actions/checkout@v4
26+
27+
- name: Setup Micromamba
28+
uses: mamba-org/setup-micromamba@v1
29+
with:
30+
environment-name: tfdv-build
31+
cache-environment: true
32+
create-args: >-
33+
python=${{ matrix.python-version }}
34+
35+
- name: Display environment info
36+
shell: bash -l {0}
37+
run: |
38+
micromamba info
39+
micromamba list
40+
41+
- name: Install Bazel
  shell: bash -l {0}
  run: |
    # Install Bazelisk (manages Bazel versions) under the name `bazel`.
    # The custom `bash -l {0}` shell does not enable errexit, so do it here;
    # otherwise a failed download would be installed as the bazel binary.
    set -euo pipefail

    case "$RUNNER_OS" in
      Linux) bazelisk_os=linux ;;
      macOS) bazelisk_os=darwin ;;
      *)
        echo "Unsupported runner OS: $RUNNER_OS" >&2
        exit 1
        ;;
    esac

    # Pick the Bazelisk binary that matches the runner CPU so that Bazel
    # builds for the same architecture as the interpreter in the conda env.
    case "$(uname -m)" in
      x86_64|amd64) bazelisk_arch=amd64 ;;
      arm64|aarch64) bazelisk_arch=arm64 ;;
      *)
        echo "Unsupported runner architecture: $(uname -m)" >&2
        exit 1
        ;;
    esac

    # -f makes curl fail on HTTP errors instead of saving the error page.
    curl -fLo /tmp/bazelisk "https://github.com/bazelbuild/bazelisk/releases/download/v1.20.0/bazelisk-${bazelisk_os}-${bazelisk_arch}"
    chmod +x /tmp/bazelisk
    sudo mv /tmp/bazelisk /usr/local/bin/bazel

    # Values written to GITHUB_ENV are only visible to *later* steps, so also
    # export the pin here; otherwise the version check below would resolve
    # and download whatever Bazel version Bazelisk defaults to.
    export USE_BAZEL_VERSION=6.5.0
    echo "USE_BAZEL_VERSION=${USE_BAZEL_VERSION}" >> "$GITHUB_ENV"
    bazel --version
55+
# Needed for Xcode 26.x in some CI environments; without this patch,
56+
# Bazel Apple toolchain may fail with an "LC_UUID missing" linker error.
57+
# Currently CI is passing, so keep this disabled. Uncomment if CI starts
58+
# failing with an "LC_UUID missing" error.
59+
# - name: Patch Apple CC toolchain (macOS only)
60+
# if: runner.os == 'macOS'
61+
# shell: bash -l {0}
62+
# run: bazel build //:patch_local_config_apple_cc
63+
64+
- name: Install build tooling
65+
shell: bash -l {0}
66+
run: |
67+
python -m pip install numpy~=1.22.0
68+
python -m pip install --upgrade pip build wheel "setuptools<69.3"
69+
70+
- name: Build the package
71+
shell: bash -l {0}
72+
run: |
73+
python -m build --wheel --no-isolation
74+
75+
- name: Upload wheel artifact
76+
uses: actions/upload-artifact@v4.4.0
77+
with:
78+
name: data-validation-wheel-${{ matrix.os }}-py${{ matrix.python-version }}
79+
path: dist/*.whl

BUILD

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,26 @@ licenses(["notice"])
88

99
exports_files(["LICENSE"])
1010

11+
sh_binary(
12+
name = "patch_wrapped_clang",
13+
srcs = ["patch_wrapped_clang.sh"],
14+
visibility = ["//visibility:public"],
15+
)
16+
17+
genrule(
18+
name = "patch_local_config_apple_cc",
19+
tools = ["//:patch_wrapped_clang"],
20+
outs = ["patch_local_config_apple_cc.stamp"],
21+
cmd = "$(execpath //:patch_wrapped_clang) && touch $@",
22+
tags = [
23+
"local",
24+
"no-cache",
25+
"no-remote",
26+
"no-sandbox",
27+
],
28+
target_compatible_with = ["@platforms//os:osx"],
29+
)
30+
1131
gazelle(
1232
name = "gazelle-update-repos",
1333
args = [

WORKSPACE

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ workspace(name = "tensorflow_data_validation")
22

33
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
44

5+
http_archive(
6+
name = "zlib",
7+
build_file = "@com_google_protobuf//:third_party/zlib.BUILD",
8+
sha256 = "17e88863f3600672ab49182f217281b6fc4d3c762bde361935e436a95214d05c",
9+
strip_prefix = "zlib-1.3.1",
10+
urls = ["https://github.com/madler/zlib/archive/v1.3.1.tar.gz"],
11+
)
12+
513
http_archive(
614
name = "google_bazel_common",
715
sha256 = "82a49fb27c01ad184db948747733159022f9464fc2e62da996fa700594d9ea42",
@@ -91,13 +99,11 @@ http_archive(
9199
urls = ["https://github.com/apache/arrow/archive/%s.zip" % ARROW_COMMIT],
92100
)
93101

94-
COM_GOOGLE_ABSL_COMMIT = "4447c7562e3bc702ade25105912dce503f0c4010" # lts_2023_08_0
95-
96102
http_archive(
97103
name = "com_google_absl",
98-
sha256 = "df8b3e0da03567badd9440377810c39a38ab3346fa89df077bb52e68e4d61e74",
99-
strip_prefix = "abseil-cpp-%s" % COM_GOOGLE_ABSL_COMMIT,
100-
url = "https://github.com/abseil/abseil-cpp/archive/%s.tar.gz" % COM_GOOGLE_ABSL_COMMIT,
104+
urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz"],
105+
strip_prefix = "abseil-cpp-20230802.1",
106+
sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed",
101107
)
102108

103109

patch_wrapped_clang.sh

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# Repository root. This script lives at the top of the repository (the root
# BUILD file lists patch_wrapped_clang.sh in srcs), so its own directory is
# the root; climbing further up would point outside the workspace.
# `bazel run` exports BUILD_WORKSPACE_DIRECTORY, which is preferred when set
# because "$0" then points into Bazel's output tree rather than the sources.
ROOT_DIR="${BUILD_WORKSPACE_DIRECTORY:-$(cd "$(dirname "$0")" && pwd)}"
5+
6+
# Locate Bazel's generated local_config_cc repository directory.
#
# Probes, in order:
#   1. <cwd>/external/local_config_cc
#   2. <cwd>/../external/local_config_cc
#   3. ${ROOT_DIR}/bazel-data-validation/external/local_config_cc
#   4. $(bazel info output_base)/external/local_config_cc
#
# The Bazel query (4) is only attempted when none of the cheap filesystem
# probes match, so no extra Bazel client is spawned when the directory is
# already reachable (e.g. when this script runs as a Bazel action).
#
# Globals:  ROOT_DIR (read)
# Outputs:  the first existing candidate directory on stdout
# Returns:  0 if a directory was found, 1 otherwise
find_wrapper_dir() {
  local cwd dir output_base
  cwd="$(pwd)"

  local -a candidates=(
    "${cwd}/external/local_config_cc"
    "${cwd}/../external/local_config_cc"
    "${ROOT_DIR}/bazel-data-validation/external/local_config_cc"
  )

  for dir in "${candidates[@]}"; do
    if [[ -d "${dir}" ]]; then
      echo "${dir}"
      return 0
    fi
  done

  # Last resort: ask Bazel where its output base lives.
  if command -v bazel >/dev/null 2>&1; then
    if output_base="$(cd "${ROOT_DIR}" && bazel info output_base 2>/dev/null)"; then
      dir="${output_base}/external/local_config_cc"
      if [[ -d "${dir}" ]]; then
        echo "${dir}"
        return 0
      fi
    fi
  fi

  return 1
}
31+
32+
# Replace the file at $1 with an executable bash wrapper around xcrun clang.
#
# The generated wrapper:
#   - runs clang++ when invoked under the name wrapped_clang_pp, else clang;
#   - rewrites the argument DEBUG_PREFIX_MAP_PWD=. to
#     -fdebug-prefix-map=<cwd>=.;
#   - substitutes __BAZEL_XCODE_DEVELOPER_DIR__ and __BAZEL_XCODE_SDKROOT__
#     in every argument (from DEVELOPER_DIR / SDKROOT when set, otherwise
#     from xcode-select / xcrun).
#
# Arguments:
#   $1 - path of the wrapper file to (over)write.
write_wrapper() {
  local path="$1"
  # Quoted delimiter: the wrapper text is emitted verbatim, so nothing in it
  # needs backslash-escaping.
  cat >"${path}" <<'EOF'
#!/bin/bash
set -euo pipefail
WRAPPER_DEVDIR="${DEVELOPER_DIR:-$(xcode-select -p)}"
SDKROOT_PATH="${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}"
tool="clang"
if [[ "$(basename "$0")" == "wrapped_clang_pp" ]]; then
  tool="clang++"
fi
args=()
for arg in "$@"; do
  if [[ "$arg" == "DEBUG_PREFIX_MAP_PWD=." ]]; then
    args+=("-fdebug-prefix-map=$(pwd)=.")
    continue
  fi
  arg="${arg//__BAZEL_XCODE_DEVELOPER_DIR__/${WRAPPER_DEVDIR}}"
  arg="${arg//__BAZEL_XCODE_SDKROOT__/${SDKROOT_PATH}}"
  args+=("$arg")
done
# ${args[@]+...}: with `set -u`, bash older than 4.4 (macOS /bin/bash is 3.2)
# treats expanding an empty array as an unbound variable; this form expands
# to nothing instead of aborting when the wrapper is called without arguments.
exec /usr/bin/xcrun "${tool}" ${args[@]+"${args[@]}"}
EOF
  chmod +x "${path}"
}
57+
58+
# Replace the file at $1 with an executable bash script that checks a libtool
# command line for object files sharing the same basename.
#
# The generated script collects the basename of every `*.o` argument, of
# every line in a file passed after `-filelist`, and (recursively) of the
# tokens in `@params` files. It exits 1 if any basename occurs more than
# once and 0 otherwise.
#
# Arguments:
#   $1 - path of the script file to (over)write.
write_libtool_check_unique() {
  local path="$1"
  cat >"${path}" <<'EOF'
#!/bin/bash
set -euo pipefail

TMP_INPUTS="$(mktemp "${TMPDIR:-/tmp}/libtool_unique.XXXXXX")"
trap 'rm -f "$TMP_INPUTS"' EXIT

# Set to 1 after a -filelist token; the next token is then the list file.
EXPECT_FILELIST=0

# Record the basename of one object path (blank entries are ignored).
add_object() {
  local obj="$1"
  [[ -n "$obj" ]] || return 0
  basename "$obj" >>"$TMP_INPUTS"
}

# Classify a single command-line (or params-file) token.
parse_token() {
  local token="$1"
  local obj opt

  if [[ "$EXPECT_FILELIST" == "1" ]]; then
    EXPECT_FILELIST=0
    if [[ -f "$token" ]]; then
      # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline;
      # plain `read` returns non-zero for it and the entry would be skipped.
      while IFS= read -r obj || [[ -n "$obj" ]]; do
        add_object "$obj"
      done <"$token"
    fi
    return 0
  fi

  case "$token" in
    -filelist)
      EXPECT_FILELIST=1
      ;;
    @*)
      local params_file="${token:1}"
      if [[ -f "$params_file" ]]; then
        while IFS= read -r opt || [[ -n "$opt" ]]; do
          parse_token "$opt"
        done <"$params_file"
      fi
      ;;
    *.o)
      add_object "$token"
      ;;
  esac
}

for arg in "$@"; do
  parse_token "$arg"
done

# uniq -d prints only repeated lines; any output means a duplicate basename.
if sort "$TMP_INPUTS" | uniq -d | grep -q .; then
  exit 1
fi

exit 0
EOF
  chmod +x "${path}"
}
118+
119+
# Entry point: locate local_config_cc, back up the three generated tools and
# replace them with the script versions written by write_wrapper and
# write_libtool_check_unique.
#
# Exits 1 when the directory cannot be found or any of wrapped_clang,
# wrapped_clang_pp or libtool_check_unique is missing from it.
main() {
  local wrapper_dir
  if ! wrapper_dir="$(find_wrapper_dir)"; then
    echo "Could not find local_config_cc wrapper directory." >&2
    echo "Run a Bazel build once, then rerun this script." >&2
    exit 1
  fi

  local clang_wrapper="${wrapper_dir}/wrapped_clang"
  local clangpp_wrapper="${wrapper_dir}/wrapped_clang_pp"
  local libtool_check_unique="${wrapper_dir}/libtool_check_unique"

  if [[ ! -e "${clang_wrapper}" || ! -e "${clangpp_wrapper}" || ! -e "${libtool_check_unique}" ]]; then
    echo "Missing wrapped_clang binaries under ${wrapper_dir}" >&2
    exit 1
  fi

  # Back up each tool only once. On a rerun the files in place are already
  # the patched scripts, and copying them again would overwrite the pristine
  # originals stored in the .bak files.
  local target
  for target in "${clang_wrapper}" "${clangpp_wrapper}" "${libtool_check_unique}"; do
    if [[ ! -e "${target}.bak" ]]; then
      cp -f "${target}" "${target}.bak"
    fi
  done

  write_wrapper "${clang_wrapper}"
  write_wrapper "${clangpp_wrapper}"
  write_libtool_check_unique "${libtool_check_unique}"

  echo "Patched wrappers in: ${wrapper_dir}"
  echo "Backups saved as wrapped_clang.bak, wrapped_clang_pp.bak, libtool_check_unique.bak"
}
147+
148+
main "$@"

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[build]
2+
build_base = _build

tensorflow_data_validation/build_macros.bzl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,15 @@ def tfdv_pybind_extension(
2424
prefix = name[:p + 1]
2525
so_file = "%s%s.so" % (prefix, sname)
2626
pyd_file = "%s%s.pyd" % (prefix, sname)
27-
exported_symbols = [
27+
# For macOS, only export PyInit_* (Python 3).
28+
# macOS linker requires all exported symbols to exist.
29+
exported_symbols_macos = [
30+
"PyInit_%s" % sname,
31+
]
32+
33+
# For Linux, include Python 2 symbols for compatibility
34+
# (version script allows undefined symbols).
35+
exported_symbols_linux = [
2836
"init%s" % sname,
2937
"init_%s" % sname,
3038
"PyInit_%s" % sname,
@@ -33,8 +41,8 @@ def tfdv_pybind_extension(
3341
exported_symbols_file = "%s-exported-symbols.lds" % name
3442
version_script_file = "%s-version-script.lds" % name
3543

36-
exported_symbols_output = "\n".join(["_%s" % symbol for symbol in exported_symbols])
37-
version_script_output = "\n".join([" %s;" % symbol for symbol in exported_symbols])
44+
exported_symbols_output = "\n".join(["_%s" % symbol for symbol in exported_symbols_macos])
45+
version_script_output = "\n".join([" %s;" % symbol for symbol in exported_symbols_linux])
3846

3947
native.genrule(
4048
name = name + "_exported_symbols",

0 commit comments

Comments
 (0)