diff --git a/.bazelrc b/.bazelrc index b177c125..ac237360 100644 --- a/.bazelrc +++ b/.bazelrc @@ -11,3 +11,11 @@ build --host_cxxopt=-std=c++17 build --incompatible_require_linker_input_cc_api=false build:macos --apple_platform_type=macos build:macos_arm64 --cpu=darwin_arm64 + +# Avoid Apple ld LTO library mismatch by disabling ThinLTO on macOS +build:macos --features=-thin_lto +build:macos_arm64 --features=-thin_lto + +# Enable HAVE_UNISTD_H for zlib on macOS to prevent fdopen macro conflict +build:macos --define=HAVE_UNISTD_H=1 +build:macos_arm64 --define=HAVE_UNISTD_H=1 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9d9ac25c..86495e92 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,6 +23,7 @@ on: - master release: types: [published] + workflow_dispatch: jobs: build_wheels: @@ -72,7 +73,7 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1.12 with: packages_dir: wheels/ - repository_url: https://pypi.org/legacy/ + repository_url: https://upload.pypi.org/legacy/ # already checked, and the pkginfo/twine versions on this runner causes check to fail verify-metadata: true verbose: true diff --git a/WORKSPACE b/WORKSPACE index f14fb3f5..80b0fb1a 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,6 +2,17 @@ workspace(name = "tfx_bsl") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +# Patch zlib for macOS compatibility (must come before org_tensorflow_no_deps loads zlib) +http_archive( + name = "zlib", + build_file = "@com_google_protobuf//:third_party/zlib.BUILD", + sha256 = "17e88863f3600672ab49182f217281b6fc4d3c762bde361935e436a95214d05c", + strip_prefix = "zlib-1.3.1", + urls = [ + "https://github.com/madler/zlib/archive/v1.3.1.tar.gz", + ], +) + http_archive( name = "google_bazel_common", sha256 = "82a49fb27c01ad184db948747733159022f9464fc2e62da996fa700594d9ea42", diff --git a/pyproject.toml b/pyproject.toml index f8f41aba..e45e7c65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,10 +142,11 @@ before-test="rm {project}/bazel-*" test-command="pytest {project}" [tool.cibuildwheel.linux] -manylinux-x86_64-image = "manylinux2014" +manylinux-x86_64-image = "manylinux_2_28" archs=["x86_64"] -before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +before-build = "yum install -y npm && npm install -g @bazel/bazelisk && yum install -y hdf5-devel" [tool.cibuildwheel.macos] archs = ["arm64"] +before-build = "brew install hdf5" diff --git a/setup.py b/setup.py index a47d53a6..e600febe 100644 --- a/setup.py +++ b/setup.py @@ -173,9 +173,10 @@ def select_constraint(default, nightly=None, git_master=None): "absl-py>=0.9,<2.0.0", 'apache-beam[gcp]>=2.53,<3;python_version>="3.11"', 'apache-beam[gcp]>=2.50,<2.51;python_version<"3.11"', + "dill>=0.3.1,<1.0.0", "google-api-python-client>=1.7.11,<2", "numpy", - "pandas>=1.0,<2", + "pandas", 'protobuf>=4.25.2,<6.0.0;python_version>="3.11"', 'protobuf>=4.21.6,<6.0.0;python_version<"3.11"', 'pyarrow>=10,<11;python_version<"3.11"', @@ -189,8 +190,8 @@ def select_constraint(default, nightly=None, git_master=None): ), "tensorflow-serving-api" + select_constraint( - default=">=2.13.0,<3", - nightly=">=2.13.0.dev", + default=">=2.17.1,<3", + nightly=">=2.17.1.dev", git_master="@git+https://github.com/tensorflow/serving@master", ), ], diff --git a/tfx_bsl/build_macros.bzl b/tfx_bsl/build_macros.bzl index 2f42b4df..27c551b9 100644 --- a/tfx_bsl/build_macros.bzl +++ b/tfx_bsl/build_macros.bzl @@ -75,9 +75,8 @@ def tfx_bsl_pybind_extension( prefix = name[:p + 1] so_file = "%s%s.so" % (prefix, sname) pyd_file = "%s%s.pyd" % (prefix, sname) + # For Python 3, the module init symbol is PyInit_ exported_symbols = [ - "init%s" % sname, - "init_%s" % sname, "PyInit_%s" % sname, ] diff --git a/tfx_bsl/cc/sketches/misragries_sketch.h b/tfx_bsl/cc/sketches/misragries_sketch.h index 7d25daac..b19547d6 100644 --- a/tfx_bsl/cc/sketches/misragries_sketch.h +++ b/tfx_bsl/cc/sketches/misragries_sketch.h @@ -24,6 +24,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" diff --git a/tfx_bsl/cc/util/status_util.h b/tfx_bsl/cc/util/status_util.h index b1f32f69..166c16b7 100644 --- a/tfx_bsl/cc/util/status_util.h +++ b/tfx_bsl/cc/util/status_util.h @@ -17,6 +17,7 @@ #include "absl/base/optimization.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "arrow/api.h" namespace tfx_bsl { diff --git a/tfx_bsl/coders/csv_decoder_test.py b/tfx_bsl/coders/csv_decoder_test.py index 17b2ef05..3a6232e8 100644 --- a/tfx_bsl/coders/csv_decoder_test.py +++ b/tfx_bsl/coders/csv_decoder_test.py @@ -802,21 +802,21 @@ def test_invalid_row(self): input_lines = ["1,2.0,hello", "5,12.34"] column_names = ["int_feature", "float_feature", "str_feature"] with self.assertRaisesRegex( # pylint: disable=g-error-prone-assert-raises - ValueError, ".*Columns do not match specified csv headers.*" + (ValueError, RuntimeError), ".*Columns do not match specified csv headers.*" ): - with beam.Pipeline() as p: - result = ( - p - | beam.Create(input_lines, reshuffle=False) - | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=",")) - | beam.Keys() - | beam.CombineGlobally( - csv_decoder.ColumnTypeInferrer( - column_names, skip_blank_lines=False - ) - ) + p = beam.Pipeline() + result = ( + p + | beam.Create(input_lines, reshuffle=False) + | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=",")) + | beam.Keys() + | beam.CombineGlobally( + csv_decoder.ColumnTypeInferrer(column_names, skip_blank_lines=False) ) - beam_test_util.assert_that(result, lambda _: None) + ) + beam_test_util.assert_that(result, lambda _: None) + pipeline_result = p.run() + pipeline_result.wait_until_finish() def test_invalid_schema_type(self): input_lines = ["1"] diff --git a/tfx_bsl/telemetry/collection_test.py b/tfx_bsl/telemetry/collection_test.py index b8342349..e237cb11 100644 --- a/tfx_bsl/telemetry/collection_test.py +++ b/tfx_bsl/telemetry/collection_test.py @@ -29,14 +29,14 @@ def testTrackRecordBatchBytes(self): ) expected_num_bytes = inputs.nbytes - with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p: - _ = ( - p - | beam.Create([inputs]) - | collection.TrackRecordBatchBytes("TestNamespace", "num_bytes_count") - ) - + p = beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) + _ = ( + p + | beam.Create([inputs]) + | collection.TrackRecordBatchBytes("TestNamespace", "num_bytes_count") + ) pipeline_result = p.run() + pipeline_result.wait_until_finish() result_metrics = pipeline_result.metrics() actual_counter = result_metrics.query( beam.metrics.metric.MetricsFilter().with_name("num_bytes_count") @@ -74,16 +74,14 @@ def testTrackRecordTensorRepresentations(self): "ragged_tensor": num_ragged_tensors, } - with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p: - _ = ( - p - | beam.Create([tensor_representations]) - | collection.TrackTensorRepresentations( - counter_namespace="TestNamespace" - ) - ) - + p = beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) + _ = ( + p + | beam.Create([tensor_representations]) + | collection.TrackTensorRepresentations(counter_namespace="TestNamespace") + ) pipeline_result = p.run() + pipeline_result.wait_until_finish() result_metrics = pipeline_result.metrics() for kind, expected_count in expected_counters.items(): actual_counter = result_metrics.query(