From 64fecea85f835e32cfc336c586eb7cdc7964ddf8 Mon Sep 17 00:00:00 2001
From: pritishpai
Date: Thu, 4 Jun 2026 15:57:07 -0400
Subject: [PATCH 1/3] Add _PAGE_SIZE constant to TableMapping to document the 10 MB workspace upload limit

Co-authored-by: Isaac
---
 src/databricks/labs/ucx/hive_metastore/mapping.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py
index c87e7406e1..53b1c73efb 100644
--- a/src/databricks/labs/ucx/hive_metastore/mapping.py
+++ b/src/databricks/labs/ucx/hive_metastore/mapping.py
@@ -92,6 +92,8 @@ def __eq__(self, other):
 class TableMapping:
     FILENAME = 'mapping.csv'
     UCX_SKIP_PROPERTY = "databricks.labs.ucx.skip"
+    # Workspace import API has a 10 MB hard limit; 50 K rows × ~160 bytes ≈ 8 MB per page
+    _PAGE_SIZE = 50_000
 
     def __init__(
         self,

From e00af5af6e7a8d3efcb4bda0d89bab925f4d949f Mon Sep 17 00:00:00 2001
From: pritishpai
Date: Thu, 4 Jun 2026 16:02:38 -0400
Subject: [PATCH 2/3] Split mapping.csv into pages when rules exceed the 10 MB workspace upload limit

Co-authored-by: Isaac
---
 src/databricks/labs/ucx/hive_metastore/mapping.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py
index 53b1c73efb..c6c074a7ea 100644
--- a/src/databricks/labs/ucx/hive_metastore/mapping.py
+++ b/src/databricks/labs/ucx/hive_metastore/mapping.py
@@ -118,8 +118,13 @@ def current_tables(self, tables: TablesCrawler, workspace_name: str, catalog_nam
     def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str:
         workspace_name = workspace_info.current()
         default_catalog_name = re.sub(r"\W+", "_", workspace_name)
-        current_tables = self.current_tables(tables, workspace_name, default_catalog_name)
-        return self._installation.save(list(current_tables), filename=self.FILENAME)
+        rules = list(self.current_tables(tables, workspace_name, default_catalog_name))
+        pages = [rules[i : i + self._PAGE_SIZE] for i in range(0, len(rules), self._PAGE_SIZE)]
+        path = None
+        for i, page in enumerate(pages):
+            filename = self.FILENAME if i == 0 else f"mapping-{i}.csv"
+            path = self._installation.save(page, filename=filename)
+        return path or f"{self._installation.install_folder()}/{self.FILENAME}"
 
     def load(self) -> list[Rule]:
         try:

From edd3071a757d44283b06221c9d6cf57db5195b84 Mon Sep 17 00:00:00 2001
From: pritishpai
Date: Thu, 4 Jun 2026 16:03:13 -0400
Subject: [PATCH 3/3] Load all mapping pages by following mapping-1.csv, mapping-2.csv, ... until NotFound

Co-authored-by: Isaac
---
 src/databricks/labs/ucx/hive_metastore/mapping.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py
index c6c074a7ea..df9af9b614 100644
--- a/src/databricks/labs/ucx/hive_metastore/mapping.py
+++ b/src/databricks/labs/ucx/hive_metastore/mapping.py
@@ -128,10 +128,18 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str:
 
     def load(self) -> list[Rule]:
         try:
-            return self._installation.load(list[Rule], filename=self.FILENAME)
+            rules = self._installation.load(list[Rule], filename=self.FILENAME)
         except NotFound:
             msg = "Please run: databricks labs ucx create-table-mapping"
             raise ValueError(msg) from None
+        page = 1
+        while True:
+            try:
+                rules += self._installation.load(list[Rule], filename=f"mapping-{page}.csv")
+                page += 1
+            except NotFound:
+                break
+        return rules
 
     def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None]):
         # Marks a table to be skipped in the migration process by applying a table property