# Module-level logger named after this module.
3737logger = logging .getLogger (__name__ )
3838
3939
# A nested field to add, as (new field name, data type as SQL, path below the root column).
# The path lists the record names to descend through, starting under the root column, to reach
# the record that receives the new field; it is empty when that record is the root column itself.
40+ NestedField = t .Tuple [str , str , t .List [str ]]
# Maps a root column name to the nested fields to add somewhere beneath that column.
41+ NestedFieldsDict = t .Dict [str , t .List [NestedField ]]
42+
43+
4044@set_catalog ()
4145class BigQueryEngineAdapter (InsertOverwriteWithMergeMixin , ClusteredByMixin ):
4246 """
@@ -282,11 +286,11 @@ def fetchall(
282286 def _split_alter_expressions (
283287 self ,
284288 alter_expressions : t .List [exp .Alter ],
285- ) -> t .Tuple [t . Dict [ str , list [ t . Tuple [ str , str ]]] , t .List [exp .Alter ]]:
289+ ) -> t .Tuple [NestedFieldsDict , t .List [exp .Alter ]]:
286290 """
287291 Returns a dictionary of the nested fields to add and a list of the non-nested alter expressions.
288292 """
289- nested_fields_to_add : t . Dict [ str , list [ t . Tuple [ str , str ]]] = defaultdict (list )
293+ nested_fields_to_add : NestedFieldsDict = defaultdict (list )
290294 non_nested_expressions = []
291295
292296 for alter_expression in alter_expressions :
@@ -296,17 +300,58 @@ def _split_alter_expressions(
296300 and isinstance (action .this , exp .Dot )
297301 and isinstance (action .kind , exp .DataType )
298302 ):
299- record = action .this .this .sql (dialect = "bigquery " )
300- field = action .this .expression .sql (dialect = "bigquery" )
301- data_type = action .kind .sql (dialect = "bigquery" )
302- nested_fields_to_add [record ].append ((data_type , field ))
303+ root_field , * leaf_fields = action .this .this .sql (dialect = self . dialect ). split ( ". " )
304+ new_field = action .this .expression .sql (dialect = self . dialect )
305+ data_type = action .kind .sql (dialect = self . dialect )
306+ nested_fields_to_add [root_field ].append ((new_field , data_type , leaf_fields ))
303307 else :
304308 non_nested_expressions .append (alter_expression )
305309
306310 return nested_fields_to_add , non_nested_expressions
307311
def _build_nested_fields(
    self,
    current_fields: t.List[bigquery.SchemaField],
    fields_to_add: t.List[NestedField],
) -> t.List[bigquery.SchemaField]:
    """
    Recursively merges new nested fields into an existing list of schema fields.

    Args:
        current_fields: The schema fields that already exist at this nesting level.
        fields_to_add: (name, data type, remaining path) tuples. An empty path means the
            field is appended at this level; otherwise the first path element names the
            child record to descend into and the rest of the path is handled recursively.

    Returns:
        A new list holding the existing fields in their original order (records that lie on
        a path are rebuilt with their updated children), followed by the fields added at
        this level. The input list and its fields are not modified.
    """
    from google.cloud import bigquery

    # Split the requests into those that land at this level and those that go deeper,
    # grouping the deeper ones by the child record they pass through.
    root: t.List[t.Tuple[str, str]] = []
    leaves: NestedFieldsDict = defaultdict(list)
    for new_field, data_type, leaf_fields in fields_to_add:
        if leaf_fields:
            leaves[leaf_fields[0]].append((new_field, data_type, leaf_fields[1:]))
        else:
            root.append((new_field, data_type))

    # A path element that matches no existing field cannot be descended into, so the
    # fields requested beneath it would be dropped; report that instead of staying silent.
    missing_parents = leaves.keys() - {field.name for field in current_fields}
    if missing_parents:
        logger.warning(
            "Skipping nested fields whose parent record does not exist: %s",
            ", ".join(sorted(missing_parents)),
        )

    new_fields: t.List[bigquery.SchemaField] = []
    for field in current_fields:
        # If the new fields are nested, we need to recursively build them
        if field.name in leaves:
            subfields = self._build_nested_fields(list(field.fields), leaves[field.name])
            # Rebuild the record from a copy of its API representation so attributes such
            # as the description and policy tags survive; only the type and the children
            # are overridden. The copy keeps the original field object untouched.
            api_repr = dict(field.to_api_repr())
            api_repr["type"] = "RECORD"
            api_repr["fields"] = [subfield.to_api_repr() for subfield in subfields]
            new_fields.append(bigquery.SchemaField.from_api_repr(api_repr))
        else:
            new_fields.append(field)

    # Build and append the new fields that belong directly at this level
    new_fields.extend(
        self.__get_bq_schemafield(name, exp.DataType.build(data_type, dialect=self.dialect))
        for name, data_type in root
    )
    return new_fields
352+
308353 def _update_table_schema_nested_fields (
309- self , nested_fields_to_add : t . Dict [ str , list [ t . Tuple [ str , str ]]] , table_name : str
354+ self , nested_fields_to_add : NestedFieldsDict , table_name : str
310355 ) -> None :
311356 """
312357 Updates a BigQuery table schema by adding the new nested fields provided.
@@ -316,18 +361,17 @@ def _update_table_schema_nested_fields(
316361 table = self ._get_table (table_name )
317362 original_schema = table .schema
318363 new_schema = []
319-
320364 for field in original_schema :
321- current_fields = list (field .fields )
322365 if field .name in nested_fields_to_add :
323- current_fields .extend (
324- bigquery .SchemaField (new_field [1 ], new_field [0 ], mode = "NULLABLE" )
325- for new_field in nested_fields_to_add [field .name ]
366+ fields = self ._build_nested_fields (
367+ list (field .fields ), nested_fields_to_add [field .name ]
326368 )
327- if current_fields :
328369 new_schema .append (
329370 bigquery .SchemaField (
330- field .name , "RECORD" , mode = "NULLABLE" , fields = tuple (current_fields )
371+ field .name ,
372+ "RECORD" ,
373+ mode = field .mode ,
374+ fields = tuple (fields ),
331375 )
332376 )
333377 else :
0 commit comments