5151import bigframes .core .utils as utils
5252import bigframes .core .window_spec as window_specs
5353import bigframes .dtypes
54+ import bigframes .exceptions
5455import bigframes .features
5556import bigframes .operations as ops
5657import bigframes .operations .aggregations as agg_ops
@@ -120,19 +121,11 @@ def __init__(
120121 f"'index_columns' (size { len (index_columns )} ) and 'index_labels' (size { len (index_labels )} ) must have equal length"
121122 )
122123
123- # If no index columns are set, create one.
124- #
125- # Note: get_index_cols in
126- # bigframes/session/_io/bigquery/read_gbq_table.py depends on this
127- # being as sequential integer index column. If this default behavior
128- # ever changes, please also update get_index_cols so
129- # that users who explicitly request a sequential integer index can
130- # still get one.
131124 if len (index_columns ) == 0 :
132- new_index_col_id = guid . generate_guid ()
133- expr = expr . promote_offsets ( new_index_col_id )
134- index_columns = [ new_index_col_id ]
135-
125+ warnings . warn (
126+ "Creating object with Null Index. Null Index is a preview feature." ,
127+ category = bigframes . exceptions . PreviewWarning ,
128+ )
136129 self ._index_columns = tuple (index_columns )
137130 # Index labels don't need complicated hierarchical access so can store as tuple
138131 self ._index_labels = (
@@ -517,7 +510,8 @@ def _copy_index_to_pandas(self, df: pd.DataFrame):
517510
518511 Warning: This method modifies ``df`` inplace.
519512 """
520- if self .index_columns :
513+ # Note: If BigQuery DataFrame has null index, a default one will be created for the local materialization.
514+ if len (self .index_columns ) > 0 :
521515 df .set_index (list (self .index_columns ), inplace = True )
522516 # Pandas names is annotated as list[str] rather than the more
523517 # general Sequence[Label] that BigQuery DataFrames has.
@@ -1093,16 +1087,25 @@ def aggregate(
10931087 aggregate_labels = self ._get_labels_for_columns (
10941088 [agg [0 ] for agg in aggregations ]
10951089 )
1090+
10961091 names : typing .List [Label ] = []
1097- for by_col_id in by_column_ids :
1098- if by_col_id in self .value_columns :
1099- names .append (self .col_id_to_label [by_col_id ])
1100- else :
1101- names .append (self .col_id_to_index_name [by_col_id ])
1092+ if len (by_column_ids ) == 0 :
1093+ label_id = guid .generate_guid ()
1094+ result_expr = result_expr .assign_constant (label_id , 0 , pd .Int64Dtype ())
1095+ index_columns = (label_id ,)
1096+ names = [None ]
1097+ else :
1098+ index_columns = tuple (by_column_ids ) # type: ignore
1099+ for by_col_id in by_column_ids :
1100+ if by_col_id in self .value_columns :
1101+ names .append (self .col_id_to_label [by_col_id ])
1102+ else :
1103+ names .append (self .col_id_to_index_name [by_col_id ])
1104+
11021105 return (
11031106 Block (
11041107 result_expr ,
1105- index_columns = by_column_ids ,
1108+ index_columns = index_columns ,
11061109 column_labels = aggregate_labels ,
11071110 index_labels = names ,
11081111 ),
@@ -1256,11 +1259,12 @@ def explode(
12561259 expr = self .expr .explode (column_ids )
12571260
12581261 if ignore_index :
1262+ new_index_ids = guid .generate_guid ()
12591263 return Block (
1260- expr .drop_columns (self .index_columns ),
1264+ expr .drop_columns (self .index_columns ). promote_offsets ( new_index_ids ) ,
12611265 column_labels = self .column_labels ,
12621266 # The old index columns are dropped and replaced by a freshly promoted offsets column.
1263- index_columns = [],
1267+ index_columns = [new_index_ids ],
12641268 )
12651269 else :
12661270 return Block (
@@ -1423,7 +1427,8 @@ def retrieve_repr_request_results(
14231427 computed_df , query_job = head_block .to_pandas ()
14241428 formatted_df = computed_df .set_axis (self .column_labels , axis = 1 )
14251429 # we reset the axis and substitute the bf index name(s) for the default
1426- formatted_df .index .names = self .index .names # type: ignore
1430+ if len (self .index .names ) > 0 :
1431+ formatted_df .index .names = self .index .names # type: ignore
14271432 return formatted_df , count , query_job
14281433
14291434 def promote_offsets (self , label : Label = None ) -> typing .Tuple [Block , str ]:
@@ -1907,9 +1912,26 @@ def join(
19071912 other : Block ,
19081913 * ,
19091914 how = "left" ,
1910- sort = False ,
1915+ sort : bool = False ,
19111916 block_identity_join : bool = False ,
19121917 ) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
1918+ """
1919+ Join two Block objects together and provide mappings between source columns and output columns.
1920+
1921+ Args:
1922+ other (Block):
1923+ The right operand of the join operation
1924+ how (str):
1925+ Describes the join type. 'inner', 'outer', 'left', or 'right'
1926+ sort (bool):
1927+ If true, sort the result by index.
1928+ block_identity_join (bool):
1929+ If true, will not convert join to a projection (implicitly assuming unique indices)
1930+
1931+ Returns:
1932+ Block, (left_mapping, right_mapping): Result block and mappers from input column ids to result column ids.
1933+ """
1934+
19131935 if not isinstance (other , Block ):
19141936 # TODO(swast): We need to improve this error message to be more
19151937 # actionable for the user. For example, it's possible they
@@ -1923,6 +1945,16 @@ def join(
19231945 raise NotImplementedError (
19241946 f"Only how='outer','left','right','inner' currently supported. { constants .FEEDBACK_LINK } "
19251947 )
1948+ # Special case: both blocks have a null index, and neither sorting nor an identity join was requested.
1949+ if (
1950+ (self .index .nlevels == other .index .nlevels == 0 )
1951+ and not sort
1952+ and not block_identity_join
1953+ ):
1954+ return join_indexless (self , other , how = how )
1955+
1956+ self ._throw_if_null_index ("join" )
1957+ other ._throw_if_null_index ("join" )
19261958 if self .index .nlevels == other .index .nlevels == 1 :
19271959 return join_mono_indexed (
19281960 self , other , how = how , sort = sort , block_identity_join = block_identity_join
@@ -2071,6 +2103,12 @@ def _is_monotonic(
20712103 self ._stats_cache [column_name ].update ({op_name : result })
20722104 return result
20732105
2106+ def _throw_if_null_index (self , opname : str ):
2107+ if len (self .index_columns ) == 0 :
2108+ raise bigframes .exceptions .NullIndexError (
2109+ f"Cannot do { opname } without an index. Set an index using set_index."
2110+ )
2111+
20742112 def _get_rows_as_json_values (self ) -> Block :
20752113 # We want to preserve any ordering currently present before turning to
20762114 # direct SQL manipulation. We will restore the ordering when we rebuild
@@ -2211,6 +2249,10 @@ def __repr__(self) -> str:
22112249
22122250 def to_pandas (self ) -> pd .Index :
22132251 """Executes deferred operations and downloads the results."""
2252+ if len (self .column_ids ) == 0 :
2253+ raise bigframes .exceptions .NullIndexError (
2254+ "Cannot materialize index, as this object does not have an index. Set index column(s) using set_index."
2255+ )
22142256 # Project down to only the index column. So the query can be cached to visualize other data.
22152257 index_columns = list (self ._block .index_columns )
22162258 dtypes = dict (zip (index_columns , self .dtypes ))
@@ -2252,6 +2294,53 @@ def is_uniquely_named(self: BlockIndexProperties):
22522294 return len (set (self .names )) == len (self .names )
22532295
22542296
def join_indexless(
    left: Block,
    right: Block,
    *,
    how="left",
) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]:
    """Join two blocks without using an index, by aligning them as a projection.

    Every column of both inputs is assigned a freshly generated id in the
    combined expression. The resulting block is built with no index columns.

    Args:
        left (Block):
            The left operand of the join.
        right (Block):
            The right operand of the join.
        how (str):
            Join type, forwarded as ``join_type`` to
            ``try_align_as_projection``.

    Returns:
        Block, (left_mapping, right_mapping): Result block and mappers from
        input column ids to result column ids.

    Raises:
        bigframes.exceptions.NullIndexError:
            If ``try_align_as_projection`` returns ``None``, i.e. the two
            expressions could not be aligned.
    """

    def _fresh_mappings(side, source_ids):
        # One mapping per source column, each with a newly generated output id.
        return [
            join_defs.JoinColumnMapping(
                source_table=side,
                source_id=source_id,
                destination_id=guid.generate_guid(),
            )
            for source_id in source_ids
        ]

    lhs_expr = left.expr
    rhs_expr = right.expr

    # Left ids are generated before right ids, matching the output column order.
    left_mappings = _fresh_mappings(join_defs.JoinSide.LEFT, lhs_expr.column_ids)
    right_mappings = _fresh_mappings(join_defs.JoinSide.RIGHT, rhs_expr.column_ids)

    aligned_expr = lhs_expr.try_align_as_projection(
        rhs_expr,
        join_type=how,
        mappings=tuple(left_mappings + right_mappings),
    )
    if aligned_expr is None:
        raise bigframes.exceptions.NullIndexError(
            "Cannot implicitly align objects. Set an explicit index using set_index."
        )

    left_id_lookup = {
        mapping.source_id: mapping.destination_id for mapping in left_mappings
    }
    right_id_lookup = {
        mapping.source_id: mapping.destination_id for mapping in right_mappings
    }

    joined_labels = list(left.column_labels) + list(right.column_labels)
    joined_block = Block(
        aligned_expr,
        column_labels=joined_labels,
        index_columns=(),
    )
    return joined_block, (left_id_lookup, right_id_lookup)
2342+
2343+
22552344def join_mono_indexed (
22562345 left : Block ,
22572346 right : Block ,
0 commit comments