Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 36fd5bb

Browse files
committed
Merge remote-tracking branch 'origin/main' into load-data-function-7107946425247495497
2 parents 4838687 + 248c8ea commit 36fd5bb

96 files changed

Lines changed: 2602 additions & 410 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ tests/js/node_modules/
6464
pylintrc
6565
pylintrc.test
6666
dummy.pkl
67+
.mypy_cache/

.librarian/state.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
1+
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677
22
libraries:
33
- id: bigframes
4-
version: 2.32.0
4+
version: 2.33.0
55
last_generated_commit: ""
66
apis: []
77
source_roots:

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,24 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22)
8+
9+
10+
### Features
11+
12+
* add bigquery.ml.transform function (#2394) ([1f9ee373c1f1d0cd08b80169c3063b862ea46465](https://github.com/googleapis/python-bigquery-dataframes/commit/1f9ee373c1f1d0cd08b80169c3063b862ea46465))
13+
* Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) ([9c3bbc36983dffb265454f27b37450df8c5fbc71](https://github.com/googleapis/python-bigquery-dataframes/commit/9c3bbc36983dffb265454f27b37450df8c5fbc71))
14+
* Stabilize interactive table height to prevent notebook layout shifts (#2378) ([a634e976c0f44087ca2a65f68cf2775ae6f04024](https://github.com/googleapis/python-bigquery-dataframes/commit/a634e976c0f44087ca2a65f68cf2775ae6f04024))
15+
* Add max_columns control for anywidget mode (#2374) ([34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78](https://github.com/googleapis/python-bigquery-dataframes/commit/34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78))
16+
* Add dark mode to anywidget mode (#2365) ([2763b41d4b86939e389f76789f5b2acd44f18169](https://github.com/googleapis/python-bigquery-dataframes/commit/2763b41d4b86939e389f76789f5b2acd44f18169))
17+
* Configure Biome for Consistent Code Style (#2364) ([81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f](https://github.com/googleapis/python-bigquery-dataframes/commit/81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f))
18+
19+
20+
### Bug Fixes
21+
22+
* Throw if write api commit op has stream_errors (#2385) ([7abfef0598d476ef233364a01f72d73291983c30](https://github.com/googleapis/python-bigquery-dataframes/commit/7abfef0598d476ef233364a01f72d73291983c30))
23+
* implement retry logic for cloud function endpoint fetching (#2369) ([0f593c27bfee89fe1bdfc880504f9ab0ac28a24e](https://github.com/googleapis/python-bigquery-dataframes/commit/0f593c27bfee89fe1bdfc880504f9ab0ac28a24e))
24+
725
## [2.32.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.31.0...bigframes-v2.32.0) (2026-01-05)
826

927

bigframes/bigquery/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import sys
2020

21-
from bigframes.bigquery import ai, ml
21+
from bigframes.bigquery import ai, ml, obj
2222
from bigframes.bigquery._operations.approx_agg import approx_top_count
2323
from bigframes.bigquery._operations.array import (
2424
array_agg,
@@ -61,6 +61,7 @@
6161
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6262
from bigframes.bigquery._operations.sql import sql_scalar
6363
from bigframes.bigquery._operations.struct import struct
64+
from bigframes.bigquery.table import create_external_table
6465
from bigframes.core.logging import log_adapter
6566

6667
_functions = [
@@ -107,6 +108,8 @@
107108
sql_scalar,
108109
# struct ops
109110
struct,
111+
# table ops
112+
create_external_table,
110113
]
111114

112115
_module = sys.modules[__name__]
@@ -160,7 +163,10 @@
160163
"sql_scalar",
161164
# struct ops
162165
"struct",
166+
# table ops
167+
"create_external_table",
163168
# Modules / SQL namespaces
164169
"ai",
165170
"ml",
171+
"obj",
166172
]

bigframes/bigquery/_operations/ai.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ def generate(
5858
>>> import bigframes.pandas as bpd
5959
>>> import bigframes.bigquery as bbq
6060
>>> country = bpd.Series(["Japan", "Canada"])
61-
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
62-
0 {'result': 'Tokyo\\n', 'full_response': '{"cand...
63-
1 {'result': 'Ottawa\\n', 'full_response': '{"can...
61+
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +SKIP
62+
0 {'result': 'Tokyo', 'full_response': '{"cand...
63+
1 {'result': 'Ottawa', 'full_response': '{"can...
6464
dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
6565
66-
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result")
67-
0 Tokyo\\n
68-
1 Ottawa\\n
66+
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") # doctest: +SKIP
67+
0 Tokyo
68+
1 Ottawa
6969
Name: result, dtype: string
7070
7171
You get structured output when the `output_schema` parameter is set:

bigframes/bigquery/_operations/ml.py

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import cast, Mapping, Optional, Union
17+
from typing import cast, List, Mapping, Optional, Union
1818

1919
import bigframes_vendored.constants
2020
import google.cloud.bigquery
@@ -393,3 +393,130 @@ def global_explain(
393393
return bpd.read_gbq_query(sql)
394394
else:
395395
return session.read_gbq_query(sql)
396+
397+
398+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
399+
def transform(
400+
model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
401+
input_: Union[pd.DataFrame, dataframe.DataFrame, str],
402+
) -> dataframe.DataFrame:
403+
"""
404+
Transforms input data using a BigQuery ML model.
405+
406+
See the `BigQuery ML TRANSFORM function syntax
407+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform>`_
408+
for additional reference.
409+
410+
Args:
411+
model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
412+
The model to use for transformation.
413+
input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
414+
The DataFrame or query to use for transformation.
415+
416+
Returns:
417+
bigframes.pandas.DataFrame:
418+
The transformed data.
419+
"""
420+
import bigframes.pandas as bpd
421+
422+
model_name, session = _get_model_name_and_session(model, input_)
423+
table_sql = _to_sql(input_)
424+
425+
sql = bigframes.core.sql.ml.transform(
426+
model_name=model_name,
427+
table=table_sql,
428+
)
429+
430+
if session is None:
431+
return bpd.read_gbq_query(sql)
432+
else:
433+
return session.read_gbq_query(sql)
434+
435+
436+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
437+
def generate_text(
438+
model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
439+
input_: Union[pd.DataFrame, dataframe.DataFrame, str],
440+
*,
441+
temperature: Optional[float] = None,
442+
max_output_tokens: Optional[int] = None,
443+
top_k: Optional[int] = None,
444+
top_p: Optional[float] = None,
445+
flatten_json_output: Optional[bool] = None,
446+
stop_sequences: Optional[List[str]] = None,
447+
ground_with_google_search: Optional[bool] = None,
448+
request_type: Optional[str] = None,
449+
) -> dataframe.DataFrame:
450+
"""
451+
Generates text using a BigQuery ML model.
452+
453+
See the `BigQuery ML GENERATE_TEXT function syntax
454+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-text>`_
455+
for additional reference.
456+
457+
Args:
458+
model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
459+
The model to use for text generation.
460+
input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
461+
The DataFrame or query to use for text generation.
462+
temperature (float, optional):
463+
A FLOAT64 value that controls the degree of randomness in token selection. The value
464+
must be in the range ``[0.0, 1.0]``. A lower temperature works well
465+
for prompts that expect a more deterministic and less open-ended
466+
or creative response, while a higher temperature can lead to more
467+
diverse or creative results. A temperature of ``0`` is
468+
deterministic, meaning that the highest probability response is
469+
always selected.
470+
max_output_tokens (int, optional):
471+
An INT64 value that sets the maximum number of tokens in the
472+
generated text.
473+
top_k (int, optional):
474+
An INT64 value that changes how the model selects tokens for
475+
output. A ``top_k`` of ``1`` means the next selected token is the
476+
most probable among all tokens in the model's vocabulary. A
477+
``top_k`` of ``3`` means that the next token is selected from
478+
among the three most probable tokens by using temperature. The
479+
default value is ``40``.
480+
top_p (float, optional):
481+
A FLOAT64 value that changes how the model selects tokens for
482+
output. Tokens are selected from most probable to least probable
483+
until the sum of their probabilities equals the ``top_p`` value.
484+
For example, if tokens A, B, and C have a probability of 0.3, 0.2,
485+
and 0.1 and the ``top_p`` value is ``0.5``, then the model will
486+
select either A or B as the next token by using temperature. The
487+
default value is ``0.95``.
488+
flatten_json_output (bool, optional):
489+
A BOOL value that determines the content of the generated JSON column.
490+
stop_sequences (List[str], optional):
491+
An ARRAY<STRING> value that contains the stop sequences for the model.
492+
ground_with_google_search (bool, optional):
493+
A BOOL value that determines whether to ground the model with Google Search.
494+
request_type (str, optional):
495+
A STRING value that contains the request type for the model.
496+
497+
Returns:
498+
bigframes.pandas.DataFrame:
499+
The generated text.
500+
"""
501+
import bigframes.pandas as bpd
502+
503+
model_name, session = _get_model_name_and_session(model, input_)
504+
table_sql = _to_sql(input_)
505+
506+
sql = bigframes.core.sql.ml.generate_text(
507+
model_name=model_name,
508+
table=table_sql,
509+
temperature=temperature,
510+
max_output_tokens=max_output_tokens,
511+
top_k=top_k,
512+
top_p=top_p,
513+
flatten_json_output=flatten_json_output,
514+
stop_sequences=stop_sequences,
515+
ground_with_google_search=ground_with_google_search,
516+
request_type=request_type,
517+
)
518+
519+
if session is None:
520+
return bpd.read_gbq_query(sql)
521+
else:
522+
return session.read_gbq_query(sql)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
"""This module exposes BigQuery ObjectRef functions.
17+
18+
See bigframes.bigquery.obj for public docs.
19+
"""
20+
21+
22+
from __future__ import annotations
23+
24+
import datetime
25+
from typing import Optional, Sequence, Union
26+
27+
import numpy as np
28+
import pandas as pd
29+
30+
from bigframes.core import convert
31+
from bigframes.core.logging import log_adapter
32+
import bigframes.core.utils as utils
33+
import bigframes.operations as ops
34+
import bigframes.series as series
35+
36+
37+
@log_adapter.method_logger(custom_base_name="bigquery_obj")
38+
def fetch_metadata(
39+
objectref: series.Series,
40+
) -> series.Series:
41+
"""[Preview] The OBJ.FETCH_METADATA function returns Cloud Storage metadata for a partially populated ObjectRef value.
42+
43+
Args:
44+
objectref (bigframes.pandas.Series):
45+
A partially populated ObjectRef value, in which the uri and authorizer fields are populated and the details field isn't.
46+
47+
Returns:
48+
bigframes.pandas.Series: A fully populated ObjectRef value. The metadata is provided in the details field of the returned ObjectRef value.
49+
"""
50+
objectref = convert.to_bf_series(objectref, default_index=None)
51+
return objectref._apply_unary_op(ops.obj_fetch_metadata_op)
52+
53+
54+
@log_adapter.method_logger(custom_base_name="bigquery_obj")
55+
def get_access_url(
56+
objectref: series.Series,
57+
mode: str,
58+
duration: Optional[Union[datetime.timedelta, pd.Timedelta, np.timedelta64]] = None,
59+
) -> series.Series:
60+
"""[Preview] The OBJ.GET_ACCESS_URL function returns JSON that contains reference information for the input ObjectRef value, and also access URLs that you can use to read or modify the Cloud Storage object.
61+
62+
Args:
63+
objectref (bigframes.pandas.Series):
64+
An ObjectRef value that represents a Cloud Storage object.
65+
mode (str):
66+
A STRING value that identifies the type of URL that you want to be returned. The following values are supported:
67+
'r': Returns a URL that lets you read the object.
68+
'rw': Returns two URLs, one that lets you read the object, and one that lets you modify the object.
69+
duration (Union[datetime.timedelta, pandas.Timedelta, numpy.timedelta64], optional):
70+
An optional INTERVAL value that specifies how long the generated access URLs remain valid. You can specify a value between 30 minutes and 6 hours. For example, you could specify INTERVAL 2 HOUR to generate URLs that expire after 2 hours. The default value is 6 hours.
71+
72+
Returns:
73+
bigframes.pandas.Series: A JSON value that contains the Cloud Storage object reference information from the input ObjectRef value, and also one or more URLs that you can use to access the Cloud Storage object.
74+
"""
75+
objectref = convert.to_bf_series(objectref, default_index=None)
76+
77+
duration_micros = None
78+
if duration is not None:
79+
duration_micros = utils.timedelta_to_micros(duration)
80+
81+
return objectref._apply_unary_op(
82+
ops.ObjGetAccessUrl(mode=mode, duration=duration_micros)
83+
)
84+
85+
86+
@log_adapter.method_logger(custom_base_name="bigquery_obj")
87+
def make_ref(
88+
uri_or_json: Union[series.Series, Sequence[str]],
89+
authorizer: Union[series.Series, str, None] = None,
90+
) -> series.Series:
91+
"""[Preview] Use the OBJ.MAKE_REF function to create an ObjectRef value that contains reference information for a Cloud Storage object.
92+
93+
Args:
94+
uri_or_json (bigframes.pandas.Series or Sequence[str]):
95+
A series of STRING values that contains the URI for the Cloud Storage object, for example, gs://mybucket/flowers/12345.jpg.
96+
OR
97+
A series of JSON values, each of which represents a Cloud Storage object.
98+
authorizer (bigframes.pandas.Series or str, optional):
99+
A STRING value that contains the Cloud Resource connection used to access the Cloud Storage object.
100+
Required if ``uri_or_json`` is a URI string.
101+
102+
Returns:
103+
bigframes.pandas.Series: An ObjectRef value.
104+
"""
105+
uri_or_json = convert.to_bf_series(uri_or_json, default_index=None)
106+
107+
if authorizer is not None:
108+
# Avoid join problems encountered if we try to convert a literal into Series.
109+
if not isinstance(authorizer, str):
110+
authorizer = convert.to_bf_series(authorizer, default_index=None)
111+
112+
return uri_or_json._apply_binary_op(authorizer, ops.obj_make_ref_op)
113+
114+
# If authorizer is not provided, we assume uri_or_json is a JSON objectref
115+
return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op)

bigframes/bigquery/ml.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
create_model,
2424
evaluate,
2525
explain_predict,
26+
generate_text,
2627
global_explain,
2728
predict,
29+
transform,
2830
)
2931

3032
__all__ = [
@@ -33,4 +35,6 @@
3335
"predict",
3436
"explain_predict",
3537
"global_explain",
38+
"transform",
39+
"generate_text",
3640
]

0 commit comments

Comments
 (0)