From 3ccb3ceb005eac30a4a1842f048ee9c604b67472 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Mon, 18 May 2026 16:01:30 +0000
Subject: [PATCH 01/20] feat: add `bigframes.bigquery.aead.*` scalar functions

---
 .../scripts/data/sql-functions/aead.yaml      | 216 ++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 packages/bigframes/scripts/data/sql-functions/aead.yaml

diff --git a/packages/bigframes/scripts/data/sql-functions/aead.yaml b/packages/bigframes/scripts/data/sql-functions/aead.yaml
new file mode 100644
index 000000000000..ba5dc1fb3af9
--- /dev/null
+++ b/packages/bigframes/scripts/data/sql-functions/aead.yaml
@@ -0,0 +1,216 @@
+urn: extension:google:bq_scalar_functions
+scalar_functions:
+  - name: "aead.decrypt_bytes"
+    description: "GoogleSQL scalar function aead.decrypt_bytes"
+    impls:
+      # Signature: aead.decrypt_bytes:vbin_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary
+      # Signature: aead.decrypt_bytes:struct_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: struct
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary
+  - name: "aead.decrypt_string"
+    description: "GoogleSQL scalar function aead.decrypt_string"
+    impls:
+      # Signature: aead.decrypt_string:vbin_vbin_str
+      - args:
+          - name: "arg0"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: string
+            optional: false
+            keyword_only: false
+        return: string
+      # Signature: aead.decrypt_string:struct_vbin_str
+      - args:
+          - name: "arg0"
+            value: struct
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: string
+            optional: false
+            keyword_only: false
+        return: string
+  - name: "aead.encrypt"
+    description: "GoogleSQL scalar function aead.encrypt"
+    impls:
+      # Signature: aead.encrypt:vbin_str_str
+      - args:
+          - name: "arg0"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: string
+            optional: false
+            keyword_only: false
+        return: binary
+      # Signature: aead.encrypt:vbin_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary
+      # Signature: aead.encrypt:struct_str_str
+      - args:
+          - name: "arg0"
+            value: struct
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: string
+            optional: false
+            keyword_only: false
+        return: binary
+      # Signature: aead.encrypt:struct_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: struct
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary
+  - name: "aead.envelope_decrypt_bytes"
+    description: "GoogleSQL scalar function aead.envelope_decrypt_bytes"
+    impls:
+      # Signature: aead.envelope_decrypt_bytes:str_vbin_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg3"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary
+  - name: "aead.envelope_decrypt_string"
+    description: "GoogleSQL scalar function aead.envelope_decrypt_string"
+    impls:
+      # Signature: aead.envelope_decrypt_string:str_vbin_vbin_str
+      - args:
+          - name: "arg0"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg3"
+            value: string
+            optional: false
+            keyword_only: false
+        return: string
+  - name: "aead.envelope_encrypt"
+    description: "GoogleSQL scalar function aead.envelope_encrypt"
+    impls:
+      # Signature: aead.envelope_encrypt:str_vbin_str_str
+      - args:
+          - name: "arg0"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg3"
+            value: string
+            optional: false
+            keyword_only: false
+        return: binary
+      # Signature: aead.envelope_encrypt:str_vbin_vbin_vbin
+      - args:
+          - name: "arg0"
+            value: string
+            optional: false
+            keyword_only: false
+          - name: "arg1"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg2"
+            value: binary
+            optional: false
+            keyword_only: false
+          - name: "arg3"
+            value: binary
+            optional: false
+            keyword_only: false
+        return: binary

From 4c94ec3e1d81dee1f7cb45ac3c2ccdf7f900e590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Mon, 18 May 2026 16:13:34 +0000
Subject: [PATCH 02/20] update function definitions to match
 https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/aead_encryption_functions

---
 .../scripts/data/sql-functions/aead.yaml      | 139 ++++--------------
 1 file changed, 27 insertions(+), 112 deletions(-)

diff --git a/packages/bigframes/scripts/data/sql-functions/aead.yaml b/packages/bigframes/scripts/data/sql-functions/aead.yaml
index ba5dc1fb3af9..6c289a96e886 100644
--- a/packages/bigframes/scripts/data/sql-functions/aead.yaml
+++ b/packages/bigframes/scripts/data/sql-functions/aead.yaml
@@ -1,215 +1,130 @@
 urn: extension:google:bq_scalar_functions
 scalar_functions:
   - name: "aead.decrypt_bytes"
-    description: "GoogleSQL scalar function aead.decrypt_bytes"
+    description: "Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails."
     impls:
       # Signature: aead.decrypt_bytes:vbin_vbin_vbin
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "ciphertext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: binary
             optional: false
             keyword_only: false
         return: binary
       # Signature: aead.decrypt_bytes:struct_vbin_vbin
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: struct
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "ciphertext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: binary
             optional: false
             keyword_only: false
         return: binary
   - name: "aead.decrypt_string"
-    description: "GoogleSQL scalar function aead.decrypt_string"
+    description: "Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING."
     impls:
       # Signature: aead.decrypt_string:vbin_vbin_str
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "ciphertext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: string
             optional: false
             keyword_only: false
         return: string
       # Signature: aead.decrypt_string:struct_vbin_str
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: struct
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "ciphertext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: string
             optional: false
             keyword_only: false
         return: string
   - name: "aead.encrypt"
-    description: "GoogleSQL scalar function aead.encrypt"
+    description: "Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL."
     impls:
       # Signature: aead.encrypt:vbin_str_str
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "plaintext"
             value: string
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: string
             optional: false
             keyword_only: false
         return: binary
       # Signature: aead.encrypt:vbin_vbin_vbin
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "plaintext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: binary
             optional: false
             keyword_only: false
         return: binary
       # Signature: aead.encrypt:struct_str_str
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: struct
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "plaintext"
             value: string
             optional: false
             keyword_only: false
-          - name: "arg2"
+          - name: "additional_data"
             value: string
             optional: false
             keyword_only: false
         return: binary
       # Signature: aead.encrypt:struct_vbin_vbin
       - args:
-          - name: "arg0"
+          - name: "keyset"
             value: struct
             optional: false
             keyword_only: false
-          - name: "arg1"
+          - name: "plaintext"
             value: binary
             optional: false
             keyword_only: false
-          - name: "arg2"
-            value: binary
-            optional: false
-            keyword_only: false
-        return: binary
-  - name: "aead.envelope_decrypt_bytes"
-    description: "GoogleSQL scalar function aead.envelope_decrypt_bytes"
-    impls:
-      # Signature: aead.envelope_decrypt_bytes:str_vbin_vbin_vbin
-      - args:
-          - name: "arg0"
-            value: string
-            optional: false
-            keyword_only: false
-          - name: "arg1"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg2"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg3"
-            value: binary
-            optional: false
-            keyword_only: false
-        return: binary
-  - name: "aead.envelope_decrypt_string"
-    description: "GoogleSQL scalar function aead.envelope_decrypt_string"
-    impls:
-      # Signature: aead.envelope_decrypt_string:str_vbin_vbin_str
-      - args:
-          - name: "arg0"
-            value: string
-            optional: false
-            keyword_only: false
-          - name: "arg1"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg2"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg3"
-            value: string
-            optional: false
-            keyword_only: false
-        return: string
-  - name: "aead.envelope_encrypt"
-    description: "GoogleSQL scalar function aead.envelope_encrypt"
-    impls:
-      # Signature: aead.envelope_encrypt:str_vbin_str_str
-      - args:
-          - name: "arg0"
-            value: string
-            optional: false
-            keyword_only: false
-          - name: "arg1"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg2"
-            value: string
-            optional: false
-            keyword_only: false
-          - name: "arg3"
-            value: string
-            optional: false
-            keyword_only: false
-        return: binary
-      # Signature: aead.envelope_encrypt:str_vbin_vbin_vbin
-      - args:
-          - name: "arg0"
-            value: string
-            optional: false
-            keyword_only: false
-          - name: "arg1"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg2"
-            value: binary
-            optional: false
-            keyword_only: false
-          - name: "arg3"
+          - name: "additional_data"
             value: binary
             optional: false
             keyword_only: false

From c6b94fa2aaedae84d82d7f433acedf4e65b9d9e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Mon, 18 May 2026 19:15:38 +0000
Subject: [PATCH 03/20] create a spec for code generation

---
 .pre-commit-config.yaml                       |  6 +-
 .../bigframes/bigframes/core/sentinels.py     | 33 ++++++++++
 .../specs/bigframes-bigquery-generator.md     | 62 +++++++++++++++++++
 3 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 packages/bigframes/bigframes/core/sentinels.py
 create mode 100644 packages/bigframes/specs/bigframes-bigquery-generator.md

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 427359befa73..15b074bf647e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,16 +16,16 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v6.0.0
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
     -   id: check-yaml
 -   repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.7.0
     hooks:
     - id: black
 -   repo: https://github.com/pycqa/flake8
-    rev: 3.9.2 # version-scanner: ignore
+    rev: 6.1.0 # version-scanner: ignore
     hooks:
     - id: flake8
diff --git a/packages/bigframes/bigframes/core/sentinels.py b/packages/bigframes/bigframes/core/sentinels.py
new file mode 100644
index 000000000000..fc2bfac970e5
--- /dev/null
+++ b/packages/bigframes/bigframes/core/sentinels.py
@@ -0,0 +1,33 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sentinel values used throughout BigFrames."""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class Default(Enum):
+    """Default values used throughout BigFrames.
+
+    When a parameter is set to this, that parameter is explicitly omitted
+    from the SQL text. This allows for NULL (None in Python) to be explicitly
+    passed in to optional parameters.
+    """
+
+    token = 0
+
+
+DEFAULT = Default.token
diff --git a/packages/bigframes/specs/bigframes-bigquery-generator.md b/packages/bigframes/specs/bigframes-bigquery-generator.md
new file mode 100644
index 000000000000..75ad578f844c
--- /dev/null
+++ b/packages/bigframes/specs/bigframes-bigquery-generator.md
@@ -0,0 +1,62 @@
+# Code generation for bigframes.bigquery
+
+This document describes code generation for the `bigframes.bigquery` modules.
+For detailed specifications on input and output types, refer to
+[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md).
+
+## Overview
+
+The script at `packages/bigframes/scripts/generate_bigframes_bigquery.py`
+generates Python submodules for the `bigframes.bigquery` module. When run
+without any arguments, it iterates through all yaml files at
+`packages/bigframes/scripts/data/sql-functions/**/*.yaml` to generate the code.
+
+The script at `packages/bigframes/scripts/check_bigframes_bigquery.py` iterates
+through all the same yaml files and checks that the functions have been included
+in the `bigframes.bigquery` module, as the `__init__.py` file requires manual
+updates.
+
+## Generated code organization
+
+The `generate_bigframes_bigquery.py` script generates submodules of
+`bigframes.bigquery._operations`, with the full path reflecting the organization
+of the YAML files. For example, a YAML file at
+`packages/bigframes/scripts/data/sql-functions/aead.yaml` corresponds to a
+generated Python module at `bigframes.bigquery._operations.aead`. Likewise,
+`packages/bigframes/scripts/data/sql-functions/builtins/bit.yaml` corresponds
+to the `bigframes.bigquery._operations.builtins.bit` submodule.
+
+## Generated module implementation
+
+Each generated module has all functions defined in the YAML file converted to
+the equivalent Python definition, including keyword arguments and docstrings.
+
+### Handling optional arguments
+
+When the user calls a Python function without specifying the optional
+argument, that argument is omitted from the SQL text. To allow explicit NULL
+values to be passed in (None in Python), optional parameters default to the
+sentinel enum value `bigframes.core.sentinels.DEFAULT` instead of None. For
+example:
+
+```python
+import bigframes.core.sentinels
+
+def current_date(
+    time_zone_expression: str | bigframes.core.sentinels.Default = bigframes.core.sentinels.DEFAULT,
+):
+    ...
+```
+
+### Input and output types
+
+Refer to the table in
+[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md).
+
+### Internal bigframes operator
+
+Scalar functions should generate an expression using the `GoogleSqlScalarOp`.
+This keeps the implementation of scalar SQL functions consistent.
+
+Aggregate, analytic, and table-valued functions currently require custom ops. As
+such, those functions are currently out of scope for this generator.

From 7cb7e76dd4d81bb89d2db68e1438da59f7a55a06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Mon, 18 May 2026 19:32:01 +0000
Subject: [PATCH 04/20] use uv for script reproducibility

---
 .../specs/bigframes-bigquery-generator.md     | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/packages/bigframes/specs/bigframes-bigquery-generator.md b/packages/bigframes/specs/bigframes-bigquery-generator.md
index 75ad578f844c..26d1d45c9f51 100644
--- a/packages/bigframes/specs/bigframes-bigquery-generator.md
+++ b/packages/bigframes/specs/bigframes-bigquery-generator.md
@@ -16,6 +16,40 @@ through all the same yaml files and checks that the functions have been included
 in the `bigframes.bigquery` module, as the `__init__.py` file requires manual
 updates.
 
+## Running the generator
+
+Since the dependencies for the script differ from those of bigframes
+and its test suite, use the self-contained Python script technique described at
+https://docs.astral.sh/uv/guides/scripts/
+to automatically manage dependencies using `uv`. Therefore, the header of the
+script will look something like:
+
+```python
+#!/usr/bin/env -S uv run --script
+#
+# /// script
+# dependencies = [
+#   "jinja2",
+#   "pyyaml",
+# ]
+# ///
+#
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# ...
+```
+
+To run the script:
+
+```bash
+cd packages/bigframes
+uv run scripts/generate_bigframes_bigquery.py
+```
+
+To improve reproducibility, we also check in the uv lock file generated by
+running `uv lock --script scripts/generate_bigframes_bigquery.py`.
+
 ## Generated code organization
 
 The `generate_bigframes_bigquery.py` script generates submodules of
@@ -31,6 +65,11 @@ to the `bigframes.bigquery._operations.builtins.bit` submodule.
 Each generated module has all functions defined in the YAML file converted to
 the equivalent Python definition, including keyword arguments and docstrings.
 
+### Code generation
+
+The code will be templated using the jinja2 template engine. This allows
+proposed changes to the templated code to be reviewed more easily.
+
 ### Handling optional arguments
 
 When the user calls a Python function without specifying the optional

From d2f1e985b5802fec5e57468dea023e34df127302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Mon, 18 May 2026 20:00:28 +0000
Subject: [PATCH 05/20] first pass at code generation

---
 .../bigframes/bigquery/_operations/aead.py    | 138 ++++++++
 .../scripts/generate_bigframes_bigquery.py    | 301 ++++++++++++++++++
 .../generate_bigframes_bigquery.py.lock       |  77 +++++
 3 files changed, 516 insertions(+)
 create mode 100644 packages/bigframes/bigframes/bigquery/_operations/aead.py
 create mode 100755 packages/bigframes/scripts/generate_bigframes_bigquery.py
 create mode 100644 packages/bigframes/scripts/generate_bigframes_bigquery.py.lock

diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py
new file mode 100644
index 000000000000..1bde8d748638
--- /dev/null
+++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py
@@ -0,0 +1,138 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: scripts/data/sql-functions/aead.yaml
+# by the script: scripts/generate_bigframes_bigquery.py
+
+from __future__ import annotations
+
+import datetime
+from typing import Any, Optional, TypeVar, Union
+
+import bigframes.core.col
+import bigframes.core.expression as ex
+import bigframes.core.sentinels as sentinels
+import bigframes.operations as ops
+import bigframes.series as series
+from bigframes import dtypes
+from bigframes.operations import googlesql
+
+T = TypeVar("T", series.Series, bigframes.core.col.Expression)
+
+_DECRYPT_BYTES_OP = googlesql.GoogleSqlScalarOp(
+    "AEAD.DECRYPT_BYTES",
+    args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()),
+    signature=lambda *args: dtypes.BYTES_DTYPE,
+)
+_DECRYPT_STRING_OP = googlesql.GoogleSqlScalarOp(
+    "AEAD.DECRYPT_STRING",
+    args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()),
+    signature=lambda *args: dtypes.STRING_DTYPE,
+)
+_ENCRYPT_OP = googlesql.GoogleSqlScalarOp(
+    "AEAD.ENCRYPT",
+    args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()),
+    signature=lambda *args: dtypes.BYTES_DTYPE,
+)
+
+
+def _apply_googlesql_op(
+    op: googlesql.GoogleSqlScalarOp,
+    *args: Any,
+) -> Union[series.Series, bigframes.core.col.Expression]:
+    """Applies a GoogleSQL scalar operator to the given arguments.
+
+    Handles a mix of Series, Expression, and literal inputs.
+    """
+    # Find the first Series to use for alignment
+    first_series = None
+    for arg in args:
+        if isinstance(arg, series.Series):
+            first_series = arg
+            break
+
+    if first_series is not None:
+        processed_args = []
+        block = first_series._block
+        for arg in args:
+            if isinstance(arg, bigframes.core.col.Expression):
+                # Project expression onto the block
+                block, col_id = block.project_expr(arg._expr)
+                processed_args.append(series.Series(block.select_column(col_id)))
+            elif arg is sentinels.DEFAULT:
+                # OmittedArg is handled by GoogleSqlScalarOp in compiler
+                processed_args.append(bigframes.core.col.Expression(ex.OmittedArg()))
+            else:
+                processed_args.append(arg)
+
+        # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals.
+        result = first_series._apply_nary_op(op, processed_args, ignore_self=True)
+        result.name = None
+        return result
+
+    # No Series, return an Expression
+    expr_args = []
+    for arg in args:
+        if isinstance(arg, bigframes.core.col.Expression):
+            expr_args.append(arg._expr)
+        elif arg is sentinels.DEFAULT:
+            expr_args.append(ex.OmittedArg())
+        else:
+            expr_args.append(ex.const(arg))
+
+    return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args)))
+
+
+def decrypt_bytes(
+    keyset: Union[T, Union[bytes, dict]],
+    ciphertext: Union[T, bytes],
+    additional_data: Union[T, bytes],
+) -> T:
+    """Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails."""
+    return _apply_googlesql_op(
+        _DECRYPT_BYTES_OP,
+        keyset,
+        ciphertext,
+        additional_data,
+    )  # type: ignore
+
+
+def decrypt_string(
+    keyset: Union[T, Union[bytes, dict]],
+    ciphertext: Union[T, bytes],
+    additional_data: Union[T, str],
+) -> T:
+    """Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING."""
+    return _apply_googlesql_op(
+        _DECRYPT_STRING_OP,
+        keyset,
+        ciphertext,
+        additional_data,
+    )  # type: ignore
+
+
+def encrypt(
+    keyset: Union[T, Union[bytes, dict]],
+    plaintext: Union[T, Union[bytes, str]],
+    additional_data: Union[T, Union[bytes, str]],
+) -> T:
+    """Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL."""
+    return _apply_googlesql_op(
+        _ENCRYPT_OP,
+        keyset,
+        plaintext,
+        additional_data,
+    )  # type: ignore
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
new file mode 100755
index 000000000000..b15aace5f17c
--- /dev/null
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env -S uv run --script
+#
+# /// script
+# dependencies = [
+#   "jinja2",
+#   "pyyaml",
+# ]
+# ///
+#
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+import re
+import yaml
+import jinja2
+
+# Directory containing the YAML files
+DATA_DIR = pathlib.Path("scripts/data/sql-functions")
+# Directory where the generated Python files will be placed
+OUTPUT_DIR = pathlib.Path("bigframes/bigquery/_operations")
+
+LICENSE_HEADER = """# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+TEMPLATE = """{{ license_header }}
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: {{ yaml_path }}
+# by the script: {{ script_path }}
+
+from __future__ import annotations
+
+import datetime
+from typing import Any, Optional, TypeVar, Union
+
+import bigframes.core.col
+import bigframes.core.expression as ex
+import bigframes.core.sentinels as sentinels
+import bigframes.operations as ops
+import bigframes.series as series
+from bigframes import dtypes
+from bigframes.operations import googlesql
+
+T = TypeVar("T", series.Series, bigframes.core.col.Expression)
+
+{% for op in ops %}
+{{ op.internal_name }} = googlesql.GoogleSqlScalarOp(
+    "{{ op.sql_name }}",
+    args=({{ op.arg_specs }}),
+    signature={{ op.signature }},
+)
+{% endfor %}
+
+def _apply_googlesql_op(
+    op: googlesql.GoogleSqlScalarOp,
+    *args: Any,
+) -> Union[series.Series, bigframes.core.col.Expression]:
+    \"\"\"Applies a GoogleSQL scalar operator to the given arguments.
+
+    Handles a mix of Series, Expression, and literal inputs.
+    \"\"\"
+    # Find the first Series to use for alignment
+    first_series = None
+    for arg in args:
+        if isinstance(arg, series.Series):
+            first_series = arg
+            break
+
+    if first_series is not None:
+        processed_args = []
+        block = first_series._block
+        for arg in args:
+            if isinstance(arg, bigframes.core.col.Expression):
+                # Project expression onto the block
+                block, col_id = block.project_expr(arg._expr)
+                processed_args.append(series.Series(block.select_column(col_id)))
+            elif arg is sentinels.DEFAULT:
+                # OmittedArg is handled by GoogleSqlScalarOp in compiler
+                processed_args.append(bigframes.core.col.Expression(ex.OmittedArg()))
+            else:
+                processed_args.append(arg)
+
+        # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals.
+        result = first_series._apply_nary_op(op, processed_args, ignore_self=True)
+        result.name = None
+        return result
+
+    # No Series, return an Expression
+    expr_args = []
+    for arg in args:
+        if isinstance(arg, bigframes.core.col.Expression):
+            expr_args.append(arg._expr)
+        elif arg is sentinels.DEFAULT:
+            expr_args.append(ex.OmittedArg())
+        else:
+            expr_args.append(ex.const(arg))
+
+    return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args)))
+
+{% for func in functions %}
+def {{ func.name }}(
+{% for arg in func.args %}
+    {{ arg.name }}: Union[T, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
+{% endfor %}
+) -> T:
+    \"\"\"{{ func.description }}\"\"\"
+    return _apply_googlesql_op(
+        {{ func.op_name }},
+{% for arg in func.args %}
+        {{ arg.name }},
+{% endfor %}
+    )  # type: ignore
+
+{% endfor %}
+"""
+
+DTYPE_MAP = {
+    "binary": "dtypes.BYTES_DTYPE",
+    "string": "dtypes.STRING_DTYPE",
+    "int64": "dtypes.INT_DTYPE",
+    "float64": "dtypes.FLOAT_DTYPE",
+    "bool": "dtypes.BOOL_DTYPE",
+    "geography": "dtypes.GEO_DTYPE",
+    "json": "dtypes.JSON_DTYPE",
+    "date": "dtypes.DATE_DTYPE",
+    "time": "dtypes.TIME_DTYPE",
+    "datetime": "dtypes.DATETIME_DTYPE",
+    "timestamp": "dtypes.TIMESTAMP_DTYPE",
+}
+
+PY_TYPE_MAP = {
+    "binary": "bytes",
+    "string": "str",
+    "int64": "int",
+    "float64": "float",
+    "bool": "bool",
+    "geography": "Any",
+    "json": "Any",
+    "date": "datetime.date",
+    "time": "datetime.time",
+    "datetime": "datetime.datetime",
+    "timestamp": "datetime.datetime",
+    "struct": "dict",
+}
+
+
+def to_snake_case(name):
+    """Convert a dotted and/or CamelCase SQL function name to snake_case."""
+    name = name.replace(".", "_")
+    # Split only at true word boundaries (lower/digit -> upper, or the end of an
+    # acronym run) so "HTTPServer" -> "http_server", not "h_t_t_p_server".
+    name = re.sub(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", "_", name)
+    # Collapse runs of underscores, then lower-case the result.
+    return re.sub(r"_+", "_", name).lower()
+
+
+def main():
+    """Generate one Python module per YAML file under DATA_DIR.
+
+    Every ``scalar_functions`` entry becomes a GoogleSqlScalarOp plus a public
+    wrapper function. DATA_DIR and OUTPUT_DIR are relative paths, so the
+    script must be run from the directory that contains ``scripts/``.
+    """
+    env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
+    template = env.from_string(TEMPLATE)
+
+    for yaml_file in sorted(DATA_DIR.glob("**/*.yaml")):  # deterministic order
+        print(f"Processing {yaml_file}...")
+        with open(yaml_file, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}  # an empty YAML file loads as None
+
+        rel_path = yaml_file.relative_to(DATA_DIR)
+        module_path = rel_path.with_suffix("")
+        module_name = module_path.name
+        output_file = OUTPUT_DIR.joinpath(module_path).with_suffix(".py")
+
+        ops_list = []
+        functions_list = []
+
+        if "scalar_functions" in data:
+            for func_data in data["scalar_functions"]:
+                sql_name = func_data["name"]
+                python_name = to_snake_case(sql_name)
+                if python_name.startswith(module_name + "_"):
+                    python_name = python_name[len(module_name) + 1 :]
+
+                internal_op_name = f"_{python_name.upper()}_OP"
+
+                # Aggregate args across impls
+                args_by_name = {}
+                arg_order = []
+                for impl in func_data["impls"]:
+                    for arg in impl["args"]:
+                        name = arg["name"]
+                        if name not in args_by_name:
+                            args_by_name[name] = {
+                                "types": set(),
+                                "optional": arg["optional"],
+                                "keyword_only": arg["keyword_only"],
+                            }
+                            arg_order.append(name)
+                        args_by_name[name]["types"].add(arg["value"])
+
+                # Build ArgSpecs
+                arg_specs = []
+                for name in arg_order:
+                    arg_info = args_by_name[name]
+                    spec = "googlesql.ArgSpec("
+                    if arg_info["keyword_only"]:
+                        spec += f'arg_name="{name}", '
+                    if arg_info["optional"]:
+                        spec += "optional=True, "
+                    spec = spec.rstrip(", ") + ")"
+                    arg_specs.append(spec)
+
+                # Determine return dtype
+                return_types = {impl["return"] for impl in func_data["impls"]}
+                if len(return_types) == 1:
+                    ret_type = list(return_types)[0]
+                    signature = f"lambda *args: {DTYPE_MAP.get(ret_type, 'None')}"
+                else:
+                    # Fallback to Any/None if ambiguous
+                    signature = "lambda *args: None"
+
+                ops_list.append(
+                    {
+                        "internal_name": internal_op_name,
+                        "sql_name": sql_name.upper(),
+                        "arg_specs": ", ".join(arg_specs),
+                        "signature": signature,
+                    }
+                )
+
+                # Function args; an empty default renders no "= ..." in the template.
+                func_args = []
+                for name in arg_order:
+                    arg_info = args_by_name[name]
+                    # Dedupe after mapping: SQL types may share a Python type.
+                    types = sorted(
+                        {PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]}
+                    )
+                    joined = ", ".join(types)
+                    type_hint = f"Union[{joined}]" if len(types) > 1 else types[0]
+                    default = "sentinels.DEFAULT" if arg_info["optional"] else ""
+                    func_args.append(
+                        {
+                            "name": name,
+                            "type_hint": type_hint,
+                            "default": default,
+                        }
+                    )
+
+                functions_list.append(
+                    {
+                        "name": python_name,
+                        "op_name": internal_op_name,
+                        "description": func_data["description"],
+                        "args": func_args,
+                    }
+                )
+
+        # Render and write
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        content = template.render(
+            license_header=LICENSE_HEADER,
+            yaml_path=str(yaml_file),
+            script_path="scripts/generate_bigframes_bigquery.py",
+            ops=ops_list,
+            functions=functions_list,
+        )
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(content)
+        print(f"  Generated {output_file}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
new file mode 100644
index 000000000000..0d28e42101bd
--- /dev/null
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
@@ -0,0 +1,77 @@
+version = 1
+revision = 3
+requires-python = ">=3.14"
+
+[manifest]
+requirements = [
+    { name = "jinja2" },
+    { name = "pyyaml" },
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" },
+    { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" },
+    { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" },
+    { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" },
+    { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" },
+    { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" },
+    { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" },
+    { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" },
+    { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" },
+    { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
+    { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
+    { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
+    { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
+    { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
+    { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
+]

From 3a06e11d4ae9b8449def1f6173fe0a38f55a8156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 15:20:09 +0000
Subject: [PATCH 06/20] manual edits to generator

---
 .pre-commit-config.yaml                       |  1 +
 .../bigframes/bigquery/_googlesql.py          | 81 ++++++++++++++++++
 .../bigframes/bigquery/_operations/aead.py    | 82 ++++---------------
 packages/bigframes/bigframes/dtypes.py        |  5 +-
 .../scripts/generate_bigframes_bigquery.py    | 54 ++----------
 5 files changed, 106 insertions(+), 117 deletions(-)
 create mode 100644 packages/bigframes/bigframes/bigquery/_googlesql.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 15b074bf647e..101c6bdc13d1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,3 +29,4 @@ repos:
     rev: 6.1.0 # version-scanner: ignore
     hooks:
     - id: flake8
+      args: [--config, packages/google-cloud-alloydb/.flake8]
diff --git a/packages/bigframes/bigframes/bigquery/_googlesql.py b/packages/bigframes/bigframes/bigquery/_googlesql.py
new file mode 100644
index 000000000000..ebf0bb81dc7e
--- /dev/null
+++ b/packages/bigframes/bigframes/bigquery/_googlesql.py
@@ -0,0 +1,81 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities for working with GoogleSqlScalarOps."""
+
+from __future__ import annotations
+
+from typing import Any, Union
+
+import bigframes.core.col
+import bigframes.core.expression as ex
+import bigframes.core.sentinels as sentinels
+from bigframes.operations import googlesql
+import bigframes.series as series
+
+
+def apply_googlesql_scalar_op(
+    op: googlesql.GoogleSqlScalarOp,
+    *args: Any,
+) -> Union[series.Series, bigframes.core.col.Expression]:
+    """Apply a GoogleSQL scalar operator to Series, Expression, or literal args.
+
+    When at least one argument is a Series, the first such Series anchors the
+    operation and a Series is returned; otherwise the arguments are combined
+    into a deferred Expression.
+
+    Args:
+        op (googlesql.GoogleSqlScalarOp):
+            The operator to apply.
+        *args (Any):
+            Positional operands. ``sentinels.DEFAULT`` marks an omitted
+            argument; other non-Series, non-Expression values are literals.
+
+    Returns:
+        bigframes.pandas.Series | bigframes.core.col.Expression:
+            A Series if any of ``args`` is a Series, else an Expression.
+    """
+    bf_col = bigframes.core.col
+    # The first Series (if any) is the anchor used for alignment.
+    anchor = next((a for a in args if isinstance(a, series.Series)), None)
+
+    if anchor is None:
+        # Expression-only path: lower every operand to an internal expression.
+        lowered = []
+        for operand in args:
+            if isinstance(operand, bf_col.Expression):
+                lowered.append(bf_col._as_bf_expr(operand))
+            elif operand is sentinels.DEFAULT:
+                lowered.append(ex.OmittedArg())
+            else:
+                lowered.append(ex.const(operand))
+        return bf_col.Expression(ex.OpExpression(op, tuple(lowered)))
+
+    # Series path: Expressions are projected starting from the anchor's block,
+    # threading the updated block through each successive projection.
+    operands = []
+    blk = anchor._block
+    for operand in args:
+        if isinstance(operand, bf_col.Expression):
+            blk, col_id = blk.project_expr(bf_col._as_bf_expr(operand))
+            operands.append(series.Series(blk.select_column(col_id)))
+        elif operand is sentinels.DEFAULT:
+            operands.append(bf_col.Expression(ex.OmittedArg()))
+        else:
+            operands.append(operand)
+
+    # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals.
+    outcome = anchor._apply_nary_op(op, operands, ignore_self=True)
+    outcome.name = None
+    return outcome
diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py
index 1bde8d748638..cbc995b1d5d7 100644
--- a/packages/bigframes/bigframes/bigquery/_operations/aead.py
+++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py
@@ -19,16 +19,13 @@
 
 from __future__ import annotations
 
-import datetime
-from typing import Any, Optional, TypeVar, Union
+from typing import TypeVar, Union
 
-import bigframes.core.col
-import bigframes.core.expression as ex
-import bigframes.core.sentinels as sentinels
-import bigframes.operations as ops
-import bigframes.series as series
 from bigframes import dtypes
+import bigframes.bigquery._googlesql
+import bigframes.core.col
 from bigframes.operations import googlesql
+import bigframes.series as series
 
 T = TypeVar("T", series.Series, bigframes.core.col.Expression)
 
@@ -49,60 +46,13 @@
 )
 
 
-def _apply_googlesql_op(
-    op: googlesql.GoogleSqlScalarOp,
-    *args: Any,
-) -> Union[series.Series, bigframes.core.col.Expression]:
-    """Applies a GoogleSQL scalar operator to the given arguments.
-
-    Handles a mix of Series, Expression, and literal inputs.
-    """
-    # Find the first Series to use for alignment
-    first_series = None
-    for arg in args:
-        if isinstance(arg, series.Series):
-            first_series = arg
-            break
-
-    if first_series is not None:
-        processed_args = []
-        block = first_series._block
-        for arg in args:
-            if isinstance(arg, bigframes.core.col.Expression):
-                # Project expression onto the block
-                block, col_id = block.project_expr(arg._expr)
-                processed_args.append(series.Series(block.select_column(col_id)))
-            elif arg is sentinels.DEFAULT:
-                # OmittedArg is handled by GoogleSqlScalarOp in compiler
-                processed_args.append(bigframes.core.col.Expression(ex.OmittedArg()))
-            else:
-                processed_args.append(arg)
-
-        # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals.
-        result = first_series._apply_nary_op(op, processed_args, ignore_self=True)
-        result.name = None
-        return result
-
-    # No Series, return an Expression
-    expr_args = []
-    for arg in args:
-        if isinstance(arg, bigframes.core.col.Expression):
-            expr_args.append(arg._expr)
-        elif arg is sentinels.DEFAULT:
-            expr_args.append(ex.OmittedArg())
-        else:
-            expr_args.append(ex.const(arg))
-
-    return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args)))
-
-
 def decrypt_bytes(
-    keyset: Union[T, Union[bytes, dict]],
-    ciphertext: Union[T, bytes],
-    additional_data: Union[T, bytes],
+    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
+    ciphertext: Union[T, bigframes.core.col.Expression, bytes],
+    additional_data: Union[T, bigframes.core.col.Expression, bytes],
 ) -> T:
     """Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails."""
-    return _apply_googlesql_op(
+    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
         _DECRYPT_BYTES_OP,
         keyset,
         ciphertext,
@@ -111,12 +61,12 @@ def decrypt_bytes(
 
 
 def decrypt_string(
-    keyset: Union[T, Union[bytes, dict]],
-    ciphertext: Union[T, bytes],
-    additional_data: Union[T, str],
+    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
+    ciphertext: Union[T, bigframes.core.col.Expression, bytes],
+    additional_data: Union[T, bigframes.core.col.Expression, str],
 ) -> T:
     """Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING."""
-    return _apply_googlesql_op(
+    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
         _DECRYPT_STRING_OP,
         keyset,
         ciphertext,
@@ -125,12 +75,12 @@ def decrypt_string(
 
 
 def encrypt(
-    keyset: Union[T, Union[bytes, dict]],
-    plaintext: Union[T, Union[bytes, str]],
-    additional_data: Union[T, Union[bytes, str]],
+    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
+    plaintext: Union[T, bigframes.core.col.Expression, Union[bytes, str]],
+    additional_data: Union[T, bigframes.core.col.Expression, Union[bytes, str]],
 ) -> T:
     """Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL."""
-    return _apply_googlesql_op(
+    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
         _ENCRYPT_OP,
         keyset,
         plaintext,
diff --git a/packages/bigframes/bigframes/dtypes.py b/packages/bigframes/bigframes/dtypes.py
index e7539c59c7d7..95689b91dbd2 100644
--- a/packages/bigframes/bigframes/dtypes.py
+++ b/packages/bigframes/bigframes/dtypes.py
@@ -14,13 +14,13 @@
 
 """Mappings for Pandas dtypes supported by BigQuery DataFrames package"""
 
+from dataclasses import dataclass
 import datetime
 import decimal
 import textwrap
 import typing
-import warnings
-from dataclasses import dataclass
 from typing import Any, Dict, List, Literal, Sequence, Union
+import warnings
 
 import bigframes_vendored.constants as constants
 import db_dtypes  # type: ignore
@@ -39,6 +39,7 @@
     pd.Float64Dtype,
     pd.Int64Dtype,
     pd.StringDtype,
+    pd.StringDtype[Literal["pyarrow"]],
     pd.ArrowDtype,
     gpd.array.GeometryDtype,
 ]
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index b15aace5f17c..5479c040eddd 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -23,8 +23,9 @@
 
 import pathlib
 import re
-import yaml
+
 import jinja2
+import yaml
 
 # Directory containing the YAML files
 DATA_DIR = pathlib.Path("scripts/data/sql-functions")
@@ -64,6 +65,7 @@
 import bigframes.series as series
 from bigframes import dtypes
 from bigframes.operations import googlesql
+import bigframes.bigquery._googlesql
 
 T = TypeVar("T", series.Series, bigframes.core.col.Expression)
 
@@ -75,60 +77,14 @@
 )
 {% endfor %}
 
-def _apply_googlesql_op(
-    op: googlesql.GoogleSqlScalarOp,
-    *args: Any,
-) -> Union[series.Series, bigframes.core.col.Expression]:
-    \"\"\"Applies a GoogleSQL scalar operator to the given arguments.
-
-    Handles a mix of Series, Expression, and literal inputs.
-    \"\"\"
-    # Find the first Series to use for alignment
-    first_series = None
-    for arg in args:
-        if isinstance(arg, series.Series):
-            first_series = arg
-            break
-
-    if first_series is not None:
-        processed_args = []
-        block = first_series._block
-        for arg in args:
-            if isinstance(arg, bigframes.core.col.Expression):
-                # Project expression onto the block
-                block, col_id = block.project_expr(arg._expr)
-                processed_args.append(series.Series(block.select_column(col_id)))
-            elif arg is sentinels.DEFAULT:
-                # OmittedArg is handled by GoogleSqlScalarOp in compiler
-                processed_args.append(bigframes.core.col.Expression(ex.OmittedArg()))
-            else:
-                processed_args.append(arg)
-
-        # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals.
-        result = first_series._apply_nary_op(op, processed_args, ignore_self=True)
-        result.name = None
-        return result
-
-    # No Series, return an Expression
-    expr_args = []
-    for arg in args:
-        if isinstance(arg, bigframes.core.col.Expression):
-            expr_args.append(arg._expr)
-        elif arg is sentinels.DEFAULT:
-            expr_args.append(ex.OmittedArg())
-        else:
-            expr_args.append(ex.const(arg))
-
-    return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args)))
-
 {% for func in functions %}
 def {{ func.name }}(
 {% for arg in func.args %}
-    {{ arg.name }}: Union[T, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
+    {{ arg.name }}: Union[T, bigframes.core.col.Expression, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
 {% endfor %}
 ) -> T:
     \"\"\"{{ func.description }}\"\"\"
-    return _apply_googlesql_op(
+    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
         {{ func.op_name }},
 {% for arg in func.args %}
         {{ arg.name }},

From 47c47af85bd1ac487d407f54a7b06c1276743864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 15:35:13 +0000
Subject: [PATCH 07/20] remove imports after generation

---
 .../scripts/generate_bigframes_bigquery.py    | 22 ++++++++++++++-----
 .../generate_bigframes_bigquery.py.lock       | 22 +++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 5479c040eddd..7f0f54aec87d 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -2,6 +2,7 @@
 #
 # /// script
 # dependencies = [
+#   "autoflake",
 #   "jinja2",
 #   "pyyaml",
 # ]
@@ -23,6 +24,7 @@
 
 import pathlib
 import re
+import subprocess
 
 import jinja2
 import yaml
@@ -58,14 +60,14 @@
 import datetime
 from typing import Any, Optional, TypeVar, Union
 
+from bigframes import dtypes
+import bigframes.bigquery._googlesql
 import bigframes.core.col
 import bigframes.core.expression as ex
 import bigframes.core.sentinels as sentinels
+from bigframes.operations import googlesql
 import bigframes.operations as ops
 import bigframes.series as series
-from bigframes import dtypes
-from bigframes.operations import googlesql
-import bigframes.bigquery._googlesql
 
 T = TypeVar("T", series.Series, bigframes.core.col.Expression)
 
@@ -76,8 +78,9 @@
     signature={{ op.signature }},
 )
 {% endfor %}
-
 {% for func in functions %}
+
+
 def {{ func.name }}(
 {% for arg in func.args %}
     {{ arg.name }}: Union[T, bigframes.core.col.Expression, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
@@ -90,7 +93,6 @@ def {{ func.name }}(
         {{ arg.name }},
 {% endfor %}
     )  # type: ignore
-
 {% endfor %}
 """
 
@@ -250,6 +252,16 @@ def main():
         )
         with open(output_file, "w") as f:
             f.write(content)
+
+        subprocess.run(
+            [
+                "autoflake",
+                "--in-place",
+                "--remove-all-unused-imports",
+                str(output_file),
+            ],
+            check=True,
+        )
         print(f"  Generated {output_file}")
 
 
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
index 0d28e42101bd..3cba9097522d 100644
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
@@ -4,10 +4,23 @@ requires-python = ">=3.14"
 
 [manifest]
 requirements = [
+    { name = "autoflake" },
     { name = "jinja2" },
     { name = "pyyaml" },
 ]
 
+[[package]]
+name = "autoflake"
+version = "2.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyflakes" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c3/0b/70c277eef225133763bf05c02c88df182e57d5c5c0730d3998958096a82e/autoflake-2.3.3.tar.gz", hash = "sha256:c24809541e23999f7a7b0d2faadf15deb0bc04cdde49728a2fd943a0c8055504", size = 16515, upload-time = "2026-02-20T05:01:43.448Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/da/21/26f1680ec3a598ea31768f9ebcd427e42986d077a005416094b580635532/autoflake-2.3.3-py3-none-any.whl", hash = "sha256:a51a3412aff16135ee5b3ec25922459fef10c1f23ce6d6c4977188df859e8b53", size = 17715, upload-time = "2026-02-20T05:01:42.137Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -50,6 +63,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
 ]
 
+[[package]]
+name = "pyflakes"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/dc/fd034dc20b4b264b3d015808458391acbf9df40b1e54750ef175d39180b1/pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58", size = 64669, upload-time = "2025-06-20T18:45:27.834Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/2f/81d580a0fb83baeb066698975cb14a618bdbed7720678566f1b046a95fe8/pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f", size = 63551, upload-time = "2025-06-20T18:45:26.937Z" },
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.3"

From 639cfc7ca69291e54994646f9738e43d4f583b97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 15:52:10 +0000
Subject: [PATCH 08/20] generate tests

---
 packages/bigframes/bigframes/dtypes.py        |   1 -
 .../scripts/generate_bigframes_bigquery.py    |  93 ++++++
 .../unit/bigquery/_operations/conftest.py     | 280 ++++++++++++++++++
 .../test_aead/test_decrypt_bytes/out.sql      |   4 +
 .../test_aead/test_decrypt_string/out.sql     |   4 +
 .../snapshots/test_aead/test_encrypt/out.sql  |   4 +
 .../unit/bigquery/_operations/test_aead.py    |  54 ++++
 7 files changed, 439 insertions(+), 1 deletion(-)
 create mode 100644 packages/bigframes/tests/unit/bigquery/_operations/conftest.py
 create mode 100644 packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
 create mode 100644 packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
 create mode 100644 packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
 create mode 100644 packages/bigframes/tests/unit/bigquery/_operations/test_aead.py

diff --git a/packages/bigframes/bigframes/dtypes.py b/packages/bigframes/bigframes/dtypes.py
index 95689b91dbd2..e2bd9dd601f7 100644
--- a/packages/bigframes/bigframes/dtypes.py
+++ b/packages/bigframes/bigframes/dtypes.py
@@ -39,7 +39,6 @@
     pd.Float64Dtype,
     pd.Int64Dtype,
     pd.StringDtype,
-    pd.StringDtype[Literal["pyarrow"]],
     pd.ArrowDtype,
     gpd.array.GeometryDtype,
 ]
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 7f0f54aec87d..ff5dcb1232cd 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -33,6 +33,8 @@
 DATA_DIR = pathlib.Path("scripts/data/sql-functions")
 # Directory where the generated Python files will be placed
 OUTPUT_DIR = pathlib.Path("bigframes/bigquery/_operations")
+# Directory where the generated test files will be placed
+TEST_OUTPUT_DIR = pathlib.Path("tests/unit/bigquery/_operations")
 
 LICENSE_HEADER = """# Copyright 2026 Google LLC
 #
@@ -96,6 +98,35 @@ def {{ func.name }}(
 {% endfor %}
 """
 
+TEST_TEMPLATE = """{{ license_header }}
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: {{ yaml_path }}
+# by the script: {{ script_path }}
+
+from typing import cast
+
+import pytest
+
+import bigframes.pandas as bpd
+import {{ import_path }} as {{ short_name }}
+
+pytest.importorskip("pytest_snapshot")
+
+
+{% for func in functions %}
+def test_{{ func.name }}(scalar_types_df: bpd.DataFrame, snapshot):
+    result = {{ short_name }}.{{ func.name }}(
+{% for arg in func.test_args %}
+        cast(bpd.Series, scalar_types_df["{{ arg.col_name }}"]),
+{% endfor %}
+    ).to_frame()
+    snapshot.assert_match(result.sql, "out.sql")
+
+
+{% endfor %}
+"""
+
 DTYPE_MAP = {
     "binary": "dtypes.BYTES_DTYPE",
     "string": "dtypes.STRING_DTYPE",
@@ -125,6 +156,19 @@ def {{ func.name }}(
     "struct": "dict",
 }
 
+YAML_TYPE_TO_COL = {
+    "binary": "bytes_col",
+    "string": "string_col",
+    "int64": "int64_col",
+    "float64": "float64_col",
+    "bool": "bool_col",
+    "geography": "geography_col",
+    "date": "date_col",
+    "time": "time_col",
+    "datetime": "datetime_col",
+    "timestamp": "timestamp_col",
+}
+
 
 def to_snake_case(name):
     # Replace dots with underscores
@@ -139,6 +183,7 @@ def to_snake_case(name):
 def main():
     env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
     template = env.from_string(TEMPLATE)
+    test_template = env.from_string(TEST_TEMPLATE)
 
     for yaml_file in DATA_DIR.glob("**/*.yaml"):
         print(f"Processing {yaml_file}...")
@@ -232,12 +277,21 @@ def main():
                     if not arg["default"]:
                         del arg["default"]
 
+                # Test args: one column per arg; unmapped types fall back to string_col
+                test_args = []
+                for name in arg_order:
+                    arg_info = args_by_name[name]
+                    some_type = list(arg_info["types"])[0]
+                    col_name = YAML_TYPE_TO_COL.get(some_type, "string_col")
+                    test_args.append({"col_name": col_name})
+
                 functions_list.append(
                     {
                         "name": python_name,
                         "op_name": internal_op_name,
                         "description": func_data["description"],
                         "args": func_args,
+                        "test_args": test_args,
                     }
                 )
 
@@ -264,6 +318,45 @@ def main():
         )
         print(f"  Generated {output_file}")
 
+        # Render and write test
+        import_path = "bigframes.bigquery._operations." + ".".join(module_path.parts)
+        test_output_file = TEST_OUTPUT_DIR.joinpath(
+            module_path.with_name(f"test_{module_path.name}")
+        ).with_suffix(".py")
+
+        test_output_file.parent.mkdir(parents=True, exist_ok=True)
+        test_content = test_template.render(
+            license_header=LICENSE_HEADER,
+            yaml_path=str(yaml_file),
+            script_path="scripts/generate_bigframes_bigquery.py",
+            import_path=import_path,
+            short_name=module_path.name,
+            functions=functions_list,
+        )
+        with open(test_output_file, "w") as f:
+            f.write(test_content)
+
+        subprocess.run(
+            [
+                "autoflake",
+                "--in-place",
+                "--remove-all-unused-imports",
+                str(test_output_file),
+            ],
+            check=True,
+        )
+        print(f"  Generated {test_output_file}")
+
+        print(f"  Updating snapshots for {test_output_file}...")
+        subprocess.run(
+            [
+                "pytest",
+                str(test_output_file),
+                "--snapshot-update",
+            ],
+            check=False,
+        )
+
 
 if __name__ == "__main__":
     main()
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/conftest.py b/packages/bigframes/tests/unit/bigquery/_operations/conftest.py
new file mode 100644
index 000000000000..127902241acb
--- /dev/null
+++ b/packages/bigframes/tests/unit/bigquery/_operations/conftest.py
@@ -0,0 +1,280 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+import typing
+
+import pandas as pd
+import pyarrow as pa
+import pytest
+from google.cloud import bigquery
+
+import bigframes.core as core
+import bigframes.pandas as bpd
+import bigframes.testing.mocks as mocks
+import bigframes.testing.utils
+from bigframes import dtypes
+
+CURRENT_DIR = pathlib.Path(__file__).parent
+DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data"
+
+
+def _create_compiler_session(table_name, table_schema):
+    """Helper function to create a compiler session."""
+    from bigframes.testing import compiler_session
+
+    anonymous_dataset = bigquery.DatasetReference.from_string(
+        "bigframes-dev.sqlglot_test"
+    )
+    session = mocks.create_bigquery_session(
+        table_name=table_name,
+        table_schema=table_schema,
+        anonymous_dataset=anonymous_dataset,
+    )
+    session._executor = compiler_session.SQLCompilerExecutor()
+    return session
+
+
+@pytest.fixture(scope="session")
+def compiler_session(scalar_types_table_schema):
+    """Compiler session for scalar types."""
+    return _create_compiler_session("scalar_types", scalar_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_repeated_types(repeated_types_table_schema):
+    """Compiler session for repeated data types."""
+    return _create_compiler_session("repeated_types", repeated_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_nested_structs_types(nested_structs_types_table_schema):
+    """Compiler session for nested STRUCT data types."""
+    return _create_compiler_session(
+        "nested_structs_types", nested_structs_types_table_schema
+    )
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_json_types(json_types_table_schema):
+    """Compiler session for JSON data types."""
+    return _create_compiler_session("json_types", json_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("bool_col", "BOOLEAN"),
+        bigquery.SchemaField("bytes_col", "BYTES"),
+        bigquery.SchemaField("date_col", "DATE"),
+        bigquery.SchemaField("datetime_col", "DATETIME"),
+        bigquery.SchemaField("geography_col", "GEOGRAPHY"),
+        bigquery.SchemaField("int64_col", "INTEGER"),
+        bigquery.SchemaField("int64_too", "INTEGER"),
+        bigquery.SchemaField("numeric_col", "NUMERIC"),
+        bigquery.SchemaField("float64_col", "FLOAT"),
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"),
+        bigquery.SchemaField("string_col", "STRING"),
+        bigquery.SchemaField("time_col", "TIME"),
+        bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
+        bigquery.SchemaField("duration_col", "INTEGER"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def scalar_types_df(compiler_session) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.scalar_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("rowindex", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def scalar_types_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing all scalar types and using the `rowindex`
+    column as the index."""
+    # TODO: add tests for empty dataframes
+    df = pd.read_json(
+        DATA_DIR / "scalars.jsonl",
+        lines=True,
+    )
+    bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True)
+
+    df = df.set_index("rowindex", drop=False)
+    return df
+
+
+@pytest.fixture(scope="module")
+def scalar_types_array_value(
+    scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session
+) -> core.ArrayValue:
+    managed_data_source = core.local_data.ManagedArrowTable.from_pandas(
+        scalar_types_pandas_df
+    )
+    return core.ArrayValue.from_managed(managed_data_source, compiler_session)
+
+
+@pytest.fixture(scope="session")
+def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("id", "INTEGER"),
+        bigquery.SchemaField(
+            "people",
+            "RECORD",
+            fields=[
+                bigquery.SchemaField("name", "STRING"),
+                bigquery.SchemaField("age", "INTEGER"),
+                bigquery.SchemaField(
+                    "address",
+                    "RECORD",
+                    fields=[
+                        bigquery.SchemaField("city", "STRING"),
+                        bigquery.SchemaField("country", "STRING"),
+                    ],
+                ),
+            ],
+        ),
+    ]
+
+
+@pytest.fixture(scope="session")
+def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing nested STRUCT types and using the `id`
+    column as the index."""
+    bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.nested_structs_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("id", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def nested_structs_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing STRUCT types and using the `id`
+    column as the index."""
+
+    df = pd.read_json(
+        DATA_DIR / "nested_structs.jsonl",
+        lines=True,
+    )
+    df = df.set_index("id")
+
+    address_struct_schema = pa.struct(
+        [pa.field("city", pa.string()), pa.field("country", pa.string())]
+    )
+    person_struct_schema = pa.struct(
+        [
+            pa.field("name", pa.string()),
+            pa.field("age", pa.int64()),
+            pa.field("address", address_struct_schema),
+        ]
+    )
+    df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema))
+    return df
+
+
+@pytest.fixture(scope="session")
+def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"),
+        bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"),
+        bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"),
+        bigquery.SchemaField("date_list_col", "DATE", "REPEATED"),
+        bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"),
+        bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"),
+        bigquery.SchemaField("string_list_col", "STRING", "REPEATED"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing repeated data types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session_w_repeated_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.repeated_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("rowindex", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def repeated_types_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing LIST types and using the `rowindex`
+    column as the index."""
+
+    df = pd.read_json(
+        DATA_DIR / "repeated.jsonl",
+        lines=True,
+    )
+    # TODO: add dtype conversion here if needed.
+    df = df.set_index("rowindex")
+    return df
+
+
+@pytest.fixture(scope="session")
+def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("json_col", "JSON"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing JSON types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session_w_json_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.json_types",
+        enable_snapshot=False,
+    )
+    # TODO(b/427305807): Why does `drop=False` produce two "rowindex" columns?
+    bf_df = bf_df.set_index("rowindex", drop=True)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def json_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing JSON types and using the `rowindex`
+    column as the index."""
+    json_data = [
+        "null",
+        "true",
+        "100",
+        "0.98",
+        '"a string"',
+        "[]",
+        "[1, 2, 3]",
+        '[{"a": 1}, {"a": 2}, {"a": null}, {}]',
+        '"100"',
+        '{"date": "2024-07-16"}',
+        '{"int_value": 2, "null_filed": null}',
+        '{"list_data": [10, 20, 30]}',
+    ]
+    df = pd.DataFrame(
+        {
+            "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE),
+            "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE),
+        },
+    )
+    # TODO(b/427305807): Why does `drop=False` produce two "rowindex" columns?
+    df = df.set_index("rowindex", drop=True)
+    return df
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
new file mode 100644
index 000000000000..d74f1fa20eee
--- /dev/null
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
@@ -0,0 +1,4 @@
+SELECT
+  `rowindex`,
+  AEAD.DECRYPT_BYTES(`string_col`, `bytes_col`, `bytes_col`) AS `0`
+FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
new file mode 100644
index 000000000000..1c2b75812b9e
--- /dev/null
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
@@ -0,0 +1,4 @@
+SELECT
+  `rowindex`,
+  AEAD.DECRYPT_STRING(`string_col`, `bytes_col`, `string_col`) AS `0`
+FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
new file mode 100644
index 000000000000..e62f74d8fd9d
--- /dev/null
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
@@ -0,0 +1,4 @@
+SELECT
+  `rowindex`,
+  AEAD.ENCRYPT(`string_col`, `bytes_col`, `bytes_col`) AS `0`
+FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
new file mode 100644
index 000000000000..1b9168687b41
--- /dev/null
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -0,0 +1,54 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: scripts/data/sql-functions/aead.yaml
+# by the script: scripts/generate_bigframes_bigquery.py
+
+from typing import cast
+
+import pytest
+
+import bigframes.pandas as bpd
+import bigframes.bigquery._operations.aead as aead
+
+pytest.importorskip("pytest_snapshot")
+
+
+def test_decrypt_bytes(scalar_types_df: bpd.DataFrame, snapshot):
+    result = aead.decrypt_bytes(
+        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+    ).to_frame()
+    snapshot.assert_match(result.sql, "out.sql")
+
+
+def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
+    result = aead.decrypt_string(
+        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+        cast(bpd.Series, scalar_types_df["string_col"]),
+    ).to_frame()
+    snapshot.assert_match(result.sql, "out.sql")
+
+
+def test_encrypt(scalar_types_df: bpd.DataFrame, snapshot):
+    result = aead.encrypt(
+        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+    ).to_frame()
+    snapshot.assert_match(result.sql, "out.sql")

From e84cbdcdba4e3c8a7d3aa0fba35a112e82eb493e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:03:03 +0000
Subject: [PATCH 09/20] align pre-commit with migration to ruff

---
 .pre-commit-config.yaml                          | 16 +++++++---------
 .../bigframes/bigframes/bigquery/_googlesql.py   |  2 +-
 .../bigframes/bigquery/_operations/aead.py       |  4 ++--
 packages/bigframes/bigframes/dtypes.py           |  4 ++--
 .../tests/unit/bigquery/_operations/test_aead.py |  5 ++---
 5 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 101c6bdc13d1..e0fdf49d917c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,12 +21,10 @@ repos:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
     -   id: check-yaml
--   repo: https://github.com/psf/black
-    rev: 23.7.0
-    hooks:
-    - id: black
--   repo: https://github.com/pycqa/flake8
-    rev: 6.1.0 # version-scanner: ignore
-    hooks:
-    - id: flake8
-      args: [--config, packages/google-cloud-alloydb/.flake8]
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.14.14
+  hooks:
+    # Run the linter.
+    - id: ruff-check
+      args: [ --select, I, --fix, --target-version=py310, --line-length=88 ]
diff --git a/packages/bigframes/bigframes/bigquery/_googlesql.py b/packages/bigframes/bigframes/bigquery/_googlesql.py
index ebf0bb81dc7e..4f15d1f3c277 100644
--- a/packages/bigframes/bigframes/bigquery/_googlesql.py
+++ b/packages/bigframes/bigframes/bigquery/_googlesql.py
@@ -21,8 +21,8 @@
 import bigframes.core.col
 import bigframes.core.expression as ex
 import bigframes.core.sentinels as sentinels
-from bigframes.operations import googlesql
 import bigframes.series as series
+from bigframes.operations import googlesql
 
 
 def apply_googlesql_scalar_op(
diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py
index cbc995b1d5d7..1a7c02ec2e63 100644
--- a/packages/bigframes/bigframes/bigquery/_operations/aead.py
+++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py
@@ -21,11 +21,11 @@
 
 from typing import TypeVar, Union
 
-from bigframes import dtypes
 import bigframes.bigquery._googlesql
 import bigframes.core.col
-from bigframes.operations import googlesql
 import bigframes.series as series
+from bigframes import dtypes
+from bigframes.operations import googlesql
 
 T = TypeVar("T", series.Series, bigframes.core.col.Expression)
 
diff --git a/packages/bigframes/bigframes/dtypes.py b/packages/bigframes/bigframes/dtypes.py
index e2bd9dd601f7..e7539c59c7d7 100644
--- a/packages/bigframes/bigframes/dtypes.py
+++ b/packages/bigframes/bigframes/dtypes.py
@@ -14,13 +14,13 @@
 
 """Mappings for Pandas dtypes supported by BigQuery DataFrames package"""
 
-from dataclasses import dataclass
 import datetime
 import decimal
 import textwrap
 import typing
-from typing import Any, Dict, List, Literal, Sequence, Union
 import warnings
+from dataclasses import dataclass
+from typing import Any, Dict, List, Literal, Sequence, Union
 
 import bigframes_vendored.constants as constants
 import db_dtypes  # type: ignore
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
index 1b9168687b41..95ab84c447d3 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -19,10 +19,9 @@
 
 from typing import cast
 
-import pytest
-
-import bigframes.pandas as bpd
 import bigframes.bigquery._operations.aead as aead
+import bigframes.pandas as bpd
+import pytest
 
 pytest.importorskip("pytest_snapshot")
 

From 8fa1159204860c22a75f14573c03ec24052a5ac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:09:08 +0000
Subject: [PATCH 10/20] configure ruff isort known-first-party packages

---
 packages/bigframes/pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/bigframes/pyproject.toml b/packages/bigframes/pyproject.toml
index fed528d4a7a1..fb658d69dafe 100644
--- a/packages/bigframes/pyproject.toml
+++ b/packages/bigframes/pyproject.toml
@@ -1,3 +1,6 @@
 [build-system]
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
+
+[tool.ruff.lint.isort]
+known-first-party = ["bigframes", "bigframes_vendored"]

From 8e4b7c551d9b2fe614de5e83eda6a06ffd6ab030 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:10:08 +0000
Subject: [PATCH 11/20] sort imports

---
 .../bigframes/tests/unit/bigquery/_operations/test_aead.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
index 95ab84c447d3..d30474183ae2 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -19,9 +19,10 @@
 
 from typing import cast
 
+import pytest
+
 import bigframes.bigquery._operations.aead as aead
 import bigframes.pandas as bpd
-import pytest
 
 pytest.importorskip("pytest_snapshot")
 

From 1332bc5eb7e2d9769c34387e5dc52d546f82f7bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:14:49 +0000
Subject: [PATCH 12/20] update gen to run ruff

---
 .../bigframes/bigquery/_operations/aead.py    |  6 ++-
 .../scripts/generate_bigframes_bigquery.py    | 22 +++++----
 .../generate_bigframes_bigquery.py.lock       | 49 ++++++++++---------
 .../test_aead/test_decrypt_bytes/out.sql      |  2 +-
 .../test_aead/test_decrypt_string/out.sql     |  2 +-
 .../snapshots/test_aead/test_encrypt/out.sql  |  2 +-
 .../unit/bigquery/_operations/test_aead.py    |  6 +--
 7 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py
index 1a7c02ec2e63..a7b3f9dfae54 100644
--- a/packages/bigframes/bigframes/bigquery/_operations/aead.py
+++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py
@@ -19,10 +19,14 @@
 
 from __future__ import annotations
 
-from typing import TypeVar, Union
+import datetime
+from typing import Any, Optional, TypeVar, Union
 
 import bigframes.bigquery._googlesql
 import bigframes.core.col
+import bigframes.core.expression as ex
+import bigframes.core.sentinels as sentinels
+import bigframes.operations as ops
 import bigframes.series as series
 from bigframes import dtypes
 from bigframes.operations import googlesql
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index ff5dcb1232cd..02914b24dae7 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -2,9 +2,9 @@
 #
 # /// script
 # dependencies = [
-#   "autoflake",
 #   "jinja2",
 #   "pyyaml",
+#   "ruff==0.14.14",
 # ]
 # ///
 #
@@ -36,6 +36,16 @@
 # Directory where the generated test files will be placed
 TEST_OUTPUT_DIR = pathlib.Path("tests/unit/bigquery/_operations")
 
+RUFF_ARGS = [
+                "ruff",
+                "check",
+                "--select",
+                "I",
+                "--fix",
+                "--target-version=py310",
+                "--line-length=88",
+]
+
 LICENSE_HEADER = """# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -308,10 +318,7 @@ def main():
             f.write(content)
 
         subprocess.run(
-            [
-                "autoflake",
-                "--in-place",
-                "--remove-all-unused-imports",
+            RUFF_ARGS + [
                 str(output_file),
             ],
             check=True,
@@ -337,10 +344,7 @@ def main():
             f.write(test_content)
 
         subprocess.run(
-            [
-                "autoflake",
-                "--in-place",
-                "--remove-all-unused-imports",
+            RUFF_ARGS + [
                 str(test_output_file),
             ],
             check=True,
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
index 3cba9097522d..0c89fde6d406 100644
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock
@@ -4,21 +4,9 @@ requires-python = ">=3.14"
 
 [manifest]
 requirements = [
-    { name = "autoflake" },
     { name = "jinja2" },
     { name = "pyyaml" },
-]
-
-[[package]]
-name = "autoflake"
-version = "2.3.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pyflakes" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/c3/0b/70c277eef225133763bf05c02c88df182e57d5c5c0730d3998958096a82e/autoflake-2.3.3.tar.gz", hash = "sha256:c24809541e23999f7a7b0d2faadf15deb0bc04cdde49728a2fd943a0c8055504", size = 16515, upload-time = "2026-02-20T05:01:43.448Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/da/21/26f1680ec3a598ea31768f9ebcd427e42986d077a005416094b580635532/autoflake-2.3.3-py3-none-any.whl", hash = "sha256:a51a3412aff16135ee5b3ec25922459fef10c1f23ce6d6c4977188df859e8b53", size = 17715, upload-time = "2026-02-20T05:01:42.137Z" },
+    { name = "ruff", specifier = "==0.14.14" },
 ]
 
 [[package]]
@@ -63,15 +51,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
 ]
 
-[[package]]
-name = "pyflakes"
-version = "3.4.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/45/dc/fd034dc20b4b264b3d015808458391acbf9df40b1e54750ef175d39180b1/pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58", size = 64669, upload-time = "2025-06-20T18:45:27.834Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/c2/2f/81d580a0fb83baeb066698975cb14a618bdbed7720678566f1b046a95fe8/pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f", size = 63551, upload-time = "2025-06-20T18:45:26.937Z" },
-]
-
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
@@ -97,3 +76,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
     { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
 ]
+
+[[package]]
+name = "ruff"
+version = "0.14.14"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" },
+    { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" },
+    { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" },
+    { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" },
+    { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" },
+    { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" },
+]
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
index d74f1fa20eee..5b8b6416b36f 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
-  AEAD.DECRYPT_BYTES(`string_col`, `bytes_col`, `bytes_col`) AS `0`
+  AEAD.DECRYPT_BYTES(`bytes_col`, `bytes_col`, `bytes_col`) AS `0`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
index 1c2b75812b9e..97b1ccff9c75 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
-  AEAD.DECRYPT_STRING(`string_col`, `bytes_col`, `string_col`) AS `0`
+  AEAD.DECRYPT_STRING(`bytes_col`, `bytes_col`, `string_col`) AS `0`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
index e62f74d8fd9d..9ab9f8c0a7bb 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
-  AEAD.ENCRYPT(`string_col`, `bytes_col`, `bytes_col`) AS `0`
+  AEAD.ENCRYPT(`bytes_col`, `bytes_col`, `bytes_col`) AS `0`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
index d30474183ae2..cafcd7d95278 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -29,7 +29,7 @@
 
 def test_decrypt_bytes(scalar_types_df: bpd.DataFrame, snapshot):
     result = aead.decrypt_bytes(
-        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
     ).to_frame()
@@ -38,7 +38,7 @@ def test_decrypt_bytes(scalar_types_df: bpd.DataFrame, snapshot):
 
 def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
     result = aead.decrypt_string(
-        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["string_col"]),
     ).to_frame()
@@ -47,7 +47,7 @@ def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
 
 def test_encrypt(scalar_types_df: bpd.DataFrame, snapshot):
     result = aead.encrypt(
-        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
     ).to_frame()

From 4f3564426ea9462a3a145b8c8e6dc5dc09890776 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:38:33 +0000
Subject: [PATCH 13/20] fix mypy

---
 packages/bigframes/bigframes/bigquery/_googlesql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/bigframes/bigframes/bigquery/_googlesql.py b/packages/bigframes/bigframes/bigquery/_googlesql.py
index 4f15d1f3c277..5ac8ce826e9b 100644
--- a/packages/bigframes/bigframes/bigquery/_googlesql.py
+++ b/packages/bigframes/bigframes/bigquery/_googlesql.py
@@ -52,7 +52,7 @@ def apply_googlesql_scalar_op(
             break
 
     if first_series is not None:
-        processed_args = []
+        processed_args: list[Union[bigframes.core.col.Expression, series.Series]] = []
         block = first_series._block
         for arg in args:
             if isinstance(arg, bigframes.core.col.Expression):

From c69a7f34681f0f62a9d51a519fd4a905f2921200 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 16:59:19 +0000
Subject: [PATCH 14/20] include namespace in bbq

---
 .../bigframes/bigframes/bigquery/__init__.py  |  3 ++-
 packages/bigframes/bigframes/bigquery/aead.py | 25 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 packages/bigframes/bigframes/bigquery/aead.py

diff --git a/packages/bigframes/bigframes/bigquery/__init__.py b/packages/bigframes/bigframes/bigquery/__init__.py
index a31d7dd83f93..86a45546b748 100644
--- a/packages/bigframes/bigframes/bigquery/__init__.py
+++ b/packages/bigframes/bigframes/bigquery/__init__.py
@@ -47,7 +47,7 @@
 
 import sys
 
-from bigframes.bigquery import ai, ml, obj
+from bigframes.bigquery import aead, ai, ml, obj
 from bigframes.bigquery._operations.approx_agg import approx_top_count
 from bigframes.bigquery._operations.array import (
     array_agg,
@@ -208,6 +208,7 @@
     # io ops
     "load_data",
     # Modules / SQL namespaces
+    "aead",
     "ai",
     "ml",
     "obj",
diff --git a/packages/bigframes/bigframes/bigquery/aead.py b/packages/bigframes/bigframes/bigquery/aead.py
new file mode 100644
index 000000000000..f18e12bc5cf5
--- /dev/null
+++ b/packages/bigframes/bigframes/bigquery/aead.py
@@ -0,0 +1,25 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AEAD encryption functions"""
+
+from __future__ import annotations
+
+from bigframes.bigquery._operations.aead import decrypt_bytes, decrypt_string, encrypt
+
+__all__ = [
+    "decrypt_bytes",
+    "decrypt_string",
+    "encrypt",
+]

From c555f4a5668da7420fb3cc83c19c442100cbed8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 17:09:31 +0000
Subject: [PATCH 15/20] attempt to fix snapshot

---
 .../bigframes/scripts/generate_bigframes_bigquery.py   |  4 ++--
 .../snapshots/test_aead/test_encrypt/out.sql           |  2 +-
 .../tests/unit/bigquery/_operations/test_aead.py       | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 02914b24dae7..bd634dd0c815 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -108,7 +108,7 @@ def {{ func.name }}(
 {% endfor %}
 """
 
-TEST_TEMPLATE = """{{ license_header }}
+TEST_TEMPLATE = r"""{{ license_header }}
 #
 # DO NOT MODIFY THIS FILE DIRECTLY.
 # This file was generated from: {{ yaml_path }}
@@ -131,7 +131,7 @@ def test_{{ func.name }}(scalar_types_df: bpd.DataFrame, snapshot):
         cast(bpd.Series, scalar_types_df["{{ arg.col_name }}"]),
 {% endfor %}
     ).to_frame()
-    snapshot.assert_match(result.sql, "out.sql")
+    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
 
 
 {% endfor %}
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
index 9ab9f8c0a7bb..eba30737f631 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
-  AEAD.ENCRYPT(`bytes_col`, `bytes_col`, `bytes_col`) AS `0`
+  AEAD.ENCRYPT(`bytes_col`, `string_col`, `string_col`) AS `0`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
index cafcd7d95278..62a3b0d95725 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -33,7 +33,7 @@ def test_decrypt_bytes(scalar_types_df: bpd.DataFrame, snapshot):
         cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["bytes_col"]),
     ).to_frame()
-    snapshot.assert_match(result.sql, "out.sql")
+    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
 
 
 def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
@@ -42,13 +42,13 @@ def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
         cast(bpd.Series, scalar_types_df["bytes_col"]),
         cast(bpd.Series, scalar_types_df["string_col"]),
     ).to_frame()
-    snapshot.assert_match(result.sql, "out.sql")
+    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
 
 
 def test_encrypt(scalar_types_df: bpd.DataFrame, snapshot):
     result = aead.encrypt(
         cast(bpd.Series, scalar_types_df["bytes_col"]),
-        cast(bpd.Series, scalar_types_df["bytes_col"]),
-        cast(bpd.Series, scalar_types_df["bytes_col"]),
+        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["string_col"]),
     ).to_frame()
-    snapshot.assert_match(result.sql, "out.sql")
+    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")

From d8d2cbfbe75b8535ef532d9b961679b4caf35dd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 18:07:19 +0000
Subject: [PATCH 16/20] remove bigframes vendored as first-party

---
 packages/bigframes/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/bigframes/pyproject.toml b/packages/bigframes/pyproject.toml
index fb658d69dafe..e7d9c326a936 100644
--- a/packages/bigframes/pyproject.toml
+++ b/packages/bigframes/pyproject.toml
@@ -3,4 +3,4 @@ requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [tool.ruff.lint.isort]
-known-first-party = ["bigframes", "bigframes_vendored"]
+known-first-party = ["bigframes"]

From 5ea61c269b4dc50828f9c547c427cec9f07fc8d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 18:09:33 +0000
Subject: [PATCH 17/20] apply ruff formatting to generate_bigframes_bigquery.py

---
 .../scripts/generate_bigframes_bigquery.py    | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index bd634dd0c815..39063c7bee1d 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -37,13 +37,13 @@
 TEST_OUTPUT_DIR = pathlib.Path("tests/unit/bigquery/_operations")
 
 RUFF_ARGS = [
-                "ruff",
-                "check",
-                "--select",
-                "I",
-                "--fix",
-                "--target-version=py310",
-                "--line-length=88",
+    "ruff",
+    "check",
+    "--select",
+    "I",
+    "--fix",
+    "--target-version=py310",
+    "--line-length=88",
 ]
 
 LICENSE_HEADER = """# Copyright 2026 Google LLC
@@ -318,7 +318,8 @@ def main():
             f.write(content)
 
         subprocess.run(
-            RUFF_ARGS + [
+            RUFF_ARGS
+            + [
                 str(output_file),
             ],
             check=True,
@@ -344,7 +345,8 @@ def main():
             f.write(test_content)
 
         subprocess.run(
-            RUFF_ARGS + [
+            RUFF_ARGS
+            + [
                 str(test_output_file),
             ],
             check=True,

From a463ea6cbcfcea9353bb74ef1756441ab5b71802 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 21:52:44 +0000
Subject: [PATCH 18/20] create template files

---
 .../bigframes/bigquery/_googlesql.py          |   4 +-
 .../bigframes/bigquery/_operations/aead.py    |  21 +-
 .../bigframes/bigframes/core/sentinels.py     |  14 +-
 .../scripts/generate_bigframes_bigquery.py    | 109 +------
 .../bigframes/scripts/templates/license.py.j2 |  13 +
 .../scripts/templates/operation.py.j2         |  46 +++
 .../scripts/templates/test_operation.py.j2    |  28 ++
 .../unit/bigquery/_operations/conftest.py     | 280 ------------------
 .../snapshots/test_aead/test_encrypt/out.sql  |   2 +-
 .../unit/bigquery/_operations/test_aead.py    |   5 +-
 packages/bigframes/tests/unit/conftest.py     | 265 +++++++++++++++++
 .../unit/core/compile/sqlglot/conftest.py     | 280 ------------------
 12 files changed, 384 insertions(+), 683 deletions(-)
 create mode 100644 packages/bigframes/scripts/templates/license.py.j2
 create mode 100644 packages/bigframes/scripts/templates/operation.py.j2
 create mode 100644 packages/bigframes/scripts/templates/test_operation.py.j2
 delete mode 100644 packages/bigframes/tests/unit/bigquery/_operations/conftest.py
 delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py

diff --git a/packages/bigframes/bigframes/bigquery/_googlesql.py b/packages/bigframes/bigframes/bigquery/_googlesql.py
index 5ac8ce826e9b..a37c9790ff56 100644
--- a/packages/bigframes/bigframes/bigquery/_googlesql.py
+++ b/packages/bigframes/bigframes/bigquery/_googlesql.py
@@ -58,7 +58,7 @@ def apply_googlesql_scalar_op(
             if isinstance(arg, bigframes.core.col.Expression):
                 block, col_id = block.project_expr(bigframes.core.col._as_bf_expr(arg))
                 processed_args.append(series.Series(block.select_column(col_id)))
-            elif arg is sentinels.DEFAULT:
+            elif arg is sentinels.Sentinel.ARGUMENT_DEFAULT:
                 processed_args.append(bigframes.core.col.Expression(ex.OmittedArg()))
             else:
                 processed_args.append(arg)
@@ -73,7 +73,7 @@ def apply_googlesql_scalar_op(
     for arg in args:
         if isinstance(arg, bigframes.core.col.Expression):
             expr_args.append(bigframes.core.col._as_bf_expr(arg))
-        elif arg is sentinels.DEFAULT:
+        elif arg is sentinels.Sentinel.ARGUMENT_DEFAULT:
             expr_args.append(ex.OmittedArg())
         else:
             expr_args.append(ex.const(arg))
diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py
index a7b3f9dfae54..fb98bed59be9 100644
--- a/packages/bigframes/bigframes/bigquery/_operations/aead.py
+++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 #
 # DO NOT MODIFY THIS FILE DIRECTLY.
 # This file was generated from: scripts/data/sql-functions/aead.yaml
@@ -20,7 +19,7 @@
 from __future__ import annotations
 
 import datetime
-from typing import Any, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, TypeVar, Union
 
 import bigframes.bigquery._googlesql
 import bigframes.core.col
@@ -51,9 +50,9 @@
 
 
 def decrypt_bytes(
-    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
-    ciphertext: Union[T, bigframes.core.col.Expression, bytes],
-    additional_data: Union[T, bigframes.core.col.Expression, bytes],
+    keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]],
+    ciphertext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]],
+    additional_data: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]],
 ) -> T:
     """Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails."""
     return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
@@ -65,9 +64,9 @@ def decrypt_bytes(
 
 
 def decrypt_string(
-    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
-    ciphertext: Union[T, bigframes.core.col.Expression, bytes],
-    additional_data: Union[T, bigframes.core.col.Expression, str],
+    keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]],
+    ciphertext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]],
+    additional_data: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str]],
 ) -> T:
     """Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING."""
     return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
@@ -79,9 +78,9 @@ def decrypt_string(
 
 
 def encrypt(
-    keyset: Union[T, bigframes.core.col.Expression, Union[bytes, dict]],
-    plaintext: Union[T, bigframes.core.col.Expression, Union[bytes, str]],
-    additional_data: Union[T, bigframes.core.col.Expression, Union[bytes, str]],
+    keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]],
+    plaintext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str]],
+    additional_data: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str]],
 ) -> T:
     """Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL."""
     return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
diff --git a/packages/bigframes/bigframes/core/sentinels.py b/packages/bigframes/bigframes/core/sentinels.py
index fc2bfac970e5..ff9913f7c6fd 100644
--- a/packages/bigframes/bigframes/core/sentinels.py
+++ b/packages/bigframes/bigframes/core/sentinels.py
@@ -16,18 +16,16 @@
 
 from __future__ import annotations
 
-from enum import Enum
+import enum
 
 
-class Default(Enum):
-    """Default values used throughout BigFrames.
+class Sentinel(enum.Enum):
+    """Sentinel values used throughout BigFrames."""
+
+    """Default value for an optional argument.
 
     When a parameter is set to this, that parameter is explicitly omitted
     from the SQL text. This allows for NULL (None in Python) to be explicitly
     passed in to optional parameters.
     """
-
-    token = 0
-
-
-DEFAULT = Default.token
+    ARGUMENT_DEFAULT = enum.auto()
diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 39063c7bee1d..4baaef3ce6b0 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -35,6 +35,8 @@
 OUTPUT_DIR = pathlib.Path("bigframes/bigquery/_operations")
 # Directory where the generated test files will be placed
 TEST_OUTPUT_DIR = pathlib.Path("tests/unit/bigquery/_operations")
+# Directory containing the Jinja2 templates
+TEMPLATE_DIR = pathlib.Path("scripts/templates")
 
 RUFF_ARGS = [
     "ruff",
@@ -46,97 +48,6 @@
     "--line-length=88",
 ]
 
-LICENSE_HEADER = """# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-TEMPLATE = """{{ license_header }}
-#
-# DO NOT MODIFY THIS FILE DIRECTLY.
-# This file was generated from: {{ yaml_path }}
-# by the script: {{ script_path }}
-
-from __future__ import annotations
-
-import datetime
-from typing import Any, Optional, TypeVar, Union
-
-from bigframes import dtypes
-import bigframes.bigquery._googlesql
-import bigframes.core.col
-import bigframes.core.expression as ex
-import bigframes.core.sentinels as sentinels
-from bigframes.operations import googlesql
-import bigframes.operations as ops
-import bigframes.series as series
-
-T = TypeVar("T", series.Series, bigframes.core.col.Expression)
-
-{% for op in ops %}
-{{ op.internal_name }} = googlesql.GoogleSqlScalarOp(
-    "{{ op.sql_name }}",
-    args=({{ op.arg_specs }}),
-    signature={{ op.signature }},
-)
-{% endfor %}
-{% for func in functions %}
-
-
-def {{ func.name }}(
-{% for arg in func.args %}
-    {{ arg.name }}: Union[T, bigframes.core.col.Expression, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
-{% endfor %}
-) -> T:
-    \"\"\"{{ func.description }}\"\"\"
-    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
-        {{ func.op_name }},
-{% for arg in func.args %}
-        {{ arg.name }},
-{% endfor %}
-    )  # type: ignore
-{% endfor %}
-"""
-
-TEST_TEMPLATE = r"""{{ license_header }}
-#
-# DO NOT MODIFY THIS FILE DIRECTLY.
-# This file was generated from: {{ yaml_path }}
-# by the script: {{ script_path }}
-
-from typing import cast
-
-import pytest
-
-import bigframes.pandas as bpd
-import {{ import_path }} as {{ short_name }}
-
-pytest.importorskip("pytest_snapshot")
-
-
-{% for func in functions %}
-def test_{{ func.name }}(scalar_types_df: bpd.DataFrame, snapshot):
-    result = {{ short_name }}.{{ func.name }}(
-{% for arg in func.test_args %}
-        cast(bpd.Series, scalar_types_df["{{ arg.col_name }}"]),
-{% endfor %}
-    ).to_frame()
-    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
-
-
-{% endfor %}
-"""
-
 DTYPE_MAP = {
     "binary": "dtypes.BYTES_DTYPE",
     "string": "dtypes.STRING_DTYPE",
@@ -191,9 +102,13 @@ def to_snake_case(name):
 
 
 def main():
-    env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
-    template = env.from_string(TEMPLATE)
-    test_template = env.from_string(TEST_TEMPLATE)
+    env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(TEMPLATE_DIR),
+        trim_blocks=True,
+        lstrip_blocks=True,
+    )
+    template = env.get_template("operation.py.j2")
+    test_template = env.get_template("test_operation.py.j2")
 
     for yaml_file in DATA_DIR.glob("**/*.yaml"):
         print(f"Processing {yaml_file}...")
@@ -266,13 +181,13 @@ def main():
                 func_args = []
                 for name in arg_order:
                     arg_info = args_by_name[name]
-                    types = [PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]]
+                    types = [PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]] + ["Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]"]
                     type_hint = (
                         "Union[" + ", ".join(sorted(set(types))) + "]"
                         if len(types) > 1
                         else types[0]
                     )
-                    default = "sentinels.DEFAULT" if arg_info["optional"] else ""
+                    default = "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else ""
                     func_args.append(
                         {
                             "name": name,
@@ -308,7 +223,6 @@ def main():
         # Render and write
         output_file.parent.mkdir(parents=True, exist_ok=True)
         content = template.render(
-            license_header=LICENSE_HEADER,
             yaml_path=str(yaml_file),
             script_path="scripts/generate_bigframes_bigquery.py",
             ops=ops_list,
@@ -334,7 +248,6 @@ def main():
 
         test_output_file.parent.mkdir(parents=True, exist_ok=True)
         test_content = test_template.render(
-            license_header=LICENSE_HEADER,
             yaml_path=str(yaml_file),
             script_path="scripts/generate_bigframes_bigquery.py",
             import_path=import_path,
diff --git a/packages/bigframes/scripts/templates/license.py.j2 b/packages/bigframes/scripts/templates/license.py.j2
new file mode 100644
index 000000000000..58d482ea3866
--- /dev/null
+++ b/packages/bigframes/scripts/templates/license.py.j2
@@ -0,0 +1,13 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/packages/bigframes/scripts/templates/operation.py.j2 b/packages/bigframes/scripts/templates/operation.py.j2
new file mode 100644
index 000000000000..7e7c7f95b62f
--- /dev/null
+++ b/packages/bigframes/scripts/templates/operation.py.j2
@@ -0,0 +1,46 @@
+{% include 'license.py.j2' %}
+
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: {{ yaml_path }}
+# by the script: {{ script_path }}
+
+from __future__ import annotations
+
+import datetime
+from typing import Any, Literal, Optional, TypeVar, Union
+
+from bigframes import dtypes
+import bigframes.bigquery._googlesql
+import bigframes.core.col
+import bigframes.core.expression as ex
+import bigframes.core.sentinels as sentinels
+from bigframes.operations import googlesql
+import bigframes.operations as ops
+import bigframes.series as series
+
+T = TypeVar("T", series.Series, bigframes.core.col.Expression)
+
+{% for op in ops %}
+{{ op.internal_name }} = googlesql.GoogleSqlScalarOp(
+    "{{ op.sql_name }}",
+    args=({{ op.arg_specs }}),
+    signature={{ op.signature }},
+)
+{% endfor %}
+{% for func in functions %}
+
+
+def {{ func.name }}(
+{% for arg in func.args %}
+    {{ arg.name }}: Union[T, bigframes.core.col.Expression, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %},
+{% endfor %}
+) -> T:
+    """{{ func.description }}"""
+    return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
+        {{ func.op_name }},
+{% for arg in func.args %}
+        {{ arg.name }},
+{% endfor %}
+    )  # type: ignore
+{% endfor %}
diff --git a/packages/bigframes/scripts/templates/test_operation.py.j2 b/packages/bigframes/scripts/templates/test_operation.py.j2
new file mode 100644
index 000000000000..aa87fcb94bee
--- /dev/null
+++ b/packages/bigframes/scripts/templates/test_operation.py.j2
@@ -0,0 +1,28 @@
+{% include 'license.py.j2' %}
+
+#
+# DO NOT MODIFY THIS FILE DIRECTLY.
+# This file was generated from: {{ yaml_path }}
+# by the script: {{ script_path }}
+
+from typing import cast
+
+import pytest
+
+import bigframes.pandas as bpd
+import {{ import_path }} as {{ short_name }}
+
+pytest.importorskip("pytest_snapshot")
+
+
+{% for func in functions %}
+def test_{{ func.name }}(scalar_types_df: bpd.DataFrame, snapshot):
+    result = {{ short_name }}.{{ func.name }}(
+{% for arg in func.test_args %}
+        cast(bpd.Series, scalar_types_df["{{ arg.col_name }}"]),
+{% endfor %}
+    ).to_frame()
+    snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
+
+
+{% endfor %}
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/conftest.py b/packages/bigframes/tests/unit/bigquery/_operations/conftest.py
deleted file mode 100644
index 127902241acb..000000000000
--- a/packages/bigframes/tests/unit/bigquery/_operations/conftest.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pathlib
-import typing
-
-import pandas as pd
-import pyarrow as pa
-import pytest
-from google.cloud import bigquery
-
-import bigframes.core as core
-import bigframes.pandas as bpd
-import bigframes.testing.mocks as mocks
-import bigframes.testing.utils
-from bigframes import dtypes
-
-CURRENT_DIR = pathlib.Path(__file__).parent
-DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data"
-
-
-def _create_compiler_session(table_name, table_schema):
-    """Helper function to create a compiler session."""
-    from bigframes.testing import compiler_session
-
-    anonymous_dataset = bigquery.DatasetReference.from_string(
-        "bigframes-dev.sqlglot_test"
-    )
-    session = mocks.create_bigquery_session(
-        table_name=table_name,
-        table_schema=table_schema,
-        anonymous_dataset=anonymous_dataset,
-    )
-    session._executor = compiler_session.SQLCompilerExecutor()
-    return session
-
-
-@pytest.fixture(scope="session")
-def compiler_session(scalar_types_table_schema):
-    """Compiler session for scalar types."""
-    return _create_compiler_session("scalar_types", scalar_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_repeated_types(repeated_types_table_schema):
-    """Compiler session for repeated data types."""
-    return _create_compiler_session("repeated_types", repeated_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_nested_structs_types(nested_structs_types_table_schema):
-    """Compiler session for nested STRUCT data types."""
-    return _create_compiler_session(
-        "nested_structs_types", nested_structs_types_table_schema
-    )
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_json_types(json_types_table_schema):
-    """Compiler session for JSON data types."""
-    return _create_compiler_session("json_types", json_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("bool_col", "BOOLEAN"),
-        bigquery.SchemaField("bytes_col", "BYTES"),
-        bigquery.SchemaField("date_col", "DATE"),
-        bigquery.SchemaField("datetime_col", "DATETIME"),
-        bigquery.SchemaField("geography_col", "GEOGRAPHY"),
-        bigquery.SchemaField("int64_col", "INTEGER"),
-        bigquery.SchemaField("int64_too", "INTEGER"),
-        bigquery.SchemaField("numeric_col", "NUMERIC"),
-        bigquery.SchemaField("float64_col", "FLOAT"),
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"),
-        bigquery.SchemaField("string_col", "STRING"),
-        bigquery.SchemaField("time_col", "TIME"),
-        bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
-        bigquery.SchemaField("duration_col", "INTEGER"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def scalar_types_df(compiler_session) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.scalar_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("rowindex", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def scalar_types_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    # TODO: add tests for empty dataframes
-    df = pd.read_json(
-        DATA_DIR / "scalars.jsonl",
-        lines=True,
-    )
-    bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True)
-
-    df = df.set_index("rowindex", drop=False)
-    return df
-
-
-@pytest.fixture(scope="module")
-def scalar_types_array_value(
-    scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session
-) -> core.ArrayValue:
-    managed_data_source = core.local_data.ManagedArrowTable.from_pandas(
-        scalar_types_pandas_df
-    )
-    return core.ArrayValue.from_managed(managed_data_source, compiler_session)
-
-
-@pytest.fixture(scope="session")
-def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("id", "INTEGER"),
-        bigquery.SchemaField(
-            "people",
-            "RECORD",
-            fields=[
-                bigquery.SchemaField("name", "STRING"),
-                bigquery.SchemaField("age", "INTEGER"),
-                bigquery.SchemaField(
-                    "address",
-                    "RECORD",
-                    fields=[
-                        bigquery.SchemaField("city", "STRING"),
-                        bigquery.SchemaField("country", "STRING"),
-                    ],
-                ),
-            ],
-        ),
-    ]
-
-
-@pytest.fixture(scope="session")
-def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.nested_structs_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("id", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def nested_structs_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing STRUCT types and using the `id`
-    column as the index."""
-
-    df = pd.read_json(
-        DATA_DIR / "nested_structs.jsonl",
-        lines=True,
-    )
-    df = df.set_index("id")
-
-    address_struct_schema = pa.struct(
-        [pa.field("city", pa.string()), pa.field("country", pa.string())]
-    )
-    person_struct_schema = pa.struct(
-        [
-            pa.field("name", pa.string()),
-            pa.field("age", pa.int64()),
-            pa.field("address", address_struct_schema),
-        ]
-    )
-    df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema))
-    return df
-
-
-@pytest.fixture(scope="session")
-def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"),
-        bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"),
-        bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"),
-        bigquery.SchemaField("date_list_col", "DATE", "REPEATED"),
-        bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"),
-        bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"),
-        bigquery.SchemaField("string_list_col", "STRING", "REPEATED"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_repeated_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.repeated_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("rowindex", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def repeated_types_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing LIST types and using the `rowindex`
-    column as the index."""
-
-    df = pd.read_json(
-        DATA_DIR / "repeated.jsonl",
-        lines=True,
-    )
-    # TODO: add dtype conversion here if needed.
-    df = df.set_index("rowindex")
-    return df
-
-
-@pytest.fixture(scope="session")
-def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("json_col", "JSON"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing JSON types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_json_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.json_types",
-        enable_snapshot=False,
-    )
-    # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
-    bf_df = bf_df.set_index("rowindex", drop=True)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def json_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing JSON types and using the `rowindex`
-    column as the index."""
-    json_data = [
-        "null",
-        "true",
-        "100",
-        "0.98",
-        '"a string"',
-        "[]",
-        "[1, 2, 3]",
-        '[{"a": 1}, {"a": 2}, {"a": null}, {}]',
-        '"100"',
-        '{"date": "2024-07-16"}',
-        '{"int_value": 2, "null_filed": null}',
-        '{"list_data": [10, 20, 30]}',
-    ]
-    df = pd.DataFrame(
-        {
-            "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE),
-            "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE),
-        },
-    )
-    # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
-    df = df.set_index("rowindex", drop=True)
-    return df
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
index eba30737f631..9ab9f8c0a7bb 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
+++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
-  AEAD.ENCRYPT(`bytes_col`, `string_col`, `string_col`) AS `0`
+  AEAD.ENCRYPT(`bytes_col`, `bytes_col`, `bytes_col`) AS `0`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
index 62a3b0d95725..f9f8fdd2dd88 100644
--- a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
+++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 #
 # DO NOT MODIFY THIS FILE DIRECTLY.
 # This file was generated from: scripts/data/sql-functions/aead.yaml
@@ -48,7 +47,7 @@ def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot):
 def test_encrypt(scalar_types_df: bpd.DataFrame, snapshot):
     result = aead.encrypt(
         cast(bpd.Series, scalar_types_df["bytes_col"]),
-        cast(bpd.Series, scalar_types_df["string_col"]),
-        cast(bpd.Series, scalar_types_df["string_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
+        cast(bpd.Series, scalar_types_df["bytes_col"]),
     ).to_frame()
     snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql")
diff --git a/packages/bigframes/tests/unit/conftest.py b/packages/bigframes/tests/unit/conftest.py
index a9b26afeef29..3ab217cf09ba 100644
--- a/packages/bigframes/tests/unit/conftest.py
+++ b/packages/bigframes/tests/unit/conftest.py
@@ -12,7 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import pathlib
+import typing
+
+import pandas as pd
+import pyarrow as pa
 import pytest
+from google.cloud import bigquery
+
+import bigframes.core as core
+import bigframes.pandas as bpd
+import bigframes.testing.mocks as mocks
+import bigframes.testing.utils
+from bigframes import dtypes
+
+CURRENT_DIR = pathlib.Path(__file__).parent
+DATA_DIR = CURRENT_DIR.parent / "data"
 
 
 @pytest.fixture(scope="session")
@@ -22,3 +37,253 @@ def polars_session():
     from bigframes.testing import polars_session
 
     return polars_session.TestSession()
+
+
+def _create_compiler_session(table_name, table_schema):
+    """Helper function to create a compiler session."""
+    from bigframes.testing import compiler_session
+
+    anonymous_dataset = bigquery.DatasetReference.from_string(
+        "bigframes-dev.sqlglot_test"
+    )
+    session = mocks.create_bigquery_session(
+        table_name=table_name,
+        table_schema=table_schema,
+        anonymous_dataset=anonymous_dataset,
+    )
+    session._executor = compiler_session.SQLCompilerExecutor()
+    return session
+
+
+@pytest.fixture(scope="session")
+def compiler_session(scalar_types_table_schema):
+    """Compiler session for scalar types."""
+    return _create_compiler_session("scalar_types", scalar_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_repeated_types(repeated_types_table_schema):
+    """Compiler session for repeated data types."""
+    return _create_compiler_session("repeated_types", repeated_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_nested_structs_types(nested_structs_types_table_schema):
+    """Compiler session for nested STRUCT data types."""
+    return _create_compiler_session(
+        "nested_structs_types", nested_structs_types_table_schema
+    )
+
+
+@pytest.fixture(scope="session")
+def compiler_session_w_json_types(json_types_table_schema):
+    """Compiler session for JSON data types."""
+    return _create_compiler_session("json_types", json_types_table_schema)
+
+
+@pytest.fixture(scope="session")
+def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("bool_col", "BOOLEAN"),
+        bigquery.SchemaField("bytes_col", "BYTES"),
+        bigquery.SchemaField("date_col", "DATE"),
+        bigquery.SchemaField("datetime_col", "DATETIME"),
+        bigquery.SchemaField("geography_col", "GEOGRAPHY"),
+        bigquery.SchemaField("int64_col", "INTEGER"),
+        bigquery.SchemaField("int64_too", "INTEGER"),
+        bigquery.SchemaField("numeric_col", "NUMERIC"),
+        bigquery.SchemaField("float64_col", "FLOAT"),
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"),
+        bigquery.SchemaField("string_col", "STRING"),
+        bigquery.SchemaField("time_col", "TIME"),
+        bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
+        bigquery.SchemaField("duration_col", "INTEGER"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def scalar_types_df(compiler_session) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.scalar_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("rowindex", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def scalar_types_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing all scalar types and using the `rowindex`
+    column as the index."""
+    # TODO: add tests for empty dataframes
+    df = pd.read_json(
+        DATA_DIR / "scalars.jsonl",
+        lines=True,
+    )
+    bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True)
+
+    df = df.set_index("rowindex", drop=False)
+    return df
+
+
+@pytest.fixture(scope="module")
+def scalar_types_array_value(
+    scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session
+) -> core.ArrayValue:
+    managed_data_source = core.local_data.ManagedArrowTable.from_pandas(
+        scalar_types_pandas_df
+    )
+    return core.ArrayValue.from_managed(managed_data_source, compiler_session)
+
+
+@pytest.fixture(scope="session")
+def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("id", "INTEGER"),
+        bigquery.SchemaField(
+            "people",
+            "RECORD",
+            fields=[
+                bigquery.SchemaField("name", "STRING"),
+                bigquery.SchemaField("age", "INTEGER"),
+                bigquery.SchemaField(
+                    "address",
+                    "RECORD",
+                    fields=[
+                        bigquery.SchemaField("city", "STRING"),
+                        bigquery.SchemaField("country", "STRING"),
+                    ],
+                ),
+            ],
+        ),
+    ]
+
+
+@pytest.fixture(scope="session")
+def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing nested STRUCT types and using the `id`
+    column as the index."""
+    bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.nested_structs_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("id", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def nested_structs_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing STRUCT types and using the `id`
+    column as the index."""
+
+    df = pd.read_json(
+        DATA_DIR / "nested_structs.jsonl",
+        lines=True,
+    )
+    df = df.set_index("id")
+
+    address_struct_schema = pa.struct(
+        [pa.field("city", pa.string()), pa.field("country", pa.string())]
+    )
+    person_struct_schema = pa.struct(
+        [
+            pa.field("name", pa.string()),
+            pa.field("age", pa.int64()),
+            pa.field("address", address_struct_schema),
+        ]
+    )
+    df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema))
+    return df
+
+
+@pytest.fixture(scope="session")
+def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"),
+        bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"),
+        bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"),
+        bigquery.SchemaField("date_list_col", "DATE", "REPEATED"),
+        bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"),
+        bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"),
+        bigquery.SchemaField("string_list_col", "STRING", "REPEATED"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing repeated types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session_w_repeated_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.repeated_types",
+        enable_snapshot=False,
+    )
+    bf_df = bf_df.set_index("rowindex", drop=False)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def repeated_types_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing LIST types and using the `rowindex`
+    column as the index."""
+
+    df = pd.read_json(
+        DATA_DIR / "repeated.jsonl",
+        lines=True,
+    )
+    # TODO: add dtype conversion here if needed.
+    df = df.set_index("rowindex")
+    return df
+
+
+@pytest.fixture(scope="session")
+def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("json_col", "JSON"),
+    ]
+
+
+@pytest.fixture(scope="session")
+def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame:
+    """Returns a BigFrames DataFrame containing JSON types and using the `rowindex`
+    column as the index."""
+    bf_df = compiler_session_w_json_types._loader.read_gbq_table(
+        "bigframes-dev.sqlglot_test.json_types",
+        enable_snapshot=False,
+    )
+    # TODO(b/427305807): Why does `drop=False` produce two "rowindex" columns?
+    bf_df = bf_df.set_index("rowindex", drop=True)
+    return bf_df
+
+
+@pytest.fixture(scope="session")
+def json_pandas_df() -> pd.DataFrame:
+    """Returns a pandas DataFrame containing JSON types and using the `rowindex`
+    column as the index."""
+    json_data = [
+        "null",
+        "true",
+        "100",
+        "0.98",
+        '"a string"',
+        "[]",
+        "[1, 2, 3]",
+        '[{"a": 1}, {"a": 2}, {"a": null}, {}]',
+        '"100"',
+        '{"date": "2024-07-16"}',
+        '{"int_value": 2, "null_filed": null}',
+        '{"list_data": [10, 20, 30]}',
+    ]
+    df = pd.DataFrame(
+        {
+            "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE),
+            "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE),
+        },
+    )
+    # TODO(b/427305807): Why does `drop=False` produce two "rowindex" columns?
+    df = df.set_index("rowindex", drop=True)
+    return df
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py b/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py
deleted file mode 100644
index fd914f589a50..000000000000
--- a/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pathlib
-import typing
-
-import pandas as pd
-import pyarrow as pa
-import pytest
-from google.cloud import bigquery
-
-import bigframes.core as core
-import bigframes.pandas as bpd
-import bigframes.testing.mocks as mocks
-import bigframes.testing.utils
-from bigframes import dtypes
-
-CURRENT_DIR = pathlib.Path(__file__).parent
-DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data"
-
-
-def _create_compiler_session(table_name, table_schema):
-    """Helper function to create a compiler session."""
-    from bigframes.testing import compiler_session
-
-    anonymous_dataset = bigquery.DatasetReference.from_string(
-        "bigframes-dev.sqlglot_test"
-    )
-    session = mocks.create_bigquery_session(
-        table_name=table_name,
-        table_schema=table_schema,
-        anonymous_dataset=anonymous_dataset,
-    )
-    session._executor = compiler_session.SQLCompilerExecutor()
-    return session
-
-
-@pytest.fixture(scope="session")
-def compiler_session(scalar_types_table_schema):
-    """Compiler session for scalar types."""
-    return _create_compiler_session("scalar_types", scalar_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_repeated_types(repeated_types_table_schema):
-    """Compiler session for repeated data types."""
-    return _create_compiler_session("repeated_types", repeated_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_nested_structs_types(nested_structs_types_table_schema):
-    """Compiler session for nested STRUCT data types."""
-    return _create_compiler_session(
-        "nested_structs_types", nested_structs_types_table_schema
-    )
-
-
-@pytest.fixture(scope="session")
-def compiler_session_w_json_types(json_types_table_schema):
-    """Compiler session for JSON data types."""
-    return _create_compiler_session("json_types", json_types_table_schema)
-
-
-@pytest.fixture(scope="session")
-def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("bool_col", "BOOLEAN"),
-        bigquery.SchemaField("bytes_col", "BYTES"),
-        bigquery.SchemaField("date_col", "DATE"),
-        bigquery.SchemaField("datetime_col", "DATETIME"),
-        bigquery.SchemaField("geography_col", "GEOGRAPHY"),
-        bigquery.SchemaField("int64_col", "INTEGER"),
-        bigquery.SchemaField("int64_too", "INTEGER"),
-        bigquery.SchemaField("numeric_col", "NUMERIC"),
-        bigquery.SchemaField("float64_col", "FLOAT"),
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"),
-        bigquery.SchemaField("string_col", "STRING"),
-        bigquery.SchemaField("time_col", "TIME"),
-        bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
-        bigquery.SchemaField("duration_col", "INTEGER"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def scalar_types_df(compiler_session) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.scalar_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("rowindex", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def scalar_types_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    # TODO: add tests for empty dataframes
-    df = pd.read_json(
-        DATA_DIR / "scalars.jsonl",
-        lines=True,
-    )
-    bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True)
-
-    df = df.set_index("rowindex", drop=False)
-    return df
-
-
-@pytest.fixture(scope="module")
-def scalar_types_array_value(
-    scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session
-) -> core.ArrayValue:
-    managed_data_source = core.local_data.ManagedArrowTable.from_pandas(
-        scalar_types_pandas_df
-    )
-    return core.ArrayValue.from_managed(managed_data_source, compiler_session)
-
-
-@pytest.fixture(scope="session")
-def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("id", "INTEGER"),
-        bigquery.SchemaField(
-            "people",
-            "RECORD",
-            fields=[
-                bigquery.SchemaField("name", "STRING"),
-                bigquery.SchemaField("age", "INTEGER"),
-                bigquery.SchemaField(
-                    "address",
-                    "RECORD",
-                    fields=[
-                        bigquery.SchemaField("city", "STRING"),
-                        bigquery.SchemaField("country", "STRING"),
-                    ],
-                ),
-            ],
-        ),
-    ]
-
-
-@pytest.fixture(scope="session")
-def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.nested_structs_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("id", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def nested_structs_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing STRUCT types and using the `id`
-    column as the index."""
-
-    df = pd.read_json(
-        DATA_DIR / "nested_structs.jsonl",
-        lines=True,
-    )
-    df = df.set_index("id")
-
-    address_struct_schema = pa.struct(
-        [pa.field("city", pa.string()), pa.field("country", pa.string())]
-    )
-    person_struct_schema = pa.struct(
-        [
-            pa.field("name", pa.string()),
-            pa.field("age", pa.int64()),
-            pa.field("address", address_struct_schema),
-        ]
-    )
-    df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema))
-    return df
-
-
-@pytest.fixture(scope="session")
-def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"),
-        bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"),
-        bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"),
-        bigquery.SchemaField("date_list_col", "DATE", "REPEATED"),
-        bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"),
-        bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"),
-        bigquery.SchemaField("string_list_col", "STRING", "REPEATED"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_repeated_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.repeated_types",
-        enable_snapshot=False,
-    )
-    bf_df = bf_df.set_index("rowindex", drop=False)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def repeated_types_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing LIST types and using the `rowindex`
-    column as the index."""
-
-    df = pd.read_json(
-        DATA_DIR / "repeated.jsonl",
-        lines=True,
-    )
-    # TODO: add dtype conversion here if needed.
-    df = df.set_index("rowindex")
-    return df
-
-
-@pytest.fixture(scope="session")
-def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
-    return [
-        bigquery.SchemaField("rowindex", "INTEGER"),
-        bigquery.SchemaField("json_col", "JSON"),
-    ]
-
-
-@pytest.fixture(scope="session")
-def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame:
-    """Returns a BigFrames DataFrame containing JSON types and using the `rowindex`
-    column as the index."""
-    bf_df = compiler_session_w_json_types._loader.read_gbq_table(
-        "bigframes-dev.sqlglot_test.json_types",
-        enable_snapshot=False,
-    )
-    # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
-    bf_df = bf_df.set_index("rowindex", drop=True)
-    return bf_df
-
-
-@pytest.fixture(scope="session")
-def json_pandas_df() -> pd.DataFrame:
-    """Returns a pandas DataFrame containing JSON types and using the `rowindex`
-    column as the index."""
-    json_data = [
-        "null",
-        "true",
-        "100",
-        "0.98",
-        '"a string"',
-        "[]",
-        "[1, 2, 3]",
-        '[{"a": 1}, {"a": 2}, {"a": null}, {}]',
-        '"100"',
-        '{"date": "2024-07-16"}',
-        '{"int_value": 2, "null_filed": null}',
-        '{"list_data": [10, 20, 30]}',
-    ]
-    df = pd.DataFrame(
-        {
-            "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE),
-            "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE),
-        },
-    )
-    # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
-    df = df.set_index("rowindex", drop=True)
-    return df

From 7ad2c2b469f1a76bbca7eed0eabb9d237a1cf2f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 21:56:06 +0000
Subject: [PATCH 19/20] split main

---
 .../scripts/generate_bigframes_bigquery.py    | 349 +++++++++---------
 1 file changed, 183 insertions(+), 166 deletions(-)

diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 4baaef3ce6b0..6c7078af789a 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -101,181 +101,198 @@ def to_snake_case(name):
     return name
 
 
-def main():
+def load_templates():
     env = jinja2.Environment(
         loader=jinja2.FileSystemLoader(TEMPLATE_DIR),
         trim_blocks=True,
         lstrip_blocks=True,
     )
-    template = env.get_template("operation.py.j2")
-    test_template = env.get_template("test_operation.py.j2")
-
-    for yaml_file in DATA_DIR.glob("**/*.yaml"):
-        print(f"Processing {yaml_file}...")
-        with open(yaml_file, "r") as f:
-            data = yaml.safe_load(f)
-
-        rel_path = yaml_file.relative_to(DATA_DIR)
-        module_path = rel_path.with_suffix("")
-        module_name = module_path.name
-        output_file = OUTPUT_DIR.joinpath(module_path).with_suffix(".py")
-
-        ops_list = []
-        functions_list = []
-
-        if "scalar_functions" in data:
-            for func_data in data["scalar_functions"]:
-                sql_name = func_data["name"]
-                python_name = to_snake_case(sql_name)
-                if python_name.startswith(module_name + "_"):
-                    python_name = python_name[len(module_name) + 1 :]
-
-                internal_op_name = f"_{python_name.upper()}_OP"
-
-                # Aggregate args across impls
-                args_by_name = {}
-                arg_order = []
-                for impl in func_data["impls"]:
-                    for arg in impl["args"]:
-                        name = arg["name"]
-                        if name not in args_by_name:
-                            args_by_name[name] = {
-                                "types": set(),
-                                "optional": arg["optional"],
-                                "keyword_only": arg["keyword_only"],
-                            }
-                            arg_order.append(name)
-                        args_by_name[name]["types"].add(arg["value"])
-
-                # Build ArgSpecs
-                arg_specs = []
-                for name in arg_order:
-                    arg_info = args_by_name[name]
-                    spec = "googlesql.ArgSpec("
-                    if arg_info["keyword_only"]:
-                        spec += f'arg_name="{name}", '
-                    if arg_info["optional"]:
-                        spec += "optional=True, "
-                    spec = spec.rstrip(", ") + ")"
-                    arg_specs.append(spec)
-
-                # Determine return dtype
-                return_types = {impl["return"] for impl in func_data["impls"]}
-                if len(return_types) == 1:
-                    ret_type = list(return_types)[0]
-                    signature = f"lambda *args: {DTYPE_MAP.get(ret_type, 'None')}"
-                else:
-                    # Fallback to Any/None if ambiguous
-                    signature = "lambda *args: None"
-
-                ops_list.append(
-                    {
-                        "internal_name": internal_op_name,
-                        "sql_name": sql_name.upper(),
-                        "arg_specs": ", ".join(arg_specs),
-                        "signature": signature,
-                    }
-                )
-
-                # Function args
-                func_args = []
-                for name in arg_order:
-                    arg_info = args_by_name[name]
-                    types = [PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]] + ["Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]"]
-                    type_hint = (
-                        "Union[" + ", ".join(sorted(set(types))) + "]"
-                        if len(types) > 1
-                        else types[0]
-                    )
-                    default = "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else ""
-                    func_args.append(
-                        {
-                            "name": name,
-                            "type_hint": type_hint,
-                            "default": default,
-                        }
-                    )
-
-                # Clean up default values for mandatory args
-                # In Python, mandatory args come first.
-                for arg in func_args:
-                    if not arg["default"]:
-                        del arg["default"]
-
-                # Test args
-                test_args = []
-                for name in arg_order:
-                    arg_info = args_by_name[name]
-                    some_type = list(arg_info["types"])[0]
-                    col_name = YAML_TYPE_TO_COL.get(some_type, "string_col")
-                    test_args.append({"col_name": col_name})
-
-                functions_list.append(
-                    {
-                        "name": python_name,
-                        "op_name": internal_op_name,
-                        "description": func_data["description"],
-                        "args": func_args,
-                        "test_args": test_args,
+    return env.get_template("operation.py.j2"), env.get_template("test_operation.py.j2")
+
+
+def parse_scalar_functions(data, module_name):
+    ops_list = []
+    functions_list = []
+
+    if "scalar_functions" not in data:
+        return ops_list, functions_list
+
+    for func_data in data["scalar_functions"]:
+        sql_name = func_data["name"]
+        python_name = to_snake_case(sql_name)
+        if python_name.startswith(module_name + "_"):
+            python_name = python_name[len(module_name) + 1 :]
+
+        internal_op_name = f"_{python_name.upper()}_OP"
+
+        # Aggregate args across impls
+        args_by_name = {}
+        arg_order = []
+        for impl in func_data["impls"]:
+            for arg in impl["args"]:
+                name = arg["name"]
+                if name not in args_by_name:
+                    args_by_name[name] = {
+                        "types": set(),
+                        "optional": arg["optional"],
+                        "keyword_only": arg["keyword_only"],
                     }
-                )
-
-        # Render and write
-        output_file.parent.mkdir(parents=True, exist_ok=True)
-        content = template.render(
-            yaml_path=str(yaml_file),
-            script_path="scripts/generate_bigframes_bigquery.py",
-            ops=ops_list,
-            functions=functions_list,
-        )
-        with open(output_file, "w") as f:
-            f.write(content)
-
-        subprocess.run(
-            RUFF_ARGS
-            + [
-                str(output_file),
-            ],
-            check=True,
-        )
-        print(f"  Generated {output_file}")
-
-        # Render and write test
-        import_path = "bigframes.bigquery._operations." + ".".join(module_path.parts)
-        test_output_file = TEST_OUTPUT_DIR.joinpath(
-            module_path.with_name(f"test_{module_path.name}")
-        ).with_suffix(".py")
-
-        test_output_file.parent.mkdir(parents=True, exist_ok=True)
-        test_content = test_template.render(
-            yaml_path=str(yaml_file),
-            script_path="scripts/generate_bigframes_bigquery.py",
-            import_path=import_path,
-            short_name=module_path.name,
-            functions=functions_list,
-        )
-        with open(test_output_file, "w") as f:
-            f.write(test_content)
-
-        subprocess.run(
-            RUFF_ARGS
-            + [
-                str(test_output_file),
-            ],
-            check=True,
+                    arg_order.append(name)
+                args_by_name[name]["types"].add(arg["value"])
+
+        # Build ArgSpecs
+        arg_specs = []
+        for name in arg_order:
+            arg_info = args_by_name[name]
+            spec = "googlesql.ArgSpec("
+            if arg_info["keyword_only"]:
+                spec += f'arg_name="{name}", '
+            if arg_info["optional"]:
+                spec += "optional=True, "
+            spec = spec.rstrip(", ") + ")"
+            arg_specs.append(spec)
+
+        # Determine return dtype
+        return_types = {impl["return"] for impl in func_data["impls"]}
+        if len(return_types) == 1:
+            ret_type = list(return_types)[0]
+            signature = f"lambda *args: {DTYPE_MAP.get(ret_type, 'None')}"
+        else:
+            # Fallback to Any/None if ambiguous
+            signature = "lambda *args: None"
+
+        ops_list.append(
+            {
+                "internal_name": internal_op_name,
+                "sql_name": sql_name.upper(),
+                "arg_specs": ", ".join(arg_specs),
+                "signature": signature,
+            }
         )
-        print(f"  Generated {test_output_file}")
-
-        print(f"  Updating snapshots for {test_output_file}...")
-        subprocess.run(
-            [
-                "pytest",
-                str(test_output_file),
-                "--snapshot-update",
-            ],
-            check=False,
+
+        # Function args
+        func_args = []
+        for name in arg_order:
+            arg_info = args_by_name[name]
+            types = [PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]] + [
+                "Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]"
+            ]
+            type_hint = (
+                "Union[" + ", ".join(sorted(set(types))) + "]"
+                if len(types) > 1
+                else types[0]
+            )
+            default = (
+                "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else ""
+            )
+            func_args.append(
+                {
+                    "name": name,
+                    "type_hint": type_hint,
+                    "default": default,
+                }
+            )
+
+        # Clean up default values for mandatory args
+        # In Python, mandatory args come first.
+        for arg in func_args:
+            if not arg.get("default"):
+                arg.pop("default", None)
+
+        # Test args
+        test_args = []
+        for name in arg_order:
+            arg_info = args_by_name[name]
+            some_type = list(arg_info["types"])[0]
+            col_name = YAML_TYPE_TO_COL.get(some_type, "string_col")
+            test_args.append({"col_name": col_name})
+
+        functions_list.append(
+            {
+                "name": python_name,
+                "op_name": internal_op_name,
+                "description": func_data["description"],
+                "args": func_args,
+                "test_args": test_args,
+            }
         )
 
+    return ops_list, functions_list
+
+
+def run_ruff(path: pathlib.Path):
+    subprocess.run(
+        RUFF_ARGS
+        + [
+            str(path),
+        ],
+        check=True,
+    )
+
+
+def process_yaml_file(yaml_file, template, test_template):
+    print(f"Processing {yaml_file}...")
+    with open(yaml_file, "r") as f:
+        data = yaml.safe_load(f)
+
+    rel_path = yaml_file.relative_to(DATA_DIR)
+    module_path = rel_path.with_suffix("")
+    module_name = module_path.name
+    output_file = OUTPUT_DIR.joinpath(module_path).with_suffix(".py")
+
+    ops_list, functions_list = parse_scalar_functions(data, module_name)
+
+    # Render and write
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    content = template.render(
+        yaml_path=str(yaml_file),
+        script_path="scripts/generate_bigframes_bigquery.py",
+        ops=ops_list,
+        functions=functions_list,
+    )
+    with open(output_file, "w") as f:
+        f.write(content)
+
+    run_ruff(output_file)
+    print(f"  Generated {output_file}")
+
+    # Render and write test
+    import_path = "bigframes.bigquery._operations." + ".".join(module_path.parts)
+    test_output_file = TEST_OUTPUT_DIR.joinpath(
+        module_path.with_name(f"test_{module_path.name}")
+    ).with_suffix(".py")
+
+    test_output_file.parent.mkdir(parents=True, exist_ok=True)
+    test_content = test_template.render(
+        yaml_path=str(yaml_file),
+        script_path="scripts/generate_bigframes_bigquery.py",
+        import_path=import_path,
+        short_name=module_path.name,
+        functions=functions_list,
+    )
+    with open(test_output_file, "w") as f:
+        f.write(test_content)
+
+    run_ruff(test_output_file)
+    print(f"  Generated {test_output_file}")
+
+    print(f"  Updating snapshots for {test_output_file}...")
+    subprocess.run(
+        [
+            "pytest",
+            str(test_output_file),
+            "--snapshot-update",
+        ],
+        check=False,
+    )
+
+
+def main():
+    template, test_template = load_templates()
+
+    for yaml_file in DATA_DIR.glob("**/*.yaml"):
+        process_yaml_file(yaml_file, template, test_template)
+
 
 if __name__ == "__main__":
     main()

From 8a48769fbed822789b8777e358191ff5f3f29a10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 19 May 2026 22:09:57 +0000
Subject: [PATCH 20/20] make generation more deterministic

---
 .../scripts/generate_bigframes_bigquery.py    | 151 ++++++++++--------
 1 file changed, 87 insertions(+), 64 deletions(-)

diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py
index 6c7078af789a..4cd98bddbf21 100755
--- a/packages/bigframes/scripts/generate_bigframes_bigquery.py
+++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py
@@ -110,6 +110,87 @@ def load_templates():
     return env.get_template("operation.py.j2"), env.get_template("test_operation.py.j2")
 
 
+def _collect_args(impls):
+    args_by_name = {}
+    arg_order = []
+    for impl in impls:
+        for arg in impl["args"]:
+            name = arg["name"]
+            if name not in args_by_name:
+                args_by_name[name] = {
+                    "types": set(),
+                    "optional": arg["optional"],
+                    "keyword_only": arg["keyword_only"],
+                }
+                arg_order.append(name)
+            args_by_name[name]["types"].add(arg["value"])
+    return args_by_name, arg_order
+
+
+def _build_arg_specs(args_by_name, arg_order):
+    arg_specs = []
+    for name in arg_order:
+        arg_info = args_by_name[name]
+        spec = "googlesql.ArgSpec("
+        if arg_info["keyword_only"]:
+            spec += f'arg_name="{name}", '
+        if arg_info["optional"]:
+            spec += "optional=True, "
+        spec = spec.rstrip(", ") + ")"
+        arg_specs.append(spec)
+    return arg_specs
+
+
+def _get_return_signature(impls):
+    return_types = {impl["return"] for impl in impls}
+    if len(return_types) == 1:
+        ret_type = sorted(return_types)[0]
+        return f"lambda *args: {DTYPE_MAP.get(ret_type, 'None')}"
+    else:
+        # Fall back to None when the impls do not share a single return type.
+        return "lambda *args: None"
+
+
+def _get_func_args(args_by_name, arg_order):
+    func_args = []
+    for name in arg_order:
+        arg_info = args_by_name[name]
+        types = [PY_TYPE_MAP.get(t, "Any") for t in sorted(arg_info["types"])] + [
+            "Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]"
+        ]
+        type_hint = (
+            "Union[" + ", ".join(sorted(set(types))) + "]"
+            if len(types) > 1
+            else types[0]
+        )
+        default = "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else ""
+        func_args.append(
+            {
+                "name": name,
+                "type_hint": type_hint,
+                "default": default,
+            }
+        )
+
+    # Drop the empty-string "default" placeholder from mandatory args so that
+    # only optional args carry a "default" key.
+    for arg in func_args:
+        if not arg.get("default"):
+            arg.pop("default", None)
+
+    return func_args
+
+
+def _get_test_args(args_by_name, arg_order):
+    test_args = []
+    for name in arg_order:
+        arg_info = args_by_name[name]
+        some_type = sorted(arg_info["types"])[0]
+        col_name = YAML_TYPE_TO_COL.get(some_type, "string_col")
+        test_args.append({"col_name": col_name})
+    return test_args
+
+
 def parse_scalar_functions(data, module_name):
     ops_list = []
     functions_list = []
@@ -126,40 +207,13 @@ def parse_scalar_functions(data, module_name):
         internal_op_name = f"_{python_name.upper()}_OP"
 
         # Aggregate args across impls
-        args_by_name = {}
-        arg_order = []
-        for impl in func_data["impls"]:
-            for arg in impl["args"]:
-                name = arg["name"]
-                if name not in args_by_name:
-                    args_by_name[name] = {
-                        "types": set(),
-                        "optional": arg["optional"],
-                        "keyword_only": arg["keyword_only"],
-                    }
-                    arg_order.append(name)
-                args_by_name[name]["types"].add(arg["value"])
+        args_by_name, arg_order = _collect_args(func_data["impls"])
 
         # Build ArgSpecs
-        arg_specs = []
-        for name in arg_order:
-            arg_info = args_by_name[name]
-            spec = "googlesql.ArgSpec("
-            if arg_info["keyword_only"]:
-                spec += f'arg_name="{name}", '
-            if arg_info["optional"]:
-                spec += "optional=True, "
-            spec = spec.rstrip(", ") + ")"
-            arg_specs.append(spec)
+        arg_specs = _build_arg_specs(args_by_name, arg_order)
 
         # Determine return dtype
-        return_types = {impl["return"] for impl in func_data["impls"]}
-        if len(return_types) == 1:
-            ret_type = list(return_types)[0]
-            signature = f"lambda *args: {DTYPE_MAP.get(ret_type, 'None')}"
-        else:
-            # Fallback to Any/None if ambiguous
-            signature = "lambda *args: None"
+        signature = _get_return_signature(func_data["impls"])
 
         ops_list.append(
             {
@@ -171,41 +225,10 @@ def parse_scalar_functions(data, module_name):
         )
 
         # Function args
-        func_args = []
-        for name in arg_order:
-            arg_info = args_by_name[name]
-            types = [PY_TYPE_MAP.get(t, "Any") for t in arg_info["types"]] + [
-                "Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]"
-            ]
-            type_hint = (
-                "Union[" + ", ".join(sorted(set(types))) + "]"
-                if len(types) > 1
-                else types[0]
-            )
-            default = (
-                "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else ""
-            )
-            func_args.append(
-                {
-                    "name": name,
-                    "type_hint": type_hint,
-                    "default": default,
-                }
-            )
-
-        # Clean up default values for mandatory args
-        # In Python, mandatory args come first.
-        for arg in func_args:
-            if not arg.get("default"):
-                arg.pop("default", None)
+        func_args = _get_func_args(args_by_name, arg_order)
 
         # Test args
-        test_args = []
-        for name in arg_order:
-            arg_info = args_by_name[name]
-            some_type = list(arg_info["types"])[0]
-            col_name = YAML_TYPE_TO_COL.get(some_type, "string_col")
-            test_args.append({"col_name": col_name})
+        test_args = _get_test_args(args_by_name, arg_order)
 
         functions_list.append(
             {
@@ -290,7 +313,7 @@ def process_yaml_file(yaml_file, template, test_template):
 def main():
     template, test_template = load_templates()
 
-    for yaml_file in DATA_DIR.glob("**/*.yaml"):
+    for yaml_file in sorted(DATA_DIR.glob("**/*.yaml")):
         process_yaml_file(yaml_file, template, test_template)