geometric-kernels · vabor112 · Dec 15, 2025 · Nov 14, 2025 · Nov 16, 2025 · Nov 16, 2025
diff --git a/docs/examples/HammingGraph.nblink b/docs/examples/HammingGraph.nblink
@@ -0,0 +1 @@
+{"path": "../../notebooks/HammingGraph.ipynb"}
diff --git a/docs/examples/examples.rst b/docs/examples/examples.rst
@@ -9,6 +9,7 @@ Spaces
    Custom Spaces and Kernels <CustomSpacesAndKernels>
    Graph <Graph>
    GraphEdges <GraphEdges>
+   HammingGraph <HammingGraph>
    Hyperbolic <Hyperbolic>
    HypercubeGraph <HypercubeGraph>
    Hypersphere <Hypersphere>

diff --git a/docs/index.rst b/docs/index.rst
@@ -32,11 +32,13 @@ Before doing anything, you might want to create and activate a new virtual envir
 ..  code-block:: bash
 
    uv venv --python python[version] [venv_dir]
+
 where [env_dir] is the directory (default is `.venv`) of the environment and [version] is the version of Python you want to use, we currently support 3.9, 3.10, 3.11, 3.12.
 
 [Optional] activate the environment. However, this is not strictly necessary for `uv`. Instead, use tools like `uv run python` to run Python inside the environment. See `uv documentation <https://docs.astral.sh/uv/>` for more details.
 
 ..  code-block:: bash
+
    source [venv_dir]/bin/activate
 
 .. raw:: html

diff --git a/docs/references.bib b/docs/references.bib
@@ -152,4 +152,11 @@ @article{sawyer1992
 	title = {The heat equation on the spaces of positive definite matrices},
 	volume = {44},
 	year = {1992},
+}
+
+@inproceedings{doumont2025,
+    title = {Omnipresent Yet Overlooked: Heat Kernels in Combinatorial Bayesian Optimization},
+    author = {Colin Doumont and Victor Picheny and Viacheslav Borovitskiy and Henry Moss},
+    booktitle = {Advances in Neural Information Processing Systems},
+    year = {2025},
 }
diff --git a/docs/theory/hamming_graph.rst b/docs/theory/hamming_graph.rst
@@ -0,0 +1,113 @@
+################################
+  Kernels on the Hamming Graph
+################################
+
+.. warning::
+    You can get by fine without reading this page for almost all use cases, just use the standard :class:`~.kernels.MaternGeometricKernel`, following the respective :doc:`example notebook </examples/HammingGraph>`.
+
+    This is optional material meant to explain the basic theory and based mainly on :cite:t:`doumont2025` and :cite:t:`borovitskiy2023`.
+
+==========================
+Motivation
+==========================
+
+The :class:`~.spaces.HammingGraph` space $H(d,q)$ can be used to model $d$-dimensional *categorical vector* inputs, where each component takes values in $\{0, 1, ..., q-1\}$.
+
+There are many settings where inputs are categorical vectors or can be represented as such. For instance, DNA sequences (with $q=4$ for A, C, G, T), protein sequences (with $q=20$ for the standard amino acids), or error-correcting codes over finite alphabets. When $q=2$, the Hamming graph reduces to the hypercube graph $C^d$ (see :doc:`HypercubeGraph </theory/hypercube_graph>`), which represents binary vectors.
+
+==========================
+Structure of the Space
+==========================
+
+The elements of this space—given its `dim` is $d \in \mathbb{Z}_{>0}$ and `n_cat` is $q \in \mathbb{Z}_{>1}$—are exactly the categorical vectors of length $d$ where each component is in $\{0, 1, ..., q-1\}$.
+
+The geometry of this space is simple: it is a graph such that $x, x' \in H(d,q)$ are connected by an edge if and only if they differ in exactly one coordinate (i.e. *Hamming distance* $1$ between them is exactly $1$).
+
+Being a graph, $H(d,q)$ could also be represented using the general :class:`~.spaces.Graph` space.
+However, the number of nodes in $H(d,q)$ is $q^d$, which is exponential in $d$ (and grows rapidly with $q$), rendering the general graph representation intractable in most cases.
+
+==========================
+Eigenfunctions
+==========================
+
+On graphs, kernels are computed using the eigenfunctions and eigenvalues of the Laplacian.
+
+The eigenfunctions of the Laplacian on the Hamming graph are the *Vilenkin functions* (also known as *generalized Walsh functions*) [#]_. These are characters of the Abelian group $\mathbb{Z}_q^d$ and can be expressed using $q$-th roots of unity.
+
+The Vilenkin functions $\psi_{\mathbf{s}}$ are indexed by frequency vectors $\mathbf{s} = (s_0, \ldots, s_{d-1})$ where each $s_i \in \{0, 1, \ldots, q-1\}$, and take the form
+$$
+\psi_{\mathbf{s}}(x_0, \ldots, x_{d-1}) = \prod_{i=0}^{d-1} \omega_q^{s_i x_i}
+$$
+where $\omega_q = e^{2\pi i/q}$ is a primitive $q$-th root of unity and $x = (x_0, \ldots, x_{d-1}) \in H(d,q)$.
+
+The eigenfunctions with the same number of non-zero frequency components $j = \lvert\{i : s_i \neq 0\}\rvert$ share the same eigenvalue $\lambda_j = \frac{q j}{(q-1)d}$ and form a *level* $j$. The dimension of level $j$ is $\binom{d}{j}(q-1)^j$.
+
+However, the problem is that the number of eigenfunctions is $q^d$.
+Hence naive truncation of the sum in the kernel formula to a few hundred terms leads to a poor approximation of the kernel for larger $d$ or $q$.
+
+**Note:** Direct evaluation of Vilenkin functions is not currently implemented for $q > 2$, but this is not required for kernel computation thanks to the addition theorem below.
+
+==========================
+Addition Theorem
+==========================
+
+Much like for hyperspheres and hypercube graphs, there is an :doc:`addition theorem <addition_theorem>` for the Hamming graph:
+
+$$
+\sum_{\substack{T \subseteq \{0, .., d-1\} \\ \lvert T \rvert = j}} \sum_{\alpha \in \{1,..,q-1\}^T} \psi_{T,\alpha}(x) \overline{\psi_{T,\alpha}(x')}
+=
+\binom{d}{j} (q-1)^j
+\widetilde{K}_{d, q, j}(m)
+$$
+
+where $\psi_{T,\alpha}$ denotes the Vilenkin function indexed by the subset $T$ with frequency values $\alpha = (\alpha_i)_{i \in T}$, $m$ is the Hamming distance between $x$ and $x'$, and $\widetilde{K}_{d, q, j}(m)$ is the generalized Kravchuk polynomial of degree $d$, alphabet size $q$, and order $j$, normalized such that $\widetilde{K}_{d, q, j}(0) = 1$.
+
+Normalized generalized Kravchuk polynomials $\widetilde{K}_{d, q, j}(m)$ satisfy the following three-term recurrence relation:
+$$
+\widetilde{K}_{d, q, j}(m)
+=
+\frac{d(q-1) - qm + q - j(q-1)}{(d - j + 1)(q-1)} \widetilde{K}_{d, q, j-1}(m)
+- \frac{j-1}{d - j + 1} \widetilde{K}_{d, q, j-2}(m)
+$$
+with initial conditions:
+$$
+\widetilde{K}_{d, q, 0}(m) = 1,
+\quad
+\widetilde{K}_{d, q, 1}(m) = 1 - \frac{q}{(q-1)d} m,
+$$
+which allows for their efficient computation without the need to compute large sums of the individual Vilenkin functions.
+
+With that, the kernels on the Hamming graph can be computed efficiently using the formula:
+$$
+k_{\nu, \kappa}(x, x')
+=
+\frac{1}{C_{\nu, \kappa}}
+\sum_{j=0}^{L-1}
+\Phi_{\nu, \kappa}(\lambda_j)
+\binom{d}{j} (q-1)^j \widetilde{K}_{d, q, j}(m)
+\qquad
+\Phi_{\nu, \kappa}(\lambda)
+=
+\begin{cases}
+\left(\frac{2\nu}{\kappa^2} + \lambda\right)^{-\nu-\frac{d}{2}}
+&
+\nu < \infty \text{ — Matérn}
+\\
+e^{-\frac{\kappa^2}{2} \lambda}
+&
+\nu = \infty \text{ — Heat (RBF)}
+\end{cases}
+$$
+where $m$ is the Hamming distance between $x$ and $x'$, and $L \leq d + 1$ is the user-controlled number of levels parameter.
+
+**Notes:**
+
+#. We define the dimension of the :class:`~.spaces.HammingGraph` space $H(d,q)$ to be $d$, in contrast to the graphs represented by the :class:`~.spaces.Graph` space, whose dimension is defined to be $0$.
+
+   Because of this, much like in the Euclidean or the manifold case, the $1/2, 3/2, 5/2$ *are* in fact reasonable values for the smoothness parameter $\nu$.
+
+#. When $q=2$, the Hamming graph $H(d,2)$ is isomorphic to the hypercube graph $C^d$, and the generalized Kravchuk polynomials reduce to the standard Kravchuk polynomials ($q=2$).
+
+.. rubric:: Footnotes
+
+.. [#] Since the Hamming graph $H(d,q)$ is $(q-1)d$-regular, the unnormalized Laplacian and the symmetric normalized Laplacian coincide up to a multiplication by $(q-1)d$. Thus their eigenfunctions are the same and eigenvalues coincide up to this multiplication. For better numerical stability, we use symmetric normalized Laplacian in the implementation and assume its use throughout this page.
diff --git a/docs/theory/index.rst b/docs/theory/index.rst
@@ -14,3 +14,4 @@
    Product kernels <product_kernels>
    Feature maps and sampling <feature_maps>
    Hypercube graph space <hypercube_graph>
+   Hamming graph space <hamming_graph>
diff --git a/geometric_kernels/feature_maps/random_phase.py b/geometric_kernels/feature_maps/random_phase.py
@@ -109,7 +109,7 @@ def __call__(
 
         weights = B.power(spectrum, 0.5)  # [L, 1]
 
-        random_phases_b = B.cast(dtype, from_numpy(X, random_phases))
+        random_phases_b = B.cast(B.dtype(X), from_numpy(X, random_phases))
 
         phi_product = self.eigenfunctions.phi_product(
             X, random_phases_b, **params

diff --git a/geometric_kernels/kernels/__init__.py b/geometric_kernels/kernels/__init__.py
@@ -15,4 +15,7 @@
     MaternGeometricKernel,
     default_feature_map,
 )
+from geometric_kernels.kernels.matern_kernel_hamming_graph import (
+    MaternKernelHammingGraph,
+)
 from geometric_kernels.kernels.product import ProductGeometricKernel
diff --git a/geometric_kernels/kernels/matern_kernel.py b/geometric_kernels/kernels/matern_kernel.py
@@ -21,11 +21,15 @@
 from geometric_kernels.kernels.feature_map import MaternFeatureMapKernel
 from geometric_kernels.kernels.hodge_compositional import MaternHodgeCompositionalKernel
 from geometric_kernels.kernels.karhunen_loeve import MaternKarhunenLoeveKernel
+from geometric_kernels.kernels.matern_kernel_hamming_graph import (
+    MaternKernelHammingGraph,
+)
 from geometric_kernels.spaces import (
     CompactMatrixLieGroup,
     DiscreteSpectrumSpace,
     Graph,
     GraphEdges,
+    HammingGraph,
     HodgeDiscreteSpectrumSpace,
     Hyperbolic,
     HypercubeGraph,
@@ -76,7 +80,7 @@ def default_feature_map(
 
 @overload
 def feature_map_from_kernel(kernel: MaternKarhunenLoeveKernel):
-    if isinstance(kernel.space, CompactMatrixLieGroup):
+    if isinstance(kernel.space, (CompactMatrixLieGroup, HammingGraph)):
         # Because `CompactMatrixLieGroup` does not currently support explicit
         # eigenfunction computation (they only support addition theorem).
         return RandomPhaseFeatureMapCompact(
@@ -136,7 +140,7 @@ def feature_map_from_kernel(kernel: BaseGeometricKernel):
 
 @overload
 def feature_map_from_space(space: DiscreteSpectrumSpace, num: int):
-    if isinstance(space, CompactMatrixLieGroup):
+    if isinstance(space, (CompactMatrixLieGroup, HammingGraph)):
         return RandomPhaseFeatureMapCompact(
             space, num, MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES
         )
@@ -212,7 +216,7 @@ def default_num(space: DiscreteSpectrumSpace) -> int:
         )
     elif isinstance(space, GraphEdges):
         return min(MaternGeometricKernel._DEFAULT_NUM_EIGENFUNCTIONS, space.num_edges)
-    elif isinstance(space, HypercubeGraph):
+    elif isinstance(space, (HypercubeGraph, HammingGraph)):
         return min(MaternGeometricKernel._DEFAULT_NUM_LEVELS, space.dim + 1)
     else:
         return MaternGeometricKernel._DEFAULT_NUM_LEVELS
@@ -289,6 +293,7 @@ def __new__(
 
         :param space:
             Space to construct the kernel on.
+
         :param num:
             If provided, controls the "order of approximation" of the kernel.
             For the discrete spectrum spaces, this means the number of "levels"
@@ -302,6 +307,7 @@ def __new__(
 
             If num=None, we use a (hopefully) reasonable default, which is
             space-dependent.
+
         :param normalize:
             Normalize the kernel (and the feature map). If normalize=True,
             then either $k(x, x) = 1$ for all $x \in X$, where $X$ is the
@@ -339,6 +345,8 @@ def __new__(
             num = num or default_num(space)
             if isinstance(space, HodgeDiscreteSpectrumSpace):
                 kernel = MaternHodgeCompositionalKernel(space, num, normalize=normalize)
+            elif isinstance(space, (HypercubeGraph, HammingGraph)):
+                kernel = MaternKernelHammingGraph(space, num, normalize=normalize)
             else:
                 kernel = MaternKarhunenLoeveKernel(space, num, normalize=normalize)
             if return_feature_map:

diff --git a/geometric_kernels/kernels/matern_kernel_hamming_graph.py b/geometric_kernels/kernels/matern_kernel_hamming_graph.py
@@ -0,0 +1,96 @@
+r"""
+This module provides the :class:`MaternKernelHammingGraph` kernel, a subclass of
+:class:`MaternKarhunenLoeveKernel` for :class:`HammingGraph` and
+:class:`HypercubeGraph` spaces implementing the closed-form formula for the
+heat kernel when $\nu = \infty$.
+"""
+
+import lab as B
+import numpy as np
+from beartype.typing import Dict, Optional, Union
+
+from geometric_kernels.kernels.karhunen_loeve import MaternKarhunenLoeveKernel
+from geometric_kernels.spaces.eigenfunctions import Eigenfunctions
+from geometric_kernels.spaces.hamming_graph import HammingGraph
+from geometric_kernels.spaces.hypercube_graph import HypercubeGraph
+from geometric_kernels.utils.kernel_formulas.hamming_graph import (
+    hamming_graph_heat_kernel,
+)
+from geometric_kernels.utils.utils import _check_1_vector, _check_field_in_params
+
+
+class MaternKernelHammingGraph(MaternKarhunenLoeveKernel):
+    r"""
+    For $\nu = \infty$, there exists a closed-form formula for the heat kernel
+    on hamming graphs :class:`HammingGraph` (including the binary hypercube case
+    :class:`HypercubeGraph`). This class extends :class:`MaternKarhunenLoeveKernel`
+    to implement this formula in the case of $\nu = \infty$ for efficiency.
+
+    .. note::
+        We only use the closed form expression if `num_levels` is `d + 1` which
+        corresponds to exact computation. When truncated to fewer levels, we
+        must use the parent class implementation to ensure consistency with
+        feature map approximations.
+    """
+
+    def __init__(
+        self,
+        space: Union[HammingGraph, HypercubeGraph],
+        num_levels: int,
+        normalize: bool = True,
+        eigenvalues_laplacian: Optional[B.Numeric] = None,
+        eigenfunctions: Optional[Eigenfunctions] = None,
+    ):
+        if not isinstance(space, (HammingGraph, HypercubeGraph)):
+            raise ValueError(
+                f"`space` must be an instance of HammingGraph or HypercubeGraph, but got {type(space)}"
+            )
+
+        super().__init__(
+            space,
+            num_levels,
+            normalize,
+            eigenvalues_laplacian,
+            eigenfunctions,
+        )
+
+    def K(
+        self,
+        params: Dict[str, B.Numeric],
+        X: B.Numeric,
+        X2: Optional[B.Numeric] = None,
+        **kwargs,
+    ) -> B.Numeric:
+        _check_field_in_params(params, "lengthscale")
+        _check_1_vector(params["lengthscale"], 'params["lengthscale"]')
+
+        _check_field_in_params(params, "nu")
+        _check_1_vector(params["nu"], 'params["nu"]')
+
+        if B.all(params["nu"] == np.inf):
+            d = X.shape[-1]
+
+            # Only use fast path when we have all levels (exact computation)
+            if self.num_levels == d + 1:
+                # Get q from space (HammingGraph has n_cat, HypercubeGraph is binary q=2)
+                q = getattr(self.space, "n_cat", 2)
+
+                return hamming_graph_heat_kernel(params["lengthscale"], X, X2, q=q)
+
+        return super().K(params, X, X2, **kwargs)
+
+    def K_diag(self, params: Dict[str, B.Numeric], X: B.Numeric, **kwargs) -> B.Numeric:
+        _check_field_in_params(params, "lengthscale")
+        _check_1_vector(params["lengthscale"], 'params["lengthscale"]')
+
+        _check_field_in_params(params, "nu")
+        _check_1_vector(params["nu"], 'params["nu"]')
+
+        if B.all(params["nu"] == np.inf):
+            d = X.shape[-1]
+
+            # Only use fast path when we have all levels (exact computation)
+            if self.num_levels == d + 1:
+                return B.ones(B.dtype(params["nu"]), X.shape[0])
+
+        return super().K_diag(params, X, **kwargs)
diff --git a/geometric_kernels/spaces/__init__.py b/geometric_kernels/spaces/__init__.py
@@ -12,6 +12,7 @@
 from geometric_kernels.spaces.circle import Circle
 from geometric_kernels.spaces.graph import Graph
 from geometric_kernels.spaces.graph_edges import GraphEdges
+from geometric_kernels.spaces.hamming_graph import HammingGraph
 from geometric_kernels.spaces.hyperbolic import Hyperbolic
 from geometric_kernels.spaces.hypercube_graph import HypercubeGraph
 from geometric_kernels.spaces.hypersphere import Hypersphere