Merged

86 commits
6f42ce7
save code
Oct 17, 2025
36d2aa1
remove warmup
Oct 17, 2025
87c8e69
compile normalize grad + figsize factor on plot
Nov 4, 2025
d86808f
make remove duplicates optional
Nov 17, 2025
11b952b
fix prev_size dataset in split_data script
Dec 1, 2025
e0c9be4
format code
Dec 15, 2025
455c2d5
add map model as option
Dec 16, 2025
fa9baa4
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
1ee0b5c
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
4605d21
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
2cbd4e4
fix merge
Dec 19, 2025
9d9a915
remove 3.14 as torch compile is not supported yet
Dec 19, 2025
716da6f
fix merge
Dec 19, 2025
b6dcefd
add missing keys to args dict in tests
Dec 19, 2025
0bcc1cc
batch function dataset
Jan 9, 2026
1e06197
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Jan 9, 2026
efe31de
use batch method
Jan 9, 2026
4695ab1
add visible_type to EBM class
Jan 13, 2026
0cae04c
change variable_type after conversion
Jan 13, 2026
10af4f2
change variable_type from binary to bernoulli
Jan 13, 2026
7607287
add visible_type
Jan 13, 2026
152803e
add categorical_to_bernoulli implementation
Jan 13, 2026
878f14e
fix variable_type
Jan 13, 2026
3520807
match dataset variable type with model visible type
Jan 13, 2026
1c75c62
sample bernoulli when variable_type is bernoulli
Jan 14, 2026
fc19aef
add log_scale option to PCA plot
Jan 14, 2026
2ca6df6
removed unused variable in non centered gradient
Jan 27, 2026
1f4d543
add conversion print + astype to dataset class
Jan 27, 2026
60c42f6
add __eq__ to class for easier comparison
Jan 27, 2026
852725d
add IIRBM and BGRBM to map_model
Jan 27, 2026
1cfd16e
add model_type and normalize_grad option to parser
Jan 27, 2026
8c69c83
add dataset weights arg
Jan 27, 2026
da1035c
fix binary to bernoulli and add ising to model match
Jan 27, 2026
c654f8f
make normalize_grad optional
Jan 27, 2026
8365a70
save result from get_eigenvalues_history in file to avoid repeating c…
Jan 27, 2026
1a3682b
change version number
Jan 27, 2026
ecbfb08
simplify imports
Jan 27, 2026
9da903c
fix: add __init__ to bernoulli_gaussian
Jan 29, 2026
bdf553e
clip grad
Feb 4, 2026
dc54a16
rework the main loop and add rbms restore script allowing to change m…
Feb 4, 2026
be952e4
new parser, keep the old functions for compatibility
Feb 4, 2026
8559d4e
save learning rate during training and remove the hyperparameters loa…
Feb 4, 2026
feb4e0a
util to handle optimizer declaration
Feb 4, 2026
9544f24
remove test for removed function
Feb 4, 2026
34ee17a
remove weights from init_parameters
Feb 4, 2026
b363fff
add learning_rate
Feb 4, 2026
7ed3c2b
margaret update
Feb 11, 2026
6703879
remove prefactor variance initialization w
Feb 18, 2026
7ecb673
formatting
Feb 18, 2026
cb86ee3
fix gzip
Feb 18, 2026
11541af
add Sampler class for more training modularity
Feb 18, 2026
040036f
add cossim optimizer
Feb 18, 2026
33d1588
put gradient modifications in one pipeline
Feb 18, 2026
ca25934
cd, pcd and rdm sampler
Feb 18, 2026
56fc7a3
use new training function and script
Feb 18, 2026
9b533e9
fix some compatibility for restore
Feb 18, 2026
0f53b0f
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Feb 19, 2026
4db095b
put all saving methods with np array
Feb 19, 2026
475b7d9
change sampler call during training to get_grad_conf and add post_gra…
Feb 20, 2026
f81979d
add check keys dict + save sampler name
Feb 20, 2026
63a19ed
cleaning code
nbereux Feb 23, 2026
3e2231e
cleaning code
nbereux Feb 23, 2026
4dcf606
fix ordering abstractmethod property
nbereux Feb 23, 2026
34b5833
remove obsolete code
nbereux Feb 23, 2026
2e0441a
remove old parser
nbereux Feb 23, 2026
cce1977
pre_grad_update sampler and model
nbereux Feb 23, 2026
b04f8a1
various changes in ising_gauss
AurelienDecelle Feb 24, 2026
1c94ad3
rename var IGRBM
AurelienDecelle Feb 24, 2026
66d819c
corrected minor bugs for free-energy computation of IGRBM
AurelienDecelle Feb 24, 2026
02145de
add str option device
nbereux Feb 24, 2026
97e220a
Merge branch 'papier_ptt_train' of github.com:DsysDML/rbms into papie…
nbereux Feb 24, 2026
4770d9f
remove save method
nbereux Feb 24, 2026
997db0a
add kwargs metrics
nbereux Feb 24, 2026
eb0b20e
type hinting
Feb 25, 2026
737bed8
metrics save and display for sampler
Feb 25, 2026
0135092
remove unused import
Feb 25, 2026
b3e5b57
type hint
Feb 25, 2026
5329755
add some safeguard against None
Feb 25, 2026
f3ebeb9
correct type hint for map
Feb 25, 2026
476671a
regularization is now handled by pre_grad_update
Feb 25, 2026
35881d5
remove compilation
Feb 26, 2026
d6068e9
remove unused code + type hint
Feb 26, 2026
424976b
fix format of default names when splitting data
Mar 2, 2026
3f7e213
save parallel chains as bool/int
Mar 4, 2026
de94167
fix read training_type
Mar 4, 2026
4a7f0e0
update h5py version
Mar 4, 2026
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yaml
@@ -15,7 +15,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.14
python-version: 3.13

- name: Install test dependencies
run: pip install pytest pytest-cov
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.12, 3.13, 3.14]
python-version: [3.12, 3.13]
steps:
- name: Checkout
uses: actions/checkout@v4
10 changes: 5 additions & 5 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "rbms"
version = "0.5.0"
version = "0.6.0"
authors = [
{name="Nicolas Béreux", email="nicolas.bereux@gmail.com"},
{name="Aurélien Decelle"},
@@ -19,12 +19,12 @@ maintainers = [
]
description = "Training and analyzing Restricted Boltzmann Machines in PyTorch"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.12, <3.14"
dependencies = [
"h5py>=3.12.0",
"h5py>=3.14.0",
"numpy>=2.0.0",
"matplotlib>=3.8.0",
"torch>=2.5.0",
"torch>=2.10.0",
"tqdm>=4.65.0",
]

@@ -85,4 +85,4 @@ docstring-code-format = false
[dependency-groups]
dev = [
"pytest>=8.4.1",
]
]
42 changes: 42 additions & 0 deletions rbms/__init__.py
@@ -0,0 +1,42 @@
from rbms.bernoulli_bernoulli.classes import BBRBM
from rbms.bernoulli_gaussian.classes import BGRBM
from rbms.dataset import load_dataset
from rbms.dataset.utils import convert_data
from rbms.io import load_model, load_params
from rbms.ising_ising.classes import IIRBM
from rbms.map_model import map_model
from rbms.plot import plot_image, plot_mult_PCA
from rbms.potts_bernoulli.classes import PBRBM
from rbms.utils import (
bernoulli_to_ising,
compute_log_likelihood,
get_categorical_configurations,
get_eigenvalues_history,
get_flagged_updates,
get_saved_updates,
ising_to_bernoulli,
)

__all__ = [
BBRBM,
BGRBM,
IIRBM,
PBRBM,
map_model,
bernoulli_to_ising,
ising_to_bernoulli,
compute_log_likelihood,
get_eigenvalues_history,
get_saved_updates,
get_flagged_updates,
get_categorical_configurations,
plot_mult_PCA,
plot_image,
load_params,
load_model,
load_dataset,
convert_data,
]


__version__ = "0.5.1"
11 changes: 10 additions & 1 deletion rbms/bernoulli_bernoulli/__init__.py
@@ -1,3 +1,12 @@
# ruff: noqa
from rbms.bernoulli_bernoulli.classes import BBRBM
from rbms.bernoulli_bernoulli.functional import *
from rbms.bernoulli_bernoulli.functional import (
compute_energy,
compute_energy_hiddens,
compute_energy_visibles,
compute_gradient,
init_chains,
init_parameters,
sample_hiddens,
sample_visibles,
)
66 changes: 45 additions & 21 deletions rbms/bernoulli_bernoulli/classes.py
@@ -1,4 +1,4 @@
from typing import Self
from __future__ import annotations

import numpy as np
import torch
@@ -15,17 +15,20 @@
_sample_visibles,
)
from rbms.classes import RBM
from rbms.custom_fn import check_keys_dict


class BBRBM(RBM):
"""Parameters of the Bernoulli-Bernoulli RBM"""

visible_type: str = "bernoulli"

def __init__(
self,
weight_matrix: Tensor,
vbias: Tensor,
hbias: Tensor,
device: torch.device | None = None,
device: torch.device | str | None = None,
dtype: torch.dtype | None = None,
):
"""Initialize the parameters of the Bernoulli-Bernoulli RBM.
@@ -49,6 +52,7 @@ def __init__(
self.vbias = vbias.to(device=self.device, dtype=self.dtype)
self.hbias = hbias.to(device=self.device, dtype=self.dtype)
self.name = "BBRBM"
self.flags = []

def __add__(self, other):
return BBRBM(
@@ -64,7 +68,9 @@ def __mul__(self, other):
hbias=self.hbias * other,
)

def clone(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
def clone(
self, device: torch.device | str | None = None, dtype: torch.dtype | None = None
):
if device is None:
device = self.device
if dtype is None:
@@ -102,7 +108,7 @@ def compute_energy_visibles(self, v: Tensor) -> Tensor:
weight_matrix=self.weight_matrix,
)

def compute_gradient(self, data, chains, centered=True, lambda_l1=0.0, lambda_l2=0.0):
def compute_gradient(self, data, chains, centered=True):
_compute_gradient(
v_data=data["visible"],
mh_data=data["hidden_mag"],
@@ -114,8 +120,6 @@ def compute_gradient(data, chains, centered=True, lambda_l1=0.0, lambda_l2=0.0):
hbias=self.hbias,
weight_matrix=self.weight_matrix,
centered=centered,
lambda_l1=lambda_l1,
lambda_l2=lambda_l2,
)

def independent_model(self):
@@ -159,25 +163,28 @@ def init_parameters(num_hiddens, dataset, device, dtype, var_init=0.0001):
)
return BBRBM(weight_matrix=weight_matrix, vbias=vbias, hbias=hbias)

def named_parameters(self):
def named_parameters(self) -> dict[str, np.ndarray]:
return {
"weight_matrix": self.weight_matrix,
"vbias": self.vbias,
"hbias": self.hbias,
"weight_matrix": self.weight_matrix.cpu().numpy(),
"vbias": self.vbias.cpu().numpy(),
"hbias": self.hbias.cpu().numpy(),
}

@property
def num_hiddens(self):
return self.hbias.shape[0]

@property
def num_visibles(self):
return self.vbias.shape[0]

def parameters(self) -> list[Tensor]:
return [self.weight_matrix, self.vbias, self.hbias]

@property
def ref_log_z(self):
return (
torch.log1p(torch.exp(self.vbias)).sum() + self.num_hiddens() * np.log(2)
torch.log1p(torch.exp(self.vbias)).sum() + self.num_hiddens * np.log(2)
).item()

def sample_hiddens(self, chains: dict[str, Tensor], beta=1) -> dict[str, Tensor]:
@@ -199,25 +206,33 @@ def sample_visibles(self, chains: dict[str, Tensor], beta=1) -> dict[str, Tensor]:
return chains

@staticmethod
def set_named_parameters(named_params: dict[str, Tensor]) -> Self:
def set_named_parameters(
named_params: dict[str, np.ndarray],
device: torch.device | str,
dtype: torch.dtype,
) -> BBRBM:
names = ["vbias", "hbias", "weight_matrix"]
for k in names:
if k not in named_params.keys():
raise ValueError(
f"""Dictionary params missing key '{k}'\n Provided keys : {named_params.keys()}\n Expected keys: {names}"""
)
check_keys_dict(d=named_params, names=names)
params = BBRBM(
weight_matrix=named_params.pop("weight_matrix"),
vbias=named_params.pop("vbias"),
hbias=named_params.pop("hbias"),
weight_matrix=torch.from_numpy(named_params.pop("weight_matrix")).to(
device=device, dtype=dtype
),
vbias=torch.from_numpy(named_params.pop("vbias")).to(
device=device, dtype=dtype
),
hbias=torch.from_numpy(named_params.pop("hbias")).to(
device=device, dtype=dtype
),
)
if len(named_params.keys()) > 0:
raise ValueError(
f"Too many keys in params dictionary. Remaining keys: {named_params.keys()}"
)
return params

def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
def to(
self, device: torch.device | str | None = None, dtype: torch.dtype | None = None
):
if device is not None:
self.device = device
if dtype is not None:
@@ -226,3 +241,12 @@ def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
self.vbias = self.vbias.to(device=self.device, dtype=self.dtype)
self.hbias = self.hbias.to(device=self.device, dtype=self.dtype)
return self

def get_metrics(self, metrics):
return metrics

def post_grad_update(self):
pass

def pre_grad_update(self):
pass
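The new set_named_parameters signature above delegates key validation to check_keys_dict and round-trips parameters through NumPy arrays. A minimal sketch of that validation pattern, reconstructed from the inline loop this diff removes (the real body of rbms.custom_fn.check_keys_dict may differ):

```python
import numpy as np


def check_keys_dict(d: dict, names: list[str]) -> None:
    # Hypothetical re-implementation based on the inline loop removed
    # from BBRBM.set_named_parameters in this diff.
    for k in names:
        if k not in d:
            raise ValueError(
                f"Dictionary params missing key '{k}'\n"
                f"Provided keys: {list(d.keys())}\n"
                f"Expected keys: {names}"
            )


# Saved parameters now travel as CPU NumPy arrays, mirroring the
# updated named_parameters(); shapes here are illustrative only.
named_params = {
    "weight_matrix": np.zeros((3, 2), dtype=np.float32),
    "vbias": np.zeros(3, dtype=np.float32),
    "hbias": np.zeros(2, dtype=np.float32),
}
check_keys_dict(d=named_params, names=["vbias", "hbias", "weight_matrix"])
```

Validating before the torch.from_numpy(...).to(device=..., dtype=...) conversion means a corrupted save file fails loudly with the missing key named, rather than with an opaque KeyError mid-restore.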
4 changes: 0 additions & 4 deletions rbms/bernoulli_bernoulli/functional.py
@@ -116,8 +116,6 @@ def compute_gradient(
chains: dict[str, Tensor],
params: BBRBM,
centered: bool = True,
lambda_l1: float = 0.0,
lambda_l2: float = 0.0,
) -> None:
"""Compute the gradient for each of the parameters and attach it.

@@ -140,8 +138,6 @@
hbias=params.hbias,
weight_matrix=params.weight_matrix,
centered=centered,
lambda_l1=lambda_l1,
lambda_l2=lambda_l2,
)


28 changes: 5 additions & 23 deletions rbms/bernoulli_bernoulli/implement.py
@@ -1,6 +1,5 @@
import torch
from torch import Tensor
from torch.nn.functional import softmax


@torch.jit.script
@@ -59,7 +58,7 @@ def _compute_energy_hiddens(
return -field - log_term.sum(1)


@torch.jit.script
# @torch.jit.script
def _compute_gradient(
v_data: Tensor,
mh_data: Tensor,
@@ -71,13 +70,11 @@ def _compute_gradient(
hbias: Tensor,
weight_matrix: Tensor,
centered: bool = True,
lambda_l1: float = 0.0,
lambda_l2: float = 0.0,
) -> None:
w_data = w_data.view(-1, 1)
w_chain = w_chain.view(-1, 1)
# Turn the weights of the chains into normalized weights
chain_weights = softmax(-w_chain, dim=0)
chain_weights = w_chain / w_chain.sum()
w_data_norm = w_data.sum()

# Averages over data and generated samples
@@ -102,33 +99,18 @@
grad_vbias = v_data_mean - v_gen_mean - (grad_weight_matrix @ h_data_mean)
grad_hbias = h_data_mean - h_gen_mean - (v_data_mean @ grad_weight_matrix)
else:
v_data_centered = v_data
h_data_centered = mh_data
v_gen_centered = v_chain
h_gen_centered = h_chain

# Gradient
grad_weight_matrix = ((v_data * w_data).T @ mh_data) / w_data_norm - (
(v_chain * chain_weights).T @ h_chain
)
grad_vbias = v_data_mean - v_gen_mean
grad_hbias = h_data_mean - h_gen_mean

if lambda_l1 > 0:
grad_weight_matrix -= lambda_l1 * torch.sign(weight_matrix)
grad_vbias -= lambda_l1 * torch.sign(vbias)
grad_hbias -= lambda_l1 * torch.sign(hbias)

if lambda_l2 > 0:
grad_weight_matrix -= 2 * lambda_l2 * weight_matrix
grad_vbias -= 2 * lambda_l2 * vbias
grad_hbias -= 2 * lambda_l2 * hbias

# Attach to the parameters

weight_matrix.grad.set_(grad_weight_matrix)
vbias.grad.set_(grad_vbias)
hbias.grad.set_(grad_hbias)
weight_matrix.grad = grad_weight_matrix
vbias.grad = grad_vbias
hbias.grad = grad_hbias


@torch.jit.script
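Two behavioral changes stand out in _compute_gradient: chain weights are now normalized by their sum rather than passed through softmax(-w_chain), and gradients are attached with plain assignment (p.grad = g) instead of grad.set_(). A NumPy sketch of the reweighted negative-phase moment, with illustrative shapes rather than the library's actual API:

```python
import numpy as np

rng = np.random.default_rng(0)
v_chain = rng.random((4, 3))   # 4 chains, 3 visible units
h_chain = rng.random((4, 2))   # 4 chains, 2 hidden units
w_chain = rng.random((4, 1))   # one positive weight per chain

# New scheme in this PR: plain normalization, so weights sum to 1
chain_weights = w_chain / w_chain.sum()

# Weighted negative-phase second moment over the chains, matching
# (v_chain * chain_weights).T @ h_chain in the diff
neg_second_moment = (v_chain * chain_weights).T @ h_chain
```

Unlike softmax(-w_chain), this keeps each weight proportional to its raw value, which implicitly assumes w_chain is non-negative with a non-zero sum.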
12 changes: 12 additions & 0 deletions rbms/bernoulli_gaussian/__init__.py
@@ -0,0 +1,12 @@
# ruff: noqa
from rbms.bernoulli_gaussian.classes import BGRBM
from rbms.bernoulli_gaussian.functional import (
compute_energy,
compute_energy_hiddens,
compute_energy_visibles,
compute_gradient,
init_chains,
init_parameters,
sample_hiddens,
sample_visibles,
)