From fd2e1cb693e5cd3b858e1e84be45e1b97e830dcc Mon Sep 17 00:00:00 2001
From: "Dmitry Ivanov @ helical-ai.com" <dmitry@helical-ai.com>
Date: Mon, 23 Mar 2026 15:26:18 +0100
Subject: [PATCH 1/2] Transcriptformer config download cache dir (#358)

Closes #348

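Here, we add a `pretrained_embedding_species` argument to the
TranscriptFormer config at init. It resolves species names to the
pretrained embedding files in the Helical cache directory and is
mutually exclusive with `pretrained_embedding`.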
---
 .../transcriptformer_config.py                | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/helical/models/transcriptformer/transcriptformer_config.py b/helical/models/transcriptformer/transcriptformer_config.py
index bf91d679..7a9acc01 100644
--- a/helical/models/transcriptformer/transcriptformer_config.py
+++ b/helical/models/transcriptformer/transcriptformer_config.py
@@ -1,5 +1,7 @@
 from omegaconf import OmegaConf
 from typing import Literal, List, Union
+from pathlib import Path
+from helical.constants.paths import CACHE_DIR_HELICAL
 
 
 class TranscriptFormerConfig:
@@ -25,7 +27,9 @@ class TranscriptFormerConfig:
         load_checkpoint: str = None
             Path to model weights file (automatically set by inference.py)
         pretrained_embedding: Union[str, List[str]] = None
-            Path or list of paths to pretrained embeddings for out-of-distribution species
+            Path or list of paths to pretrained embeddings for out-of-distribution species. Mutually exclusive with `pretrained_embedding_species`.
+        pretrained_embedding_species: Union[str, List[str]] = None
+            Underscore-separated species name or list of names used to resolve the pretrained embedding paths. Example: `pretrained_embedding_species="mus_musculus"` or `pretrained_embedding_species=["mus_musculus", "sus_scrofa"]`. Mutually exclusive with `pretrained_embedding`.
         gene_col_name: str = "ensembl_id"
             Column name in AnnData.var containing gene names which will be mapped to ensembl ids. If index is set, .var_names will be used.
         clip_counts: int = 30
@@ -58,6 +62,7 @@ def __init__(
         output_path: str = "./inference_results",
         load_checkpoint: str = None,
         pretrained_embedding: Union[str, List[str]] = None,
+        pretrained_embedding_species: Union[str, List[str]] = None,
         gene_col_name: str = "index",
         clip_counts: int = 30,
         filter_to_vocabs: bool = True,
@@ -68,6 +73,30 @@ def __init__(
         min_expressed_genes: int = 0,
     ):
 
+        if (
+            pretrained_embedding_species is not None
+            and pretrained_embedding is not None
+        ):
+            raise ValueError(
+                "pretrained_embedding_species and pretrained_embedding are mutually exclusive"
+            )
+
+        if pretrained_embedding_species is not None and pretrained_embedding is None:
+            species_list = (
+                [pretrained_embedding_species]
+                if isinstance(pretrained_embedding_species, str)
+                else pretrained_embedding_species
+            )
+            vocab_base = (
+                Path(CACHE_DIR_HELICAL)
+                / "models/transcriptformer"
+                / model_name
+                / "vocabs"
+            )
+            pretrained_embedding = [
+                str(vocab_base / f"{s}_gene.h5") for s in species_list
+            ]
+
         inference_config: dict = {
             "batch_size": batch_size,
             "output_keys": output_keys,

From fb80a76458e5bb7f12ca06b2749974513c1c5d57 Mon Sep 17 00:00:00 2001
From: Benoit Putzeys <157973952+bputzeys@users.noreply.github.com>
Date: Mon, 23 Mar 2026 14:36:05 +0000
Subject: [PATCH 2/2] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 73e728b1..db8e1b52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "helical"
-version = "1.10.1"
+version = "1.11.0"
 authors = [
   { name="Helical Team", email="support@helical-ai.com" },
 ]