From 2f9747156885cfb26cb8e407ab0141b28518a104 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Mon, 11 May 2026 11:49:01 -0400 Subject: [PATCH 1/3] cache models and uv packages --- .github/workflows/test-hf-models.yml | 14 ++- opt/package/test_hf_org.py | 132 ++++++++++++++++----------- opt/package/test_inference.py | 9 +- 3 files changed, 92 insertions(+), 63 deletions(-) diff --git a/.github/workflows/test-hf-models.yml b/.github/workflows/test-hf-models.yml index 6c777cb7..5f59a7c9 100644 --- a/.github/workflows/test-hf-models.yml +++ b/.github/workflows/test-hf-models.yml @@ -20,6 +20,13 @@ on: jobs: test-models: runs-on: ubuntu-latest + env: + HF_HOME: ${{ runner.temp }}/huggingface + HF_HUB_CACHE: ${{ runner.temp }}/huggingface/hub + HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_DISABLE_XET: "1" + HF_MODULES_CACHE: ${{ runner.temp }}/huggingface/modules + WANDB_MODE: disabled strategy: fail-fast: false matrix: @@ -48,11 +55,12 @@ jobs: - name: Install project dependencies run: uv sync --all-extras --dev + - name: Free package download cache + run: uv cache clean + - name: Run tests - env: - WANDB_MODE: disabled run: | - uv run pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ + uv run --no-sync pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ -v \ --log-cli-level=INFO \ --durations=10 \ diff --git a/opt/package/test_hf_org.py b/opt/package/test_hf_org.py index 575ad4bb..9b31ffca 100644 --- a/opt/package/test_hf_org.py +++ b/opt/package/test_hf_org.py @@ -1,11 +1,16 @@ """Tests for models in the mist-models HuggingFace organization.""" +from contextlib import contextmanager +import gc import logging import os from pathlib import Path +import tempfile +from typing import Iterator + import pytest from huggingface_hub import HfApi -from transformers import AutoModel +from transformers import AutoConfig, AutoModel from .test_inference import ( single_molecule_smiles, @@ -19,6 +24,37 @@ logger = logging.getLogger(__name__) 
+@contextmanager +def loaded_hf_model(model_id: str, hf_token: str | None) -> Iterator[object]: + """Load one HF model in an isolated cache that is removed after the check.""" + with tempfile.TemporaryDirectory( + prefix="hf-model-cache-", dir=os.getenv("RUNNER_TEMP") or None + ) as cache_dir: + model = AutoModel.from_pretrained( + model_id, + trust_remote_code=True, + token=hf_token, + cache_dir=cache_dir, + ) + try: + yield model + finally: + del model + gc.collect() + + +def load_hf_config(model_id: str, hf_token: str | None): + with tempfile.TemporaryDirectory( + prefix="hf-config-cache-", dir=os.getenv("RUNNER_TEMP") or None + ) as cache_dir: + return AutoConfig.from_pretrained( + model_id, + trust_remote_code=True, + token=hf_token, + cache_dir=cache_dir, + ) + + @pytest.fixture def hf_token(): # Not testing private models for now @@ -49,27 +85,24 @@ def test_predict_single_molecules( for model_id in single_models: logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - - if "RobertaPreLayerNormModel" in type(model).__name__: - logger.info("Skipping encoder-only model") - continue - - predictions = model.predict(single_molecule_smiles) - assert predictions is not None - - if isinstance(predictions, dict): - assert len(predictions) > 0 - for task_name, task_data in predictions.items(): - if isinstance(task_data, dict) and "value" in task_data: - values = task_data["value"] - assert len(values) == len(single_molecule_smiles) - validate_predictions(values, name=f"{model_id}:{task_name}") - else: - assert len(predictions) == len(single_molecule_smiles) - validate_predictions(predictions, name=model_id) + with loaded_hf_model(model_id, hf_token) as model: + if "RobertaPreLayerNormModel" in type(model).__name__: + logger.info("Skipping encoder-only model") + continue + + predictions = model.predict(single_molecule_smiles) + assert predictions is not None + + if isinstance(predictions, dict): + 
assert len(predictions) > 0 + for task_name, task_data in predictions.items(): + if isinstance(task_data, dict) and "value" in task_data: + values = task_data["value"] + assert len(values) == len(single_molecule_smiles) + validate_predictions(values, name=f"{model_id}:{task_name}") + else: + assert len(predictions) == len(single_molecule_smiles) + validate_predictions(predictions, name=model_id) class TestHFOrgConductivityModels: @@ -81,18 +114,15 @@ def test_predict_mixtures(self, hf_org_models, hf_token, conductivity_test_data) ] for model_id in cond_models: logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - - predictions = model.predict(conductivity_test_data) - assert predictions is not None + with loaded_hf_model(model_id, hf_token) as model: + predictions = model.predict(conductivity_test_data) + assert predictions is not None - if isinstance(predictions, dict): - for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") - else: - validate_predictions(predictions, name=model_id) + if isinstance(predictions, dict): + for key, value in predictions.items(): + validate_predictions(value, name=f"{model_id}:{key}") + else: + validate_predictions(predictions, name=model_id) class TestHFOrgExcessPhysicsModels: @@ -104,32 +134,26 @@ def test_predict_binary_mixture(self, hf_org_models, hf_token, excess_test_data) for model_id in excess_models: logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) + with loaded_hf_model(model_id, hf_token) as model: + predictions = model.predict( + smiles_list=test_case["smiles_list"], + composition=test_case["composition"], + temperature=test_case["temperature"], + ) - predictions = model.predict( - smiles_list=test_case["smiles_list"], - composition=test_case["composition"], - temperature=test_case["temperature"], - ) + assert predictions is not None 
- assert predictions is not None - - if isinstance(predictions, dict): - for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") + if isinstance(predictions, dict): + for key, value in predictions.items(): + validate_predictions(value, name=f"{model_id}:{key}") class TestHFOrgModelIntegrity: def test_all_models_config(self, hf_org_models, hf_token): for model_id in hf_org_models: logger.info(f"Checking config for {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - assert hasattr(model, "config") - assert model.config is not None + config = load_hf_config(model_id, hf_token) + assert config is not None def test_models_required_files(self, hf_org_models, hf_token): api = HfApi(token=hf_token) @@ -149,10 +173,8 @@ def test_models_required_files(self, hf_org_models, hf_token): def test_multi_channel_model_labels(self, hf_org_models, hf_token): for model_id in hf_org_models: logger.info(f"Checking channels for {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - check_multi_channel_labels(model, model_id) + config = load_hf_config(model_id, hf_token) + check_multi_channel_labels(config, model_id) if __name__ == "__main__": diff --git a/opt/package/test_inference.py b/opt/package/test_inference.py index f4e73072..3e5901da 100644 --- a/opt/package/test_inference.py +++ b/opt/package/test_inference.py @@ -115,15 +115,14 @@ def get_model_type_from_path(path: Path) -> str: return "single" -def check_multi_channel_labels(model, model_name: str): +def check_multi_channel_labels(model_or_config, model_name: str): """Verify that multi-output models have channel labels.""" - if "RobertaPreLayerNormModel" in type(model).__name__: + type_name = type(model_or_config).__name__ + if "RobertaPreLayerNorm" in type_name: return - if not hasattr(model, "config"): - return + config = getattr(model_or_config, "config", model_or_config) - config 
= model.config if not hasattr(config, "task_network"): return From 2224a11cfb0af0bbf379d2dc78021bd0fd67d5f1 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Mon, 11 May 2026 11:54:08 -0400 Subject: [PATCH 2/3] move HF cache env vars to later step --- .github/workflows/test-hf-models.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-hf-models.yml b/.github/workflows/test-hf-models.yml index 5f59a7c9..407427d1 100644 --- a/.github/workflows/test-hf-models.yml +++ b/.github/workflows/test-hf-models.yml @@ -21,11 +21,6 @@ jobs: test-models: runs-on: ubuntu-latest env: - HF_HOME: ${{ runner.temp }}/huggingface - HF_HUB_CACHE: ${{ runner.temp }}/huggingface/hub - HF_HUB_DISABLE_PROGRESS_BARS: "1" - HF_HUB_DISABLE_XET: "1" - HF_MODULES_CACHE: ${{ runner.temp }}/huggingface/modules WANDB_MODE: disabled strategy: fail-fast: false @@ -59,6 +54,12 @@ jobs: run: uv cache clean - name: Run tests + env: + HF_HOME: ${{ runner.temp }}/huggingface + HF_HUB_CACHE: ${{ runner.temp }}/huggingface/hub + HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_DISABLE_XET: "1" + HF_MODULES_CACHE: ${{ runner.temp }}/huggingface/modules run: | uv run --no-sync pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ -v \ --log-cli-level=INFO \ From 902c47074bf86b3ee1ddae5f25ec3e47f3f7b259 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Mon, 11 May 2026 16:02:08 -0400 Subject: [PATCH 3/3] separate each model check into a test --- .github/workflows/test-hf-models.yml | 3 +- opt/package/test_hf_org.py | 221 ++++++++++++++++----------- 2 files changed, 129 insertions(+), 95 deletions(-) diff --git a/.github/workflows/test-hf-models.yml b/.github/workflows/test-hf-models.yml index 407427d1..2752aae1 100644 --- a/.github/workflows/test-hf-models.yml +++ b/.github/workflows/test-hf-models.yml @@ -64,8 +64,7 @@ jobs: uv run --no-sync pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ -v \ --log-cli-level=INFO \ - --durations=10 \ + 
--durations=10 - name: Upload test results if: always() diff --git a/opt/package/test_hf_org.py b/opt/package/test_hf_org.py index 9b31ffca..9701f62c 100644 --- a/opt/package/test_hf_org.py +++ b/opt/package/test_hf_org.py @@ -1,6 +1,7 @@ """Tests for models in the mist-models HuggingFace organization.""" from contextlib import contextmanager +from functools import lru_cache import gc import logging import os @@ -22,6 +23,7 @@ ) logger = logging.getLogger(__name__) +HF_ORG = "mist-models" @contextmanager @@ -55,6 +57,69 @@ def load_hf_config(model_id: str, hf_token: str | None): ) +@lru_cache +def list_hf_org_model_ids(hf_token: str | None) -> tuple[str, ...]: + api = HfApi(token=hf_token) + model_ids = tuple(m.id for m in api.list_models(author=HF_ORG)) + logger.info("Found %d models in %s organization", len(model_ids), HF_ORG) + return model_ids + + +def parametrize_model_ids(metafunc, fixture_name: str, model_ids: tuple[str, ...]): + if model_ids: + metafunc.parametrize( + fixture_name, model_ids, ids=lambda m: m.rsplit("/", 1)[-1] + ) + return + + metafunc.parametrize( + fixture_name, + [ + pytest.param( + None, + marks=pytest.mark.skip(reason=f"No {fixture_name} models found"), + ) + ], + ids=["no-models"], + ) + + +def pytest_generate_tests(metafunc): + model_ids = list_hf_org_model_ids(os.getenv("HF_TOKEN")) + + if "hf_model_id" in metafunc.fixturenames: + parametrize_model_ids(metafunc, "hf_model_id", model_ids) + + if "single_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "single_model_id", + tuple( + m for m in model_ids if get_model_type_from_path(Path(m)) == "single" + ), + ) + + if "conductivity_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "conductivity_model_id", + tuple( + m + for m in model_ids + if get_model_type_from_path(Path(m)) == "conductivity" + ), + ) + + if "excess_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "excess_model_id", + tuple( + m for m in 
model_ids if get_model_type_from_path(Path(m)) == "excess" + ), + ) + + @pytest.fixture def hf_token(): # Not testing private models for now @@ -62,119 +127,89 @@ def hf_token(): return os.getenv("HF_TOKEN") -@pytest.fixture -def hf_org_models(hf_token): - api = HfApi(token=hf_token) - models = list(api.list_models(author="mist-models")) - - if not models: - pytest.skip("No models found in mist-models organization") - - model_ids = [m.id for m in models] - logger.info(f"Found {len(model_ids)} models in mist-models organization") - return model_ids - - class TestHFOrgSingleMoleculeModels: def test_predict_single_molecules( - self, hf_org_models, hf_token, single_molecule_smiles + self, single_model_id, hf_token, single_molecule_smiles ): - single_models = [ - m for m in hf_org_models if get_model_type_from_path(Path(m)) == "single" - ] - - for model_id in single_models: - logger.info(f"Testing {model_id}") - with loaded_hf_model(model_id, hf_token) as model: - if "RobertaPreLayerNormModel" in type(model).__name__: - logger.info("Skipping encoder-only model") - continue - - predictions = model.predict(single_molecule_smiles) - assert predictions is not None - - if isinstance(predictions, dict): - assert len(predictions) > 0 - for task_name, task_data in predictions.items(): - if isinstance(task_data, dict) and "value" in task_data: - values = task_data["value"] - assert len(values) == len(single_molecule_smiles) - validate_predictions(values, name=f"{model_id}:{task_name}") - else: - assert len(predictions) == len(single_molecule_smiles) - validate_predictions(predictions, name=model_id) + logger.info(f"Testing {single_model_id}") + with loaded_hf_model(single_model_id, hf_token) as model: + if "RobertaPreLayerNormModel" in type(model).__name__: + pytest.skip("Skipping encoder-only model") + + predictions = model.predict(single_molecule_smiles) + assert predictions is not None + + if isinstance(predictions, dict): + assert len(predictions) > 0 + for task_name, 
task_data in predictions.items(): + if isinstance(task_data, dict) and "value" in task_data: + values = task_data["value"] + assert len(values) == len(single_molecule_smiles) + validate_predictions( + values, name=f"{single_model_id}:{task_name}" + ) + else: + assert len(predictions) == len(single_molecule_smiles) + validate_predictions(predictions, name=single_model_id) class TestHFOrgConductivityModels: - def test_predict_mixtures(self, hf_org_models, hf_token, conductivity_test_data): - cond_models = [ - m - for m in hf_org_models - if get_model_type_from_path(Path(m)) == "conductivity" - ] - for model_id in cond_models: - logger.info(f"Testing {model_id}") - with loaded_hf_model(model_id, hf_token) as model: - predictions = model.predict(conductivity_test_data) - assert predictions is not None - - if isinstance(predictions, dict): - for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") - else: - validate_predictions(predictions, name=model_id) + def test_predict_mixtures( + self, conductivity_model_id, hf_token, conductivity_test_data + ): + logger.info(f"Testing {conductivity_model_id}") + with loaded_hf_model(conductivity_model_id, hf_token) as model: + predictions = model.predict(conductivity_test_data) + assert predictions is not None + + if isinstance(predictions, dict): + for key, value in predictions.items(): + validate_predictions(value, name=f"{conductivity_model_id}:{key}") + else: + validate_predictions(predictions, name=conductivity_model_id) class TestHFOrgExcessPhysicsModels: - def test_predict_binary_mixture(self, hf_org_models, hf_token, excess_test_data): - excess_models = [ - m for m in hf_org_models if get_model_type_from_path(Path(m)) == "excess" - ] + def test_predict_binary_mixture(self, excess_model_id, hf_token, excess_test_data): test_case = excess_test_data[0] + logger.info(f"Testing {excess_model_id}") + with loaded_hf_model(excess_model_id, hf_token) as model: + predictions = model.predict( + 
smiles_list=test_case["smiles_list"], + composition=test_case["composition"], + temperature=test_case["temperature"], + ) - for model_id in excess_models: - logger.info(f"Testing {model_id}") - with loaded_hf_model(model_id, hf_token) as model: - predictions = model.predict( - smiles_list=test_case["smiles_list"], - composition=test_case["composition"], - temperature=test_case["temperature"], - ) - - assert predictions is not None + assert predictions is not None - if isinstance(predictions, dict): - for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") + if isinstance(predictions, dict): + for key, value in predictions.items(): + validate_predictions(value, name=f"{excess_model_id}:{key}") class TestHFOrgModelIntegrity: - def test_all_models_config(self, hf_org_models, hf_token): - for model_id in hf_org_models: - logger.info(f"Checking config for {model_id}") - config = load_hf_config(model_id, hf_token) - assert config is not None + def test_all_models_config(self, hf_model_id, hf_token): + logger.info(f"Checking config for {hf_model_id}") + config = load_hf_config(hf_model_id, hf_token) + assert config is not None - def test_models_required_files(self, hf_org_models, hf_token): + def test_models_required_files(self, hf_model_id, hf_token): api = HfApi(token=hf_token) + model_info = api.model_info(hf_model_id) + siblings = {f.rfilename for f in model_info.siblings} - for model_id in hf_org_models: - model_info = api.model_info(model_id) - siblings = {f.rfilename for f in model_info.siblings} + assert "config.json" in siblings, f"{hf_model_id} missing config.json" + assert "README.md" in siblings, f"{hf_model_id} missing README.md" - assert "config.json" in siblings, f"{model_id} missing config.json" - assert "README.md" in siblings, f"{model_id} missing README.md" - - has_weights = any( - "safetensors" in f or "pytorch_model.bin" in f for f in siblings - ) - assert has_weights, f"{model_id} missing model weights" + 
has_weights = any( + "safetensors" in f or "pytorch_model.bin" in f for f in siblings + ) + assert has_weights, f"{hf_model_id} missing model weights" - def test_multi_channel_model_labels(self, hf_org_models, hf_token): - for model_id in hf_org_models: - logger.info(f"Checking channels for {model_id}") - config = load_hf_config(model_id, hf_token) - check_multi_channel_labels(config, model_id) + def test_multi_channel_model_labels(self, hf_model_id, hf_token): + logger.info(f"Checking channels for {hf_model_id}") + config = load_hf_config(hf_model_id, hf_token) + check_multi_channel_labels(config, hf_model_id) if __name__ == "__main__":