diff --git a/.github/workflows/test-hf-models.yml b/.github/workflows/test-hf-models.yml index 6c777cb7..2752aae1 100644 --- a/.github/workflows/test-hf-models.yml +++ b/.github/workflows/test-hf-models.yml @@ -20,6 +20,8 @@ on: jobs: test-models: runs-on: ubuntu-latest + env: + WANDB_MODE: disabled strategy: fail-fast: false matrix: @@ -48,15 +50,21 @@ jobs: - name: Install project dependencies run: uv sync --all-extras --dev + - name: Free package download cache + run: uv cache clean + - name: Run tests env: - WANDB_MODE: disabled + HF_HOME: ${{ runner.temp }}/huggingface + HF_HUB_CACHE: ${{ runner.temp }}/huggingface/hub + HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_DISABLE_XET: "1" + HF_MODULES_CACHE: ${{ runner.temp }}/huggingface/modules run: | - uv run pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ + uv run --no-sync pytest opt/package/test_hf_org.py::${{ matrix.test-class }} \ -v \ --log-cli-level=INFO \ - --durations=10 \ - --maxfail=5 + --durations=10 - name: Upload test results if: always() diff --git a/opt/package/test_hf_org.py b/opt/package/test_hf_org.py index 575ad4bb..9701f62c 100644 --- a/opt/package/test_hf_org.py +++ b/opt/package/test_hf_org.py @@ -1,11 +1,17 @@ """Tests for models in the mist-models HuggingFace organization.""" +from contextlib import contextmanager +from functools import lru_cache +import gc import logging import os from pathlib import Path +import tempfile +from typing import Iterator + import pytest from huggingface_hub import HfApi -from transformers import AutoModel +from transformers import AutoConfig, AutoModel from .test_inference import ( single_molecule_smiles, @@ -17,6 +23,101 @@ ) logger = logging.getLogger(__name__) +HF_ORG = "mist-models" + + +@contextmanager +def loaded_hf_model(model_id: str, hf_token: str | None) -> Iterator[object]: + """Load one HF model in an isolated cache that is removed after the check.""" + with tempfile.TemporaryDirectory( + prefix="hf-model-cache-", dir=os.getenv("RUNNER_TEMP") or None + ) as cache_dir: + model = AutoModel.from_pretrained( + model_id, + trust_remote_code=True, + token=hf_token, + cache_dir=cache_dir, + ) + try: + yield model + finally: + del model + gc.collect() + + +def load_hf_config(model_id: str, hf_token: str | None): + with tempfile.TemporaryDirectory( + prefix="hf-config-cache-", dir=os.getenv("RUNNER_TEMP") or None + ) as cache_dir: + return AutoConfig.from_pretrained( + model_id, + trust_remote_code=True, + token=hf_token, + cache_dir=cache_dir, + ) + + +@lru_cache +def list_hf_org_model_ids(hf_token: str | None) -> tuple[str, ...]: + api = HfApi(token=hf_token) + model_ids = tuple(m.id for m in api.list_models(author=HF_ORG)) + logger.info("Found %d models in %s organization", len(model_ids), HF_ORG) + return model_ids + + +def parametrize_model_ids(metafunc, fixture_name: str, model_ids: tuple[str, ...]): + if model_ids: + metafunc.parametrize( + fixture_name, model_ids, ids=lambda m: m.rsplit("/", 1)[-1] + ) + return + + metafunc.parametrize( + fixture_name, + [ + pytest.param( + None, + marks=pytest.mark.skip(reason=f"No {fixture_name} models found"), + ) + ], + ids=["no-models"], + ) + + +def pytest_generate_tests(metafunc): + model_ids = list_hf_org_model_ids(os.getenv("HF_TOKEN")) + + if "hf_model_id" in metafunc.fixturenames: + parametrize_model_ids(metafunc, "hf_model_id", model_ids) + + if "single_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "single_model_id", + tuple( + m for m in model_ids if get_model_type_from_path(Path(m)) == "single" + ), + ) + + if "conductivity_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "conductivity_model_id", + tuple( + m + for m in model_ids + if get_model_type_from_path(Path(m)) == "conductivity" + ), + ) + + if "excess_model_id" in metafunc.fixturenames: + parametrize_model_ids( + metafunc, + "excess_model_id", + tuple( + m for m in model_ids if get_model_type_from_path(Path(m)) == "excess" + ), + ) @pytest.fixture @@ -26,36 +127,14 @@ def hf_token(): return os.getenv("HF_TOKEN") -@pytest.fixture -def hf_org_models(hf_token): - api = HfApi(token=hf_token) - models = list(api.list_models(author="mist-models")) - - if not models: - pytest.skip("No models found in mist-models organization") - - model_ids = [m.id for m in models] - logger.info(f"Found {len(model_ids)} models in mist-models organization") - return model_ids - - class TestHFOrgSingleMoleculeModels: def test_predict_single_molecules( - self, hf_org_models, hf_token, single_molecule_smiles + self, single_model_id, hf_token, single_molecule_smiles ): - single_models = [ - m for m in hf_org_models if get_model_type_from_path(Path(m)) == "single" - ] - - for model_id in single_models: - logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - + logger.info(f"Testing {single_model_id}") + with loaded_hf_model(single_model_id, hf_token) as model: if "RobertaPreLayerNormModel" in type(model).__name__: - logger.info("Skipping encoder-only model") - continue + pytest.skip("Skipping encoder-only model") predictions = model.predict(single_molecule_smiles) assert predictions is not None @@ -66,48 +145,35 @@ def test_predict_single_molecules( if isinstance(task_data, dict) and "value" in task_data: values = task_data["value"] assert len(values) == len(single_molecule_smiles) - validate_predictions(values, name=f"{model_id}:{task_name}") + validate_predictions( + values, name=f"{single_model_id}:{task_name}" + ) else: assert len(predictions) == len(single_molecule_smiles) - validate_predictions(predictions, name=model_id) + validate_predictions(predictions, name=single_model_id) class TestHFOrgConductivityModels: - def test_predict_mixtures(self, hf_org_models, hf_token, conductivity_test_data): - cond_models = [ - m - for m in hf_org_models - if get_model_type_from_path(Path(m)) == "conductivity" - ] - for model_id in cond_models: - logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - + def test_predict_mixtures( + self, conductivity_model_id, hf_token, conductivity_test_data + ): + logger.info(f"Testing {conductivity_model_id}") + with loaded_hf_model(conductivity_model_id, hf_token) as model: predictions = model.predict(conductivity_test_data) assert predictions is not None if isinstance(predictions, dict): for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") + validate_predictions(value, name=f"{conductivity_model_id}:{key}") else: - validate_predictions(predictions, name=model_id) + validate_predictions(predictions, name=conductivity_model_id) class TestHFOrgExcessPhysicsModels: - def test_predict_binary_mixture(self, hf_org_models, hf_token, excess_test_data): - excess_models = [ - m for m in hf_org_models if get_model_type_from_path(Path(m)) == "excess" - ] + def test_predict_binary_mixture(self, excess_model_id, hf_token, excess_test_data): test_case = excess_test_data[0] - - for model_id in excess_models: - logger.info(f"Testing {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - + logger.info(f"Testing {excess_model_id}") + with loaded_hf_model(excess_model_id, hf_token) as model: predictions = model.predict( smiles_list=test_case["smiles_list"], composition=test_case["composition"], @@ -118,41 +184,32 @@ def test_predict_binary_mixture(self, hf_org_models, hf_token, excess_test_data) if isinstance(predictions, dict): for key, value in predictions.items(): - validate_predictions(value, name=f"{model_id}:{key}") + validate_predictions(value, name=f"{excess_model_id}:{key}") class TestHFOrgModelIntegrity: - def test_all_models_config(self, hf_org_models, hf_token): - for model_id in hf_org_models: - logger.info(f"Checking config for {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - assert hasattr(model, "config") - assert model.config is not None + def test_all_models_config(self, hf_model_id, hf_token): + logger.info(f"Checking config for {hf_model_id}") + config = load_hf_config(hf_model_id, hf_token) + assert config is not None - def test_models_required_files(self, hf_org_models, hf_token): + def test_models_required_files(self, hf_model_id, hf_token): api = HfApi(token=hf_token) + model_info = api.model_info(hf_model_id) + siblings = {f.rfilename for f in model_info.siblings} - for model_id in hf_org_models: - model_info = api.model_info(model_id) - siblings = {f.rfilename for f in model_info.siblings} + assert "config.json" in siblings, f"{hf_model_id} missing config.json" + assert "README.md" in siblings, f"{hf_model_id} missing README.md" - assert "config.json" in siblings, f"{model_id} missing config.json" - assert "README.md" in siblings, f"{model_id} missing README.md" + has_weights = any( + "safetensors" in f or "pytorch_model.bin" in f for f in siblings + ) + assert has_weights, f"{hf_model_id} missing model weights" - has_weights = any( - "safetensors" in f or "pytorch_model.bin" in f for f in siblings - ) - assert has_weights, f"{model_id} missing model weights" - - def test_multi_channel_model_labels(self, hf_org_models, hf_token): - for model_id in hf_org_models: - logger.info(f"Checking channels for {model_id}") - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, token=hf_token - ) - check_multi_channel_labels(model, model_id) + def test_multi_channel_model_labels(self, hf_model_id, hf_token): + logger.info(f"Checking channels for {hf_model_id}") + config = load_hf_config(hf_model_id, hf_token) + check_multi_channel_labels(config, hf_model_id) if __name__ == "__main__": diff --git a/opt/package/test_inference.py b/opt/package/test_inference.py index f4e73072..3e5901da 100644 --- a/opt/package/test_inference.py +++ b/opt/package/test_inference.py @@ -115,15 +115,14 @@ def get_model_type_from_path(path: Path) -> str: return "single" -def check_multi_channel_labels(model, model_name: str): +def check_multi_channel_labels(model_or_config, model_name: str): """Verify that multi-output models have channel labels.""" - if "RobertaPreLayerNormModel" in type(model).__name__: + type_name = type(model_or_config).__name__ + if "RobertaPreLayerNorm" in type_name: return - if not hasattr(model, "config"): - return + config = getattr(model_or_config, "config", model_or_config) - config = model.config if not hasattr(config, "task_network"): return