diff --git a/backend/app/features/ingestion/parsers/case_docs.py b/backend/app/features/ingestion/parsers/case_docs.py index b2dc58c5..593cea7e 100644 --- a/backend/app/features/ingestion/parsers/case_docs.py +++ b/backend/app/features/ingestion/parsers/case_docs.py @@ -67,15 +67,22 @@ def parse_env_build(env_build_path: str | Path) -> dict[str, str | None]: Returns ------- dict - Dictionary with keys 'grid_resolution', 'compiler', 'mpilib' (str or None) + Dictionary with keys 'grid_resolution', 'compiler', 'mpilib', + and 'cime_output_root' (str or None) """ env_build_path = Path(env_build_path) grid_resolution = _extract_value_from_file(env_build_path, "GRID") compiler = _extract_value_from_file(env_build_path, "COMPILER") mpilib = _extract_value_from_file(env_build_path, "MPILIB") + cime_output_root = _extract_value_from_file(env_build_path, "CIME_OUTPUT_ROOT") - return {"grid_resolution": grid_resolution, "compiler": compiler, "mpilib": mpilib} + return { + "grid_resolution": grid_resolution, + "compiler": compiler, + "mpilib": mpilib, + "cime_output_root": cime_output_root, + } def parse_env_run(env_run_path: str | Path) -> dict[str, str | None]: @@ -228,6 +235,26 @@ def _extract_campaign_and_experiment_type( return campaign, experiment_type +def _substitute_path_variables( + path: str | None, + variables: dict[str, str | None], +) -> str | None: + """Substitute supported shell-style path variables when values are known.""" + if path is None: + return None + + pattern = re.compile( + r"\$(?:{(?PCIME_OUTPUT_ROOT|CASE)}|(?PCIME_OUTPUT_ROOT|CASE)\b)" + ) + + def replace(match: re.Match[str]) -> str: + variable_name = match.group("braced") or match.group("plain") + value = variables.get(variable_name) + return value if value is not None else match.group(0) + + return pattern.sub(replace, path) + + def _calculate_simulation_end_date( simulation_start_date: str | None, stop_option: str | None, diff --git a/backend/app/features/ingestion/parsers/parser.py b/backend/app/features/ingestion/parsers/parser.py index 4a69a1de..f716bd74 100644 --- a/backend/app/features/ingestion/parsers/parser.py +++ b/backend/app/features/ingestion/parsers/parser.py @@ -30,6 +30,7 @@ from app.core.logger import _setup_custom_logger from app.features.ingestion.parsers.case_docs import ( + _substitute_path_variables, parse_env_build, parse_env_case, parse_env_run, @@ -545,6 +546,19 @@ def _parse_all_files(exec_dir: str, files: dict[str, str | None]) -> ParsedSimul if case_status_metadata is not None: metadata.update(case_status_metadata) + path_variables = { + "CASE": metadata.get("case_name"), + "CIME_OUTPUT_ROOT": metadata.get("cime_output_root"), + } + metadata["output_path"] = _substitute_path_variables( + metadata.get("output_path"), + path_variables, + ) + metadata["archive_path"] = _substitute_path_variables( + metadata.get("archive_path"), + path_variables, + ) + execution_id = _resolve_execution_id(metadata.get("execution_id"), exec_dir) return ParsedSimulation( diff --git a/backend/tests/features/ingestion/parsers/test_case_docs.py b/backend/tests/features/ingestion/parsers/test_case_docs.py index 312c6f2a..9292a13e 100644 --- a/backend/tests/features/ingestion/parsers/test_case_docs.py +++ b/backend/tests/features/ingestion/parsers/test_case_docs.py @@ -2,6 +2,7 @@ from unittest.mock import patch from app.features.ingestion.parsers.case_docs import ( + _substitute_path_variables, parse_env_build, parse_env_case, parse_env_run, @@ -113,6 +114,7 @@ def test_value(self, tmp_path): + """ tmp_build = tmp_path / "env_build.xml" @@ -121,6 +123,7 @@ def test_value(self, tmp_path): assert result["compiler"] == "intel" assert result["mpilib"] == "mpt" + assert result["cime_output_root"] == "/global/cfs/cdirs/e3sm" def test_text(self, tmp_path): xml_build = """ @@ -151,6 +154,36 @@ def test_missing_entry_returns_none(self, tmp_path): assert result["mpilib"] is None +class TestSubstitutePathVariables: + def test_replaces_supported_variables(self): + result = _substitute_path_variables( + "$CIME_OUTPUT_ROOT/archive/${CASE}/run/$CASE", + { + "CIME_OUTPUT_ROOT": "/global/cfs/cdirs/e3sm", + "CASE": "v3.LR.historical_0121", + }, + ) + + assert ( + result == "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121/run/" + "v3.LR.historical_0121" + ) + + def test_leaves_unknown_values_unsubstituted(self): + result = _substitute_path_variables( + "$CIME_OUTPUT_ROOT/archive/$CASE", + { + "CIME_OUTPUT_ROOT": None, + "CASE": "v3.LR.historical_0121", + }, + ) + + assert result == "$CIME_OUTPUT_ROOT/archive/v3.LR.historical_0121" + + def test_none_input_returns_none(self): + assert _substitute_path_variables(None, {"CASE": "case"}) is None + + class TestParseEnvRun: def test_extracts_path_artifact_values(self, tmp_path): xml_run = """ diff --git a/backend/tests/features/ingestion/parsers/test_parser.py b/backend/tests/features/ingestion/parsers/test_parser.py index 03c0c1c3..dcfe9ba5 100644 --- a/backend/tests/features/ingestion/parsers/test_parser.py +++ b/backend/tests/features/ingestion/parsers/test_parser.py @@ -612,6 +612,39 @@ def test_case_status_is_merged(self, tmp_path: Path) -> None: assert skipped == 0 assert result[0].status == "failed" + def test_parse_all_files_substitutes_archive_and_output_paths(self) -> None: + exec_dir = "/tmp/test_case/1.0-0" + files: dict[str, str | None] = {key: f"/tmp/{key}" for key in parser.FILE_SPECS} + + with self._mock_all_parsers( + parse_env_case={ + "case_name": "v3.LR.historical_0121", + "campaign": "v3.LR.historical", + "machine": "test", + }, + parse_env_build={ + "compiler": "gnu", + "cime_output_root": "/global/cfs/cdirs/e3sm", + }, + parse_env_run={ + "simulation_start_date": "2020-01-01", + "output_path": "${CIME_OUTPUT_ROOT}/run/$CASE", + "archive_path": "$CIME_OUTPUT_ROOT/archive/${CASE}", + }, + parse_e3sm_timing={ + "execution_id": "1.0-0", + "run_start_date": "2025-12-18T20:09:33", + "run_end_date": "2025-12-18T20:54:58", + }, + ): + parsed = parser._parse_all_files(exec_dir, files) + + assert parsed.output_path == "/global/cfs/cdirs/e3sm/run/v3.LR.historical_0121" + assert ( + parsed.archive_path + == "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121" + ) + def test_case_status_run_dates_override_timing_dates(self, tmp_path: Path) -> None: execution_dir = tmp_path / "1.0-0" execution_dir.mkdir(parents=True) diff --git a/backend/tests/features/ingestion/test_ingest.py b/backend/tests/features/ingestion/test_ingest.py index 4ca3bc85..35239a49 100644 --- a/backend/tests/features/ingestion/test_ingest.py +++ b/backend/tests/features/ingestion/test_ingest.py @@ -12,9 +12,11 @@ SimulationCreateDraft, _build_config_snapshot, _build_simulation_create_draft, + _extract_postprocessing_script_path, _get_or_create_case, _get_reference_metadata_for_case, _normalize_git_url, + _normalize_path_candidate, _normalize_simulation_status, _normalize_simulation_type, _stringify_config_value, @@ -1924,6 +1926,38 @@ def test_different_case_name_creates_separate_cases(self, db: Session) -> None: class TestIngestHelpers: + def test_extract_postprocessing_script_path_returns_none_for_unparseable_value( + self, + ) -> None: + with patch("app.features.ingestion.ingest.logger.warning") as mock_warning: + result = _extract_postprocessing_script_path( + postprocessing_script="'", + execution_dir="/tmp/execution", + ) + + assert result is None + mock_warning.assert_called_once() + + def test_extract_postprocessing_script_path_returns_none_for_empty_tokens( + self, + ) -> None: + with patch( + "app.features.ingestion.ingest.shlex.split", return_value=[] + ) as mock_split: + result = _extract_postprocessing_script_path( + postprocessing_script="/tmp/post.sh", + execution_dir="/tmp/execution", + ) + + assert result is None + mock_split.assert_called_once_with("/tmp/post.sh") + + @pytest.mark.parametrize("value", [None, "", " "]) + def test_normalize_path_candidate_returns_none_for_blank_values( + self, value: str | None + ) -> None: + assert _normalize_path_candidate(value) is None + def test_get_or_create_case_sets_missing_case_group(self, db: Session) -> None: case = Case(name="case_group_test", case_group=None) db.add(case) diff --git a/backend/tests/features/pace/test_api.py b/backend/tests/features/pace/test_api.py index 56a32698..38c3a6e8 100644 --- a/backend/tests/features/pace/test_api.py +++ b/backend/tests/features/pace/test_api.py @@ -162,6 +162,20 @@ def test_endpoint_returns_null_on_upstream_non_200( assert response.status_code == 200 assert response.json() == {"executionId": "x", "experimentId": None} + def test_endpoint_returns_null_on_non_200_response_object( + self, client, monkeypatch + ) -> None: + monkeypatch.setattr( + pace_api.urllib.request, + "urlopen", + lambda *args, **kwargs: _FakeHttpResponse(503, "retry later"), + ) + + response = client.get(f"{API_BASE}/pace/resolve", params={"execution_id": "x"}) + + assert response.status_code == 200 + assert response.json() == {"executionId": "x", "experimentId": None} + def test_endpoint_returns_null_on_malformed_json(self, client, monkeypatch) -> None: monkeypatch.setattr( pace_api.urllib.request, @@ -285,6 +299,20 @@ def fake_urlopen(request: urllib.request.Request, timeout: float): assert second_response.json()["experimentId"] is None assert call_count == 1 + def test_extract_experiment_id_returns_none_for_non_dict_first_item(self) -> None: + assert pace_api._extract_experiment_id(["228920"]) is None + + def test_get_cached_experiment_id_expires_entries(self, monkeypatch) -> None: + monkeypatch.setattr(pace_api.time, "monotonic", lambda: 100.0) + pace_api._set_cached_experiment_id("expired-exec", "228920") + + monkeypatch.setattr(pace_api.time, "monotonic", lambda: 1000.0) + cache_hit, experiment_id = pace_api._get_cached_experiment_id("expired-exec") + + assert cache_hit is False + assert experiment_id is None + assert "expired-exec" not in pace_api._PACE_CACHE + def test_endpoint_rejects_missing_execution_id(self, client) -> None: response = client.get(f"{API_BASE}/pace/resolve")