E3SM-Project · tomvothecoder · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/backend/app/features/ingestion/parsers/case_docs.py b/backend/app/features/ingestion/parsers/case_docs.py
@@ -67,15 +67,22 @@ def parse_env_build(env_build_path: str | Path) -> dict[str, str | None]:
     Returns
     -------
     dict
-        Dictionary with keys 'grid_resolution', 'compiler', 'mpilib' (str or None)
+        Dictionary with keys 'grid_resolution', 'compiler', 'mpilib',
+        and 'cime_output_root' (str or None)
     """
     env_build_path = Path(env_build_path)
 
     grid_resolution = _extract_value_from_file(env_build_path, "GRID")
     compiler = _extract_value_from_file(env_build_path, "COMPILER")
     mpilib = _extract_value_from_file(env_build_path, "MPILIB")
+    cime_output_root = _extract_value_from_file(env_build_path, "CIME_OUTPUT_ROOT")
 
-    return {"grid_resolution": grid_resolution, "compiler": compiler, "mpilib": mpilib}
+    return {
+        "grid_resolution": grid_resolution,
+        "compiler": compiler,
+        "mpilib": mpilib,
+        "cime_output_root": cime_output_root,
+    }
 
 
 def parse_env_run(env_run_path: str | Path) -> dict[str, str | None]:
@@ -228,6 +235,26 @@ def _extract_campaign_and_experiment_type(
     return campaign, experiment_type
 
 
+def _substitute_path_variables(
+    path: str | None,
+    variables: dict[str, str | None],
+) -> str | None:
+    """Substitute supported shell-style path variables when values are known."""
+    if path is None:
+        return None
+
+    pattern = re.compile(
+        r"\$(?:{(?P<braced>CIME_OUTPUT_ROOT|CASE)}|(?P<plain>CIME_OUTPUT_ROOT|CASE)\b)"
+    )
+
+    def replace(match: re.Match[str]) -> str:
+        variable_name = match.group("braced") or match.group("plain")
+        value = variables.get(variable_name)
+        return value if value is not None else match.group(0)
+
+    return pattern.sub(replace, path)
+
+
 def _calculate_simulation_end_date(
     simulation_start_date: str | None,
     stop_option: str | None,

diff --git a/backend/app/features/ingestion/parsers/parser.py b/backend/app/features/ingestion/parsers/parser.py
@@ -30,6 +30,7 @@
 
 from app.core.logger import _setup_custom_logger
 from app.features.ingestion.parsers.case_docs import (
+    _substitute_path_variables,
     parse_env_build,
     parse_env_case,
     parse_env_run,
@@ -545,6 +546,19 @@ def _parse_all_files(exec_dir: str, files: dict[str, str | None]) -> ParsedSimul
     if case_status_metadata is not None:
         metadata.update(case_status_metadata)
 
+    path_variables = {
+        "CASE": metadata.get("case_name"),
+        "CIME_OUTPUT_ROOT": metadata.get("cime_output_root"),
+    }
+    metadata["output_path"] = _substitute_path_variables(
+        metadata.get("output_path"),
+        path_variables,
+    )
+    metadata["archive_path"] = _substitute_path_variables(
+        metadata.get("archive_path"),
+        path_variables,
+    )
+
     execution_id = _resolve_execution_id(metadata.get("execution_id"), exec_dir)
 
     return ParsedSimulation(

diff --git a/backend/tests/features/ingestion/parsers/test_case_docs.py b/backend/tests/features/ingestion/parsers/test_case_docs.py
@@ -2,6 +2,7 @@
 from unittest.mock import patch
 
 from app.features.ingestion.parsers.case_docs import (
+    _substitute_path_variables,
     parse_env_build,
     parse_env_case,
     parse_env_run,
@@ -113,6 +114,7 @@ def test_value(self, tmp_path):
         <config>
             <entry id="COMPILER" value="intel" />
             <entry id="MPILIB" value="mpt" />
+            <entry id="CIME_OUTPUT_ROOT" value="/global/cfs/cdirs/e3sm" />
         </config>
         """
         tmp_build = tmp_path / "env_build.xml"
@@ -121,6 +123,7 @@ def test_value(self, tmp_path):
 
         assert result["compiler"] == "intel"
         assert result["mpilib"] == "mpt"
+        assert result["cime_output_root"] == "/global/cfs/cdirs/e3sm"
 
     def test_text(self, tmp_path):
         xml_build = """
@@ -151,6 +154,36 @@ def test_missing_entry_returns_none(self, tmp_path):
         assert result["mpilib"] is None
 
 
+class TestSubstitutePathVariables:
+    def test_replaces_supported_variables(self):
+        result = _substitute_path_variables(
+            "$CIME_OUTPUT_ROOT/archive/${CASE}/run/$CASE",
+            {
+                "CIME_OUTPUT_ROOT": "/global/cfs/cdirs/e3sm",
+                "CASE": "v3.LR.historical_0121",
+            },
+        )
+
+        assert (
+            result == "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121/run/"
+            "v3.LR.historical_0121"
+        )
+
+    def test_leaves_unknown_values_unsubstituted(self):
+        result = _substitute_path_variables(
+            "$CIME_OUTPUT_ROOT/archive/$CASE",
+            {
+                "CIME_OUTPUT_ROOT": None,
+                "CASE": "v3.LR.historical_0121",
+            },
+        )
+
+        assert result == "$CIME_OUTPUT_ROOT/archive/v3.LR.historical_0121"
+
+    def test_none_input_returns_none(self):
+        assert _substitute_path_variables(None, {"CASE": "case"}) is None
+
+
 class TestParseEnvRun:
     def test_extracts_path_artifact_values(self, tmp_path):
         xml_run = """

diff --git a/backend/tests/features/ingestion/parsers/test_parser.py b/backend/tests/features/ingestion/parsers/test_parser.py
@@ -612,6 +612,39 @@ def test_case_status_is_merged(self, tmp_path: Path) -> None:
         assert skipped == 0
         assert result[0].status == "failed"
 
+    def test_parse_all_files_substitutes_archive_and_output_paths(self) -> None:
+        exec_dir = "/tmp/test_case/1.0-0"
+        files: dict[str, str | None] = {key: f"/tmp/{key}" for key in parser.FILE_SPECS}
+
+        with self._mock_all_parsers(
+            parse_env_case={
+                "case_name": "v3.LR.historical_0121",
+                "campaign": "v3.LR.historical",
+                "machine": "test",
+            },
+            parse_env_build={
+                "compiler": "gnu",
+                "cime_output_root": "/global/cfs/cdirs/e3sm",
+            },
+            parse_env_run={
+                "simulation_start_date": "2020-01-01",
+                "output_path": "${CIME_OUTPUT_ROOT}/run/$CASE",
+                "archive_path": "$CIME_OUTPUT_ROOT/archive/${CASE}",
+            },
+            parse_e3sm_timing={
+                "execution_id": "1.0-0",
+                "run_start_date": "2025-12-18T20:09:33",
+                "run_end_date": "2025-12-18T20:54:58",
+            },
+        ):
+            parsed = parser._parse_all_files(exec_dir, files)
+
+        assert parsed.output_path == "/global/cfs/cdirs/e3sm/run/v3.LR.historical_0121"
+        assert (
+            parsed.archive_path
+            == "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121"
+        )
+
     def test_case_status_run_dates_override_timing_dates(self, tmp_path: Path) -> None:
         execution_dir = tmp_path / "1.0-0"
         execution_dir.mkdir(parents=True)

diff --git a/backend/tests/features/ingestion/test_ingest.py b/backend/tests/features/ingestion/test_ingest.py
@@ -12,9 +12,11 @@
     SimulationCreateDraft,
     _build_config_snapshot,
     _build_simulation_create_draft,
+    _extract_postprocessing_script_path,
     _get_or_create_case,
     _get_reference_metadata_for_case,
     _normalize_git_url,
+    _normalize_path_candidate,
     _normalize_simulation_status,
     _normalize_simulation_type,
     _stringify_config_value,
@@ -1924,6 +1926,38 @@ def test_different_case_name_creates_separate_cases(self, db: Session) -> None:
 
 
 class TestIngestHelpers:
+    def test_extract_postprocessing_script_path_returns_none_for_unparseable_value(
+        self,
+    ) -> None:
+        with patch("app.features.ingestion.ingest.logger.warning") as mock_warning:
+            result = _extract_postprocessing_script_path(
+                postprocessing_script="'",
+                execution_dir="/tmp/execution",
+            )
+
+        assert result is None
+        mock_warning.assert_called_once()
+
+    def test_extract_postprocessing_script_path_returns_none_for_empty_tokens(
+        self,
+    ) -> None:
+        with patch(
+            "app.features.ingestion.ingest.shlex.split", return_value=[]
+        ) as mock_split:
+            result = _extract_postprocessing_script_path(
+                postprocessing_script="/tmp/post.sh",
+                execution_dir="/tmp/execution",
+            )
+
+        assert result is None
+        mock_split.assert_called_once_with("/tmp/post.sh")
+
+    @pytest.mark.parametrize("value", [None, "", "   "])
+    def test_normalize_path_candidate_returns_none_for_blank_values(
+        self, value: str | None
+    ) -> None:
+        assert _normalize_path_candidate(value) is None
+
     def test_get_or_create_case_sets_missing_case_group(self, db: Session) -> None:
         case = Case(name="case_group_test", case_group=None)
         db.add(case)

diff --git a/backend/tests/features/pace/test_api.py b/backend/tests/features/pace/test_api.py
@@ -162,6 +162,20 @@ def test_endpoint_returns_null_on_upstream_non_200(
         assert response.status_code == 200
         assert response.json() == {"executionId": "x", "experimentId": None}
 
+    def test_endpoint_returns_null_on_non_200_response_object(
+        self, client, monkeypatch
+    ) -> None:
+        monkeypatch.setattr(
+            pace_api.urllib.request,
+            "urlopen",
+            lambda *args, **kwargs: _FakeHttpResponse(503, "retry later"),
+        )
+
+        response = client.get(f"{API_BASE}/pace/resolve", params={"execution_id": "x"})
+
+        assert response.status_code == 200
+        assert response.json() == {"executionId": "x", "experimentId": None}
+
     def test_endpoint_returns_null_on_malformed_json(self, client, monkeypatch) -> None:
         monkeypatch.setattr(
             pace_api.urllib.request,
@@ -285,6 +299,20 @@ def fake_urlopen(request: urllib.request.Request, timeout: float):
         assert second_response.json()["experimentId"] is None
         assert call_count == 1
 
+    def test_extract_experiment_id_returns_none_for_non_dict_first_item(self) -> None:
+        assert pace_api._extract_experiment_id(["228920"]) is None
+
+    def test_get_cached_experiment_id_expires_entries(self, monkeypatch) -> None:
+        monkeypatch.setattr(pace_api.time, "monotonic", lambda: 100.0)
+        pace_api._set_cached_experiment_id("expired-exec", "228920")
+
+        monkeypatch.setattr(pace_api.time, "monotonic", lambda: 1000.0)
+        cache_hit, experiment_id = pace_api._get_cached_experiment_id("expired-exec")
+
+        assert cache_hit is False
+        assert experiment_id is None
+        assert "expired-exec" not in pace_api._PACE_CACHE
+
     def test_endpoint_rejects_missing_execution_id(self, client) -> None:
         response = client.get(f"{API_BASE}/pace/resolve")