Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions backend/app/features/ingestion/parsers/case_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,22 @@ def parse_env_build(env_build_path: str | Path) -> dict[str, str | None]:
Returns
-------
dict
Dictionary with keys 'grid_resolution', 'compiler', 'mpilib' (str or None)
Dictionary with keys 'grid_resolution', 'compiler', 'mpilib',
and 'cime_output_root' (str or None)
"""
env_build_path = Path(env_build_path)

grid_resolution = _extract_value_from_file(env_build_path, "GRID")
compiler = _extract_value_from_file(env_build_path, "COMPILER")
mpilib = _extract_value_from_file(env_build_path, "MPILIB")
cime_output_root = _extract_value_from_file(env_build_path, "CIME_OUTPUT_ROOT")

return {"grid_resolution": grid_resolution, "compiler": compiler, "mpilib": mpilib}
return {
"grid_resolution": grid_resolution,
"compiler": compiler,
"mpilib": mpilib,
"cime_output_root": cime_output_root,
}


def parse_env_run(env_run_path: str | Path) -> dict[str, str | None]:
Expand Down Expand Up @@ -228,6 +235,26 @@ def _extract_campaign_and_experiment_type(
return campaign, experiment_type


def _substitute_path_variables(
path: str | None,
variables: dict[str, str | None],
) -> str | None:
"""Substitute supported shell-style path variables when values are known."""
if path is None:
return None

pattern = re.compile(
r"\$(?:{(?P<braced>CIME_OUTPUT_ROOT|CASE)}|(?P<plain>CIME_OUTPUT_ROOT|CASE)\b)"
)

def replace(match: re.Match[str]) -> str:
variable_name = match.group("braced") or match.group("plain")
value = variables.get(variable_name)
return value if value is not None else match.group(0)

return pattern.sub(replace, path)


def _calculate_simulation_end_date(
simulation_start_date: str | None,
stop_option: str | None,
Expand Down
14 changes: 14 additions & 0 deletions backend/app/features/ingestion/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

from app.core.logger import _setup_custom_logger
from app.features.ingestion.parsers.case_docs import (
_substitute_path_variables,
parse_env_build,
parse_env_case,
parse_env_run,
Expand Down Expand Up @@ -545,6 +546,19 @@ def _parse_all_files(exec_dir: str, files: dict[str, str | None]) -> ParsedSimul
if case_status_metadata is not None:
metadata.update(case_status_metadata)

path_variables = {
"CASE": metadata.get("case_name"),
"CIME_OUTPUT_ROOT": metadata.get("cime_output_root"),
}
metadata["output_path"] = _substitute_path_variables(
metadata.get("output_path"),
path_variables,
)
metadata["archive_path"] = _substitute_path_variables(
metadata.get("archive_path"),
path_variables,
)

execution_id = _resolve_execution_id(metadata.get("execution_id"), exec_dir)

return ParsedSimulation(
Expand Down
33 changes: 33 additions & 0 deletions backend/tests/features/ingestion/parsers/test_case_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import patch

from app.features.ingestion.parsers.case_docs import (
_substitute_path_variables,
parse_env_build,
parse_env_case,
parse_env_run,
Expand Down Expand Up @@ -113,6 +114,7 @@ def test_value(self, tmp_path):
<config>
<entry id="COMPILER" value="intel" />
<entry id="MPILIB" value="mpt" />
<entry id="CIME_OUTPUT_ROOT" value="/global/cfs/cdirs/e3sm" />
</config>
"""
tmp_build = tmp_path / "env_build.xml"
Expand All @@ -121,6 +123,7 @@ def test_value(self, tmp_path):

assert result["compiler"] == "intel"
assert result["mpilib"] == "mpt"
assert result["cime_output_root"] == "/global/cfs/cdirs/e3sm"

def test_text(self, tmp_path):
xml_build = """
Expand Down Expand Up @@ -151,6 +154,36 @@ def test_missing_entry_returns_none(self, tmp_path):
assert result["mpilib"] is None


class TestSubstitutePathVariables:
def test_replaces_supported_variables(self):
result = _substitute_path_variables(
"$CIME_OUTPUT_ROOT/archive/${CASE}/run/$CASE",
{
"CIME_OUTPUT_ROOT": "/global/cfs/cdirs/e3sm",
"CASE": "v3.LR.historical_0121",
},
)

assert (
result == "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121/run/"
"v3.LR.historical_0121"
)

def test_leaves_unknown_values_unsubstituted(self):
result = _substitute_path_variables(
"$CIME_OUTPUT_ROOT/archive/$CASE",
{
"CIME_OUTPUT_ROOT": None,
"CASE": "v3.LR.historical_0121",
},
)

assert result == "$CIME_OUTPUT_ROOT/archive/v3.LR.historical_0121"

def test_none_input_returns_none(self):
assert _substitute_path_variables(None, {"CASE": "case"}) is None


class TestParseEnvRun:
def test_extracts_path_artifact_values(self, tmp_path):
xml_run = """
Expand Down
33 changes: 33 additions & 0 deletions backend/tests/features/ingestion/parsers/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,39 @@ def test_case_status_is_merged(self, tmp_path: Path) -> None:
assert skipped == 0
assert result[0].status == "failed"

def test_parse_all_files_substitutes_archive_and_output_paths(self) -> None:
exec_dir = "/tmp/test_case/1.0-0"
files: dict[str, str | None] = {key: f"/tmp/{key}" for key in parser.FILE_SPECS}

with self._mock_all_parsers(
parse_env_case={
"case_name": "v3.LR.historical_0121",
"campaign": "v3.LR.historical",
"machine": "test",
},
parse_env_build={
"compiler": "gnu",
"cime_output_root": "/global/cfs/cdirs/e3sm",
},
parse_env_run={
"simulation_start_date": "2020-01-01",
"output_path": "${CIME_OUTPUT_ROOT}/run/$CASE",
"archive_path": "$CIME_OUTPUT_ROOT/archive/${CASE}",
},
parse_e3sm_timing={
"execution_id": "1.0-0",
"run_start_date": "2025-12-18T20:09:33",
"run_end_date": "2025-12-18T20:54:58",
},
):
parsed = parser._parse_all_files(exec_dir, files)

assert parsed.output_path == "/global/cfs/cdirs/e3sm/run/v3.LR.historical_0121"
assert (
parsed.archive_path
== "/global/cfs/cdirs/e3sm/archive/v3.LR.historical_0121"
)

def test_case_status_run_dates_override_timing_dates(self, tmp_path: Path) -> None:
execution_dir = tmp_path / "1.0-0"
execution_dir.mkdir(parents=True)
Expand Down
34 changes: 34 additions & 0 deletions backend/tests/features/ingestion/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
SimulationCreateDraft,
_build_config_snapshot,
_build_simulation_create_draft,
_extract_postprocessing_script_path,
_get_or_create_case,
_get_reference_metadata_for_case,
_normalize_git_url,
_normalize_path_candidate,
_normalize_simulation_status,
_normalize_simulation_type,
_stringify_config_value,
Expand Down Expand Up @@ -1924,6 +1926,38 @@ def test_different_case_name_creates_separate_cases(self, db: Session) -> None:


class TestIngestHelpers:
def test_extract_postprocessing_script_path_returns_none_for_unparseable_value(
self,
) -> None:
with patch("app.features.ingestion.ingest.logger.warning") as mock_warning:
result = _extract_postprocessing_script_path(
postprocessing_script="'",
execution_dir="/tmp/execution",
)

assert result is None
mock_warning.assert_called_once()

def test_extract_postprocessing_script_path_returns_none_for_empty_tokens(
self,
) -> None:
with patch(
"app.features.ingestion.ingest.shlex.split", return_value=[]
) as mock_split:
result = _extract_postprocessing_script_path(
postprocessing_script="/tmp/post.sh",
execution_dir="/tmp/execution",
)

assert result is None
mock_split.assert_called_once_with("/tmp/post.sh")

@pytest.mark.parametrize("value", [None, "", " "])
def test_normalize_path_candidate_returns_none_for_blank_values(
self, value: str | None
) -> None:
assert _normalize_path_candidate(value) is None

def test_get_or_create_case_sets_missing_case_group(self, db: Session) -> None:
case = Case(name="case_group_test", case_group=None)
db.add(case)
Expand Down
28 changes: 28 additions & 0 deletions backend/tests/features/pace/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,20 @@ def test_endpoint_returns_null_on_upstream_non_200(
assert response.status_code == 200
assert response.json() == {"executionId": "x", "experimentId": None}

def test_endpoint_returns_null_on_non_200_response_object(
self, client, monkeypatch
) -> None:
monkeypatch.setattr(
pace_api.urllib.request,
"urlopen",
lambda *args, **kwargs: _FakeHttpResponse(503, "retry later"),
)

response = client.get(f"{API_BASE}/pace/resolve", params={"execution_id": "x"})

assert response.status_code == 200
assert response.json() == {"executionId": "x", "experimentId": None}

def test_endpoint_returns_null_on_malformed_json(self, client, monkeypatch) -> None:
monkeypatch.setattr(
pace_api.urllib.request,
Expand Down Expand Up @@ -285,6 +299,20 @@ def fake_urlopen(request: urllib.request.Request, timeout: float):
assert second_response.json()["experimentId"] is None
assert call_count == 1

def test_extract_experiment_id_returns_none_for_non_dict_first_item(self) -> None:
assert pace_api._extract_experiment_id(["228920"]) is None

def test_get_cached_experiment_id_expires_entries(self, monkeypatch) -> None:
monkeypatch.setattr(pace_api.time, "monotonic", lambda: 100.0)
pace_api._set_cached_experiment_id("expired-exec", "228920")

monkeypatch.setattr(pace_api.time, "monotonic", lambda: 1000.0)
cache_hit, experiment_id = pace_api._get_cached_experiment_id("expired-exec")

assert cache_hit is False
assert experiment_id is None
assert "expired-exec" not in pace_api._PACE_CACHE

def test_endpoint_rejects_missing_execution_id(self, client) -> None:
response = client.get(f"{API_BASE}/pace/resolve")

Expand Down
Loading