Skip to content

Commit 1f09e20

Browse files
kbighorse and claude committed
fix: deterministic volume handling and explicit CLI arg rejection
- Add ORDER BY id to the legacy query so the lowest-PK row wins when multiple radionuclide rows map to the same Sample (volume/volume_unit).
- Skip overwrites, logging a warning, when a conflict is detected.
- Reject unknown CLI args in the backfill entrypoint instead of silently ignoring them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 10f3e41 commit 1f09e20

2 files changed

Lines changed: 39 additions & 5 deletions

File tree

transfers/backfill/backfill.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ def run() -> None:
5353

5454

5555
if __name__ == "__main__":
56+
if len(sys.argv) > 1:
57+
logger.error(
58+
"Unknown arguments: %s. "
59+
"CLI options (--batch-size) were removed; "
60+
"use BACKFILL_* env vars to control execution.",
61+
" ".join(sys.argv[1:]),
62+
)
63+
sys.exit(2)
5664
try:
5765
run()
5866
except Exception:

transfers/backfill/chemistry_backfill.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,16 @@ def _backfill_radionuclides_impl(session: Session) -> BackfillResult:
135135
).all()
136136
)
137137

138-
# Query all radionuclides joined with sample info for nma_sample_pt_id
138+
# Query all radionuclides joined with sample info for nma_sample_pt_id.
139+
# ORDER BY id for deterministic processing — when multiple rows map to
140+
# the same Sample, the lowest-PK row's volume/volume_unit wins.
139141
legacy_rows = session.execute(
140-
select(NMA_Radionuclides, NMA_Chemistry_SampleInfo.nma_sample_pt_id).join(
142+
select(NMA_Radionuclides, NMA_Chemistry_SampleInfo.nma_sample_pt_id)
143+
.join(
141144
NMA_Chemistry_SampleInfo,
142145
NMA_Radionuclides.chemistry_sample_info_id == NMA_Chemistry_SampleInfo.id,
143146
)
147+
.order_by(NMA_Radionuclides.id)
144148
).all()
145149

146150
logger.info("Processing %d legacy Radionuclides rows", len(legacy_rows))
@@ -238,14 +242,36 @@ def _backfill_radionuclides_impl(session: Session) -> BackfillResult:
238242
result.inserted += 1
239243
existing_keys.add(global_id_str)
240244

241-
# Update Sample volume/volume_unit if present on legacy row
245+
# Update Sample volume/volume_unit if present on legacy row.
246+
# Only set when the sample doesn't already have a value to avoid
247+
# nondeterministic overwrites when multiple rows share a sample.
242248
if row.volume is not None or row.volume_unit is not None:
243249
sample = session.get(Sample, sample_id)
244250
if sample:
245251
if row.volume is not None:
246-
sample.volume = float(row.volume)
252+
if sample.volume is None:
253+
sample.volume = float(row.volume)
254+
elif float(sample.volume) != float(row.volume):
255+
logger.warning(
256+
"Sample id=%s already has volume=%s, "
257+
"skipping conflicting value %s from GlobalID=%s",
258+
sample_id,
259+
sample.volume,
260+
row.volume,
261+
global_id_str,
262+
)
247263
if row.volume_unit is not None:
248-
sample.volume_unit = row.volume_unit
264+
if sample.volume_unit is None:
265+
sample.volume_unit = row.volume_unit
266+
elif sample.volume_unit != row.volume_unit:
267+
logger.warning(
268+
"Sample id=%s already has volume_unit=%s, "
269+
"skipping conflicting value %s from GlobalID=%s",
270+
sample_id,
271+
sample.volume_unit,
272+
row.volume_unit,
273+
global_id_str,
274+
)
249275

250276
# Create Notes if present
251277
if row.notes:

0 commit comments

Comments
 (0)