From fe3625a18a991239e6ad1c00d21c415ff85ffc05 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 22 Dec 2025 11:38:36 -0700
Subject: [PATCH 01/29] fix: deprecate groundwater level sample matrix

This was deprecated a while ago but still existed in the lexicon and
parameter files. The only valid sample matrix for water level
observations is "water".
---
 core/lexicon.json   | 1 -
 core/parameter.json | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index 9eb7f492..ff9d0f70 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -291,7 +291,6 @@
     {"categories": ["participant_role"], "term": "Observer", "definition": "a person observing the field event"},
     {"categories": ["participant_role"], "term": "Visitor", "definition": "a person visiting the field event"},
     {"categories": ["sample_matrix"], "term": "water", "definition": "water"},
-    {"categories": ["sample_matrix"], "term": "groundwater", "definition": "groundwater"},
     {"categories": ["sample_matrix"], "term": "soil", "definition": "soil"},
     {"categories": ["thing_type"], "term": "observation well", "definition": "a well used to monitor groundwater levels"},
     {"categories": ["thing_type"], "term": "piezometer", "definition": "a type of observation well that measures pressure head in the aquifer"},
diff --git a/core/parameter.json b/core/parameter.json
index 5d9b7db7..b65bd042 100644
--- a/core/parameter.json
+++ b/core/parameter.json
@@ -1,14 +1,14 @@
 [
     {
         "parameter_name": "groundwater level",
-        "matrix": "groundwater",
+        "matrix": "water",
         "parameter_type": "Field Parameter",
         "cas_number": null,
         "default_unit": "ft"
     },
     {
         "parameter_name": "pH",
-        "matrix": "groundwater",
+        "matrix": "water",
         "parameter_type": "Field Parameter",
         "cas_number": null,
         "default_unit": "dimensionless"

From 35985cfc6ce1e835788bc8ca769af21432327d61 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 22 Dec 2025 11:39:58 -0700
Subject: [PATCH 02/29] feat: test water level bulk upload

The bulk upload is used for both the API and CLI, so it should be
tested by itself. Then the API and CLI tests can just call the bulk
upload function and verify that they run as expected.
---
 tests/services/test_water_level_csv.py | 136 +++++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 tests/services/test_water_level_csv.py

diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py
new file mode 100644
index 00000000..268de7f7
--- /dev/null
+++ b/tests/services/test_water_level_csv.py
@@ -0,0 +1,136 @@
+import tempfile
+
+from db.engine import session_ctx
+from db import (
+    FieldEvent,
+    FieldActivity,
+    # FieldEventParticipant,
+    Sample,
+    Observation,
+)
+from services.water_level_csv import bulk_upload_water_levels
+
+
+def test_bulk_upload(water_well_thing, contact):
+    """
+    The bulk upload function is used by both the API and the CLI, so it is tested
+    separately here on the assumption that the behavior is the same for both.
+    File parsing is assumed to be correct and is tested for each interface.
+    This test focuses on the data processing and database insertion.
+ """ + + # write to a CSV file in memory then delete it after processing + # this is being done to avoid filesystem dependencies in tests and + # to use the contact fixture for the field staff + csv_content = ",".join( + [ + "field_staff", + "well_name_point_id", + "field_event_date_time", + "measurement_date_time", + "sampler", + "sample_method", + "mp_height", + "level_status", + "depth_to_water_ft", + "data_quality", + "water_level_notes", + ] + ) + csv_content += "\n" + csv_content += ",".join( + [ + contact.name, + water_well_thing.name, + "2025-02-15T08:00:00", + "2025-02-15T10:30:00", + "Groundwater Team", + "electric tape", + "1.5", + "stable", + "45.2", + "approved", + "Initial measurement", + ] + ) + + with tempfile.NamedTemporaryFile( + mode="w+", encoding="utf-8", delete_on_close=True + ) as temp_csv: + temp_csv.write(csv_content) + temp_csv.flush() + + # process the CSV file + results = bulk_upload_water_levels(temp_csv.name) + + assert results.exit_code == 0 + + assert len(results.payload["water_levels"]) == 1 + created_records = results.payload["water_levels"][0] + + # verify the data was inserted correctly and then clean up + with session_ctx() as session: + # ---------- + # INSERTION VERIFICATION + # ---------- + + # FieldEvent + field_event = session.get(FieldEvent, created_records["field_event_id"]) + assert field_event is not None + assert field_event.thing_id == water_well_thing.id + # TODO: uncomment after timezone handling is fixed + # assert field_event.event_date.isoformat() == "2025-02-15T15:00:00+00:00" + assert field_event.event_date.isoformat() == "2025-02-15T08:00:00+00:00" + + # FieldActivity + field_activity = session.get( + FieldActivity, created_records["field_activity_id"] + ) + assert field_activity is not None + assert field_activity.activity_type == "groundwater level" + + # FieldEventParticipant + + # Sample + sample = session.get(Sample, created_records["sample_id"]) + assert sample is not None + assert sample.field_activity_id == field_activity.id + # TODO: uncomment after timezone handling is fixed + # assert sample.sample_date.isoformat() == "2025-02-15T17:30:00+00:00" + assert sample.sample_date.isoformat() == "2025-02-15T10:30:00+00:00" + assert sample.sample_name[0:3] == "wl-" + assert sample.sample_matrix == "water" + assert sample.sample_method == "Electric tape measurement (E-probe)" + assert sample.qc_type == "Normal" + assert sample.depth_top is None + assert sample.depth_bottom is None + # TODO: uncomment after notes handling is fixed + # assert sample.notes is None + + # Observation + observation = session.get(Observation, created_records["observation_id"]) + assert observation is not None + assert observation.sample_id == sample.id + # TODO: uncomment after timezone handling is fixed + # assert observation.observation_datetime.isoformat() == "2025-02-15T17:30:00+00:00" + assert ( + observation.observation_datetime.isoformat() + == "2025-02-15T10:30:00+00:00" + ) + assert observation.value == 45.2 + assert observation.unit == "ft" + assert observation.measuring_point_height == 1.5 + assert observation.groundwater_level_reason is None + assert observation.groundwater_level_accuracy is None + # TODO: uncomment after notes handling is fixed + # assert observation.notes == "Initial measurement" + + # ---------- + # CLEAN UP + # ---------- + + session.delete(observation) + session.delete(sample) + session.delete(field_activity) + session.delete(field_event) + session.commit() From 70153aad4ba68583e38618788faad56b756d8980 Mon Sep 17 00:00:00 
From 70153aad4ba68583e38618788faad56b756d8980 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 22 Dec 2025 11:41:22 -0700
Subject: [PATCH 03/29] refactor: use contact fixture for test bulk water level
 upload

All water levels must be associated with an existing contact in the DB,
so the contact fixture is used instead of hardcoding a name.
---
 tests/test_observation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_observation.py b/tests/test_observation.py
index 90f64433..4ba03402 100644
--- a/tests/test_observation.py
+++ b/tests/test_observation.py
@@ -121,7 +121,7 @@ def test_add_groundwater_level_observation(groundwater_level_sample, sensor):
     cleanup_post_test(Observation, data["id"])
 
 
-def test_bulk_upload_groundwater_levels_api(water_well_thing):
+def test_bulk_upload_groundwater_levels_api(water_well_thing, contact):
     csv_content = ",".join(
         [
             "field_staff",
@@ -140,10 +140,10 @@
     csv_content += "\n"
     csv_content += ",".join(
         [
-            "A Lopez",
+            contact.name,
             water_well_thing.name,
-            "2025-02-15T08:00:00-07:00",
-            "2025-02-15T10:30:00-07:00",
+            "2025-02-15T08:00:00",
+            "2025-02-15T10:30:00",
             "Groundwater Team",
             "electric tape",
             "1.5",

From 2198e8bc85eae643df41f6a004b706f9adf50bea Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 22 Dec 2025 16:14:22 -0700
Subject: [PATCH 04/29] fix: cleanup transfer tests

Hanging data prevents tests from running cleanly.
---
 tests/transfers/test_contact_with_multiple_wells.py | 9 ++++++++-
 .../test_water_level_with_unknown_data_quality.py   | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/transfers/test_contact_with_multiple_wells.py b/tests/transfers/test_contact_with_multiple_wells.py
index 4199142e..cd8a39ce 100644
--- a/tests/transfers/test_contact_with_multiple_wells.py
+++ b/tests/transfers/test_contact_with_multiple_wells.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 # ===============================================================================
 
-from db import ThingContactAssociation
+from db import ThingContactAssociation, Thing, Contact, Location
 from db.engine import session_ctx
 from transfers.contact_transfer import ContactTransfer
 from transfers.well_transfer import WellTransferer
@@ -31,5 +31,12 @@ def test_multiple_wells():
     with session_ctx() as sess:
         assert sess.query(ThingContactAssociation).count() == 6
 
+        # cleanup
+        sess.query(Contact).delete()
+        sess.query(ThingContactAssociation).delete()
+        sess.query(Location).delete()
+        sess.query(Thing).delete()
+        sess.commit()
+
 
 # ============= EOF =============================================
diff --git a/tests/transfers/test_water_level_with_unknown_data_quality.py b/tests/transfers/test_water_level_with_unknown_data_quality.py
index 077a95fe..85555ae4 100644
--- a/tests/transfers/test_water_level_with_unknown_data_quality.py
+++ b/tests/transfers/test_water_level_with_unknown_data_quality.py
@@ -34,3 +34,4 @@ def test_water_level_with_unknown_data_quality():
         session.query(Sample).delete()
         session.query(FieldActivity).delete()
         session.query(FieldEvent).delete()
+        session.commit()

From 6a1395e275dc25138db44ae0c0bcaf3688d79d9a Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 22 Dec 2025 16:15:46 -0700
Subject: [PATCH 05/29] refactor: update water level bulk upload

This large refactor updates the water level CSV bulk upload process in
the following ways:

- It uses the updated schema/headers for the CSV
- It uses Pydantic to do all of the data validations that don't pertain
  to the DB
- It updates tests to reflect these changes
- It tests the services function since that logic is used by both the
  API and CLI
---
 api/observation.py                     |  10 +-
 schemas/water_level_csv.py             | 131 +++++++-
 services/water_level_csv.py            | 442 ++++++++++++-------------
 tests/conftest.py                      |  21 ++
 tests/services/test_water_level_csv.py | 113 ++++---
 tests/test_observation.py              |  73 ++--
 6 files changed, 448 insertions(+), 342 deletions(-)

diff --git a/api/observation.py b/api/observation.py
index 3b446bd7..5a8e67a8 100644
--- a/api/observation.py
+++ b/api/observation.py
@@ -41,7 +41,7 @@
     UpdateWaterChemistryObservation,
 )
 from schemas.transducer import TransducerObservationWithBlockResponse
-from schemas.water_level_csv import WaterLevelBulkUploadResponse
+from schemas.water_level_csv import WaterLevelBulkUploadPayload
 from services.crud_helper import model_deleter, model_adder
 from services.observation_helper import (
     get_observations,
@@ -90,8 +90,8 @@ async def add_water_chemistry_observation(
 
 @router.post(
     "/groundwater-level/bulk-upload",
-    response_model=WaterLevelBulkUploadResponse,
-    status_code=HTTP_200_OK,
+    response_model=WaterLevelBulkUploadPayload,
+    status_code=HTTP_201_CREATED,
 )
 async def bulk_upload_groundwater_levels(
     user: amp_admin_dependency,
@@ -107,7 +107,9 @@ async def bulk_upload_groundwater_levels(
     result = bulk_upload_water_levels(contents)
 
     if result.exit_code != 0:
-        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail=result.payload)
+        raise HTTPException(
+            status_code=HTTP_400_BAD_REQUEST, detail=result.payload.model_dump()
+        )
 
     return result.payload
 
diff --git a/schemas/water_level_csv.py b/schemas/water_level_csv.py
index 00d71eaf..f1479933 100644
--- a/schemas/water_level_csv.py
+++ b/schemas/water_level_csv.py
@@ -13,30 +13,139 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
-from pydantic import BaseModel
+from datetime import datetime
+from pydantic import BaseModel, ConfigDict, field_validator, Field, ValidationInfo
+from typing import Any
+
+from core.enums import SampleMethod, GroundwaterLevelReason, GroundwaterLevelAccuracy
 
 
-class WaterLevelBulkUploadSummary(BaseModel):
-    total_rows_processed: int
-    total_rows_imported: int
-    validation_errors_or_warnings: int
+class WaterLevelCsvRow(BaseModel):
+    """
+    This class defines the schema for a single row in the water level CSV upload.
+    """
+
+    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
+
+    well_name_point_id: str = Field(
+        description="Name/PointID of the well where the measurement was taken."
+    )
+    field_event_date_time: datetime = Field(
+        description="Date and time when the field event occurred."
+    )
+    field_staff: str = Field(description="Name of the person who led the field event.")
+    field_staff_2: str | None = Field(
+        description="Name of the second person who participated in the field event.",
+        default=None,
+    )
+    field_staff_3: str | None = Field(
+        description="Name of the third person who participated in the field event.",
+        default=None,
+    )
+    water_level_date_time: datetime = Field(
+        description="Date and time when the water level measurement was taken."
+    )
+    measuring_person: str = Field(
+        description="Person who took the water level measurement. They must be one of the field staff."
+    )
+    sample_method: SampleMethod = Field(
+        description="Method used to measure the water level."
+    )
+    mp_height: float = Field(
+        description="Measuring point height relative to the ground surface in feet."
+    )
+    level_status: GroundwaterLevelReason = Field(
+        description="Status of the water level."
+    )
+    depth_to_water_ft: float = Field(description="Depth to water in feet.")
+    data_quality: GroundwaterLevelAccuracy = Field(
+        description="A description of the accuracy of the data."
+    )
+    water_level_notes: str | None = Field(
+        description="Additional notes about the water level measurement.", default=None
+    )
+
+    @field_validator("water_level_notes", mode="before")
+    @classmethod
+    def _empty_to_none(cls, value: str | None) -> str | None:
+        if value is None:
+            return None
+        if isinstance(value, str) and value.strip() == "":
+            return None
+        return value
+
+    @field_validator("measuring_person")
+    @classmethod
+    def ensure_measuring_person_is_field_staff(
+        cls, value: str, info: ValidationInfo
+    ) -> str:
+        data = info.data
+        field_staffs = [
+            data.get("field_staff"),
+            data.get("field_staff_2"),
+            data.get("field_staff_3"),
+        ]
+        if value not in field_staffs:
+            raise ValueError("measuring_person must be one of the field staff")
+        return value
+
+
+class WaterLevelBulkUploadRow(WaterLevelCsvRow):
+    """
+    This class extends WaterLevelCsvRow to include resolved database objects
+    for easier processing during bulk upload.
+    """
+
+    well: Any = Field(description="The Thing object representing the well.")
+    field_staff_contact: Any = Field(
+        description="The Contact object for the field staff."
+    )
+    field_staff_2_contact: Any | None = Field(
+        description="The Contact object for the second field staff."
+    )
+    field_staff_3_contact: Any | None = Field(
+        description="The Contact object for the third field staff."
+    )
+    measuring_person_field_staff_index: int = Field(
+        description="The index of the field staff who is the measuring person: 1, 2, or 3."
+ ) -class WaterLevelBulkUploadRow(BaseModel): +class WaterLevelCreatedRow(BaseModel): + """ + This class defines the structure of a successfully created water level row + during bulk upload. + """ + well_name_point_id: str field_event_id: int field_activity_id: int + field_event_participant_1_id: int + field_event_participant_2_id: int | None + field_event_participant_3_id: int | None sample_id: int observation_id: int - measurement_date_time: str - level_status: str - data_quality: str + groundwater_level_reason: str + groundwater_level_accuracy: str -class WaterLevelBulkUploadResponse(BaseModel): +class WaterLevelBulkUploadSummary(BaseModel): + total_rows_processed: int + total_rows_imported: int + total_validation_errors_or_warnings: int + + +class WaterLevelBulkUploadPayload(BaseModel): summary: WaterLevelBulkUploadSummary - water_levels: list[WaterLevelBulkUploadRow] + water_levels: list[WaterLevelCreatedRow] validation_errors: list[str] +class WaterLevelBulkUploadResponse(BaseModel): + exit_code: int + stdout: str + stderr: str + payload: WaterLevelBulkUploadPayload + + # ============= EOF ============================================= diff --git a/services/water_level_csv.py b/services/water_level_csv.py index ff49fe12..24bcfce8 100644 --- a/services/water_level_csv.py +++ b/services/water_level_csv.py @@ -19,163 +19,58 @@ import io import json import uuid -from dataclasses import dataclass -from datetime import datetime from pathlib import Path -from typing import Any, BinaryIO, Iterable, List +from typing import Any, BinaryIO -from pydantic import BaseModel, ConfigDict, ValidationError, field_validator +from pydantic import ValidationError from sqlalchemy import select from sqlalchemy.orm import Session -from db import Thing, FieldEvent, FieldActivity, Sample, Observation, Parameter +from db import ( + Thing, + FieldEvent, + FieldActivity, + Sample, + Observation, + Parameter, + Contact, + FieldEventParticipant, +) from db.engine import session_ctx - -# Required CSV columns for the bulk upload -REQUIRED_FIELDS: List[str] = [ - "field_staff", - "well_name_point_id", - "field_event_date_time", - "measurement_date_time", - "sampler", - "sample_method", - "mp_height", - "level_status", - "depth_to_water_ft", - "data_quality", +from schemas.water_level_csv import ( + WaterLevelCsvRow, + WaterLevelBulkUploadRow, + WaterLevelBulkUploadResponse, + WaterLevelCreatedRow, + WaterLevelBulkUploadSummary, + WaterLevelBulkUploadPayload, +) + +REQUIRED_FIELDS = [ + key + for key in WaterLevelCsvRow.model_fields.keys() + if WaterLevelCsvRow.model_fields[key].default is not None ] -# Allow-list values for validation. These represent early MVP lexicon values. -VALID_LEVEL_STATUSES = {"stable", "rising", "falling"} -VALID_DATA_QUALITIES = {"approved", "provisional"} -VALID_SAMPLERS = {"groundwater team", "consultant"} - -# Mapping between human-friendly sample methods provided in CSV uploads and -# their canonical lexicon terms stored in the database. 
-SAMPLE_METHOD_ALIASES = {
-    "electric tape": "Electric tape measurement (E-probe)",
-    "steel tape": "Steel-tape measurement",
-}
-SAMPLE_METHOD_CANONICAL = {
-    value.lower(): value for value in SAMPLE_METHOD_ALIASES.values()
-}
-
-
-@dataclass
-class BulkUploadResult:
-    exit_code: int
-    stdout: str
-    stderr: str
-    payload: dict[str, Any]
-
-
-@dataclass
-class _ValidatedRow:
-    row_index: int
-    raw: dict[str, str]
-    well: Thing
-    field_staff: str
-    sampler: str
-    sample_method_term: str
-    field_event_dt: datetime
-    measurement_dt: datetime
-    mp_height: float
-    depth_to_water_ft: float
-    level_status: str
-    data_quality: str
-    water_level_notes: str | None
-
-
-class WaterLevelCsvRow(BaseModel):
-    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
-
-    field_staff: str
-    well_name_point_id: str
-    field_event_date_time: datetime
-    measurement_date_time: datetime
-    sampler: str
-    sample_method: str
-    mp_height: float
-    level_status: str
-    depth_to_water_ft: float
-    data_quality: str
-    water_level_notes: str | None = None
-
-    @field_validator(
-        "field_staff",
-        "well_name_point_id",
-        "sampler",
-        "sample_method",
-        "level_status",
-        "data_quality",
-    )
-    @classmethod
-    def _require_value(cls, value: str) -> str:
-        if value is None or value == "":
-            raise ValueError("value is required")
-        return value
-
-    @field_validator("sampler")
-    @classmethod
-    def _validate_sampler(cls, value: str) -> str:
-        if value.lower() not in VALID_SAMPLERS:
-            raise ValueError(
-                f"Invalid sampler '{value}'. Expected one of: {sorted(VALID_SAMPLERS)}"
-            )
-        return value
-
-    @field_validator("level_status")
-    @classmethod
-    def _validate_level_status(cls, value: str) -> str:
-        if value.lower() not in VALID_LEVEL_STATUSES:
-            raise ValueError(
-                f"Invalid level_status '{value}'. Expected one of: {sorted(VALID_LEVEL_STATUSES)}"
-            )
-        return value
-
-    @field_validator("data_quality")
-    @classmethod
-    def _validate_data_quality(cls, value: str) -> str:
-        if value.lower() not in VALID_DATA_QUALITIES:
-            raise ValueError(
-                f"Invalid data_quality '{value}'. Expected one of: {sorted(VALID_DATA_QUALITIES)}"
-            )
-        return value
-
-    @field_validator("sample_method")
-    @classmethod
-    def _normalize_sample_method(cls, value: str) -> str:
-        normalized = value.lower()
-        if normalized in SAMPLE_METHOD_ALIASES:
-            return SAMPLE_METHOD_ALIASES[normalized]
-        if normalized in SAMPLE_METHOD_CANONICAL:
-            return SAMPLE_METHOD_CANONICAL[normalized]
-        raise ValueError(
-            f"Invalid sample_method '{value}'. Expected one of: {sorted(SAMPLE_METHOD_ALIASES.keys())}"
-        )
-
-    @field_validator("water_level_notes", mode="before")
-    @classmethod
-    def _empty_to_none(cls, value: str | None) -> str | None:
-        if value is None:
-            return None
-        if isinstance(value, str) and value.strip() == "":
-            return None
-        return value
-
 
 def bulk_upload_water_levels(
     source_file: str | Path | bytes | BinaryIO, *, pretty_json: bool = False
-) -> BulkUploadResult:
+) -> WaterLevelBulkUploadResponse:
     """Parse a CSV of water-level measurements and write database rows."""
     try:
         headers, csv_rows = _read_csv(source_file)
     except FileNotFoundError:
         msg = f"File not found: {source_file}"
-        payload = _build_payload([], [], 0, 0, [msg])
+        payload = WaterLevelBulkUploadPayload(
+            summary=WaterLevelBulkUploadSummary(0, 0, 0),
+            water_levels=[],
+            validation_errors=[],
+        )
         stdout = _serialize_payload(payload, pretty_json)
-        return BulkUploadResult(exit_code=1, stdout=stdout, stderr=msg, payload=payload)
+        return WaterLevelBulkUploadResponse(
+            exit_code=1, stdout=stdout, stderr=msg, payload=payload
+        )
 
     validation_errors: list[str] = []
     created_rows: list[dict[str, Any]] = []
@@ -202,44 +97,28 @@
 
     if validation_errors:
         session.rollback()
 
-    summary = {
-        "total_rows_processed": len(csv_rows),
-        "total_rows_imported": len(created_rows) if not validation_errors else 0,
-        "validation_errors_or_warnings": len(validation_errors),
-    }
-    payload = _build_payload(
-        csv_rows, created_rows, **summary, errors=validation_errors
+    summary = WaterLevelBulkUploadSummary(
+        total_rows_processed=len(csv_rows),
+        total_rows_imported=len(created_rows) if not validation_errors else 0,
+        total_validation_errors_or_warnings=len(validation_errors),
     )
+
+    payload = WaterLevelBulkUploadPayload(
+        summary=summary,
+        water_levels=created_rows,
+        validation_errors=validation_errors,
+    )
+
     stdout = _serialize_payload(payload, pretty_json)
     stderr = "\n".join(validation_errors)
     exit_code = 0 if not validation_errors else 1
-    return BulkUploadResult(
+    return WaterLevelBulkUploadResponse(
         exit_code=exit_code, stdout=stdout, stderr=stderr, payload=payload
     )
 
 
-def _serialize_payload(payload: dict[str, Any], pretty: bool) -> str:
-    return json.dumps(payload, indent=2 if pretty else None)
-
-
-def _build_payload(
-    csv_rows: Iterable[dict[str, Any]],
-    created_rows: list[dict[str, Any]],
-    total_rows_processed: int,
-    total_rows_imported: int,
-    validation_errors_or_warnings: int,
-    *,
-    errors: list[str],
-) -> dict[str, Any]:
-    return {
-        "summary": {
-            "total_rows_processed": total_rows_processed,
-            "total_rows_imported": total_rows_imported,
-            "validation_errors_or_warnings": validation_errors_or_warnings,
-        },
-        "water_levels": created_rows,
-        "validation_errors": errors,
-    }
+def _serialize_payload(payload: WaterLevelBulkUploadPayload, pretty: bool) -> str:
+    return json.dumps(payload.model_dump(), indent=2 if pretty else None)
 
 
 def _read_csv(
@@ -279,24 +158,28 @@ def _validate_headers(headers: list[str]) -> list[str]:
 def _validate_rows(
     session: Session, rows: list[dict[str, str]]
-) -> tuple[list[_ValidatedRow], list[str]]:
-    valid_rows: list[_ValidatedRow] = []
-    errors: list[str] = []
-
-    wells_by_name: dict[str, Thing] = {}
+) -> tuple[list[WaterLevelBulkUploadRow], list[str]]:
+    # Caches to avoid repeated DB lookups
+    contacts_by_name_cache: dict[str, Contact] = {}
+    wells_by_name_cache: dict[str, Thing] = {}
 
+    valid_rows: list[WaterLevelBulkUploadRow] = []
+    errors: list[str] = []
     for idx, raw_row in enumerate(rows, start=1):
-        normalized = {k: (v or "").strip() for k, v in raw_row.items() if k is not None}
+        # Normalize whitespace in all fields
+        normalized_row = {k: (v or "").strip() for k, v in raw_row.items()}
 
-        missing = [field for field in REQUIRED_FIELDS if not normalized.get(field)]
-        if missing:
-            errors.extend(
-                [f"Row {idx}: Missing required field '{field}'" for field in missing]
-            )
-            continue
+        """
+        Developer's note
 
+        Pydantic handles all of the validation logic, including type
+        conversions and required field checks. If a field is missing or has an
+        invalid value, Pydantic will raise a ValidationError, which we catch
+        and convert into a user-friendly error message.
+        """
         try:
-            model = WaterLevelCsvRow(**normalized)
+            model = WaterLevelCsvRow(**normalized_row)
+            WaterLevelCsvRow.model_validate(model)
         except ValidationError as exc:
             for err in exc.errors():
                 location = ".".join(str(part) for part in err["loc"])
                 message = err["msg"]
                 errors.append(f"Row {idx}: {location} - {message}")
             continue
 
+        # Verify that the well exists in the database
         well_name = model.well_name_point_id
-        well = wells_by_name.get(well_name)
+        well = wells_by_name_cache.get(well_name, None)
         if well is None:
             sql = select(Thing).where(Thing.name == well_name)
             well = session.scalars(sql).one_or_none()
             if well is None:
                 errors.append(f"Row {idx}: Unknown well_name_point_id '{well_name}'")
                 continue
-        wells_by_name[well_name] = well
-
-        valid_rows.append(
-            _ValidatedRow(
-                row_index=idx,
-                raw={**normalized},
-                well=well,
-                field_staff=model.field_staff,
-                sampler=model.sampler,
-                sample_method_term=model.sample_method,
-                field_event_dt=model.field_event_date_time,
-                measurement_dt=model.measurement_date_time,
-                mp_height=model.mp_height,
-                depth_to_water_ft=model.depth_to_water_ft,
-                level_status=model.level_status,
-                data_quality=model.data_quality,
-                water_level_notes=model.water_level_notes,
+            wells_by_name_cache[well_name] = well
+
+        # verify that the well depth is greater than the water level depth bgs
+        if well.well_depth <= model.depth_to_water_ft - model.mp_height:
+            errors.append(
+                f"Row {idx}: well_depth ({well.well_depth} ft) must be greater than depth_to_water_ft ({model.depth_to_water_ft} ft) minus mp_height ({model.mp_height} ft)"
             )
+            continue
+
+        # Verify that the field staff are in the database
+        """
+        Developer's note
+
+        This has to be repeated for each field staff person not in a for loop
+        because field_staff_2 and _3 can be None
+        """
+        field_staff_name = model.field_staff
+        field_staff_contact = contacts_by_name_cache.get(field_staff_name, None)
+        if field_staff_contact is None:
+            sql = select(Contact).where(Contact.name == field_staff_name)
+            field_staff_contact = session.scalars(sql).one_or_none()
+            if field_staff_contact is None:
+                errors.append(f"Row {idx}: Unknown field_staff '{field_staff_name}'")
+                continue
+            contacts_by_name_cache[field_staff_name] = field_staff_contact
+
+        if model.field_staff_2:
+            field_staff_2_name = model.field_staff_2
+            field_staff_2_contact = contacts_by_name_cache.get(field_staff_2_name, None)
+            if field_staff_2_contact is None:
+                sql = select(Contact).where(Contact.name == field_staff_2_name)
+                field_staff_2_contact = session.scalars(sql).one_or_none()
+                if field_staff_2_contact is None:
+                    errors.append(
+                        f"Row {idx}: Unknown field_staff_2 '{field_staff_2_name}'"
+                    )
+                    continue
+                contacts_by_name_cache[field_staff_2_name] = field_staff_2_contact
+        else:
+            field_staff_2_contact = None
+
+        if model.field_staff_3:
+            field_staff_3_name = model.field_staff_3
+            field_staff_3_contact = contacts_by_name_cache.get(field_staff_3_name, None)
+            if field_staff_3_contact is None:
+                sql = select(Contact).where(Contact.name == field_staff_3_name)
+                field_staff_3_contact = session.scalars(sql).one_or_none()
+                if field_staff_3_contact is None:
+                    errors.append(
+                        f"Row {idx}: Unknown field_staff_3 '{field_staff_3_name}'"
+                    )
+                    continue
+                contacts_by_name_cache[field_staff_3_name] = field_staff_3_contact
+        else:
+            field_staff_3_contact = None
+
+        # The Pydantic schema ensures that measuring_person is one of the field staff
+        if model.measuring_person == model.field_staff:
+            measuring_person_field_staff_index = 1
+        elif model.measuring_person == model.field_staff_2:
+            measuring_person_field_staff_index = 2
+        else:
+            measuring_person_field_staff_index = 3
+
+        valid_model = WaterLevelBulkUploadRow(
+            **model.model_dump(),
+            well=well,
+            field_staff_contact=field_staff_contact,
+            field_staff_2_contact=field_staff_2_contact,
+            field_staff_3_contact=field_staff_3_contact,
+            measuring_person_field_staff_index=measuring_person_field_staff_index,
+        )
+        valid_rows.append(valid_model)
+
     return valid_rows, errors
 
 
 def _create_records(
-    session: Session, parameter_id: int, rows: list[_ValidatedRow]
+    session: Session, parameter_id: int, rows: list[WaterLevelBulkUploadRow]
 ) -> list[dict[str, Any]]:
     created: list[dict[str, Any]] = []
     for row in rows:
+        # FieldEvent
         field_event = FieldEvent(
             thing=row.well,
-            event_date=row.field_event_dt,
-            notes=_build_field_event_notes(row),
+            event_date=row.field_event_date_time,
         )
+        session.add(field_event)
+
+        # FieldActivity, FieldEventParticipant, Sample, Observation
         field_activity = FieldActivity(
             field_event=field_event,
             activity_type="groundwater level",
-            notes=f"Sampler: {row.sampler}",
         )
+        session.add(field_activity)
+
+        # FieldEventParticipants
+        field_event_participant_1 = FieldEventParticipant(
+            field_event=field_event,
+            participant=row.field_staff_contact,
+            participant_role="Lead",
+        )
+        if row.field_staff_2_contact:
+            field_event_participant_2 = FieldEventParticipant(
+                field_event=field_event,
+                participant=row.field_staff_2_contact,
+                participant_role="Participant",
+            )
+            session.add(field_event_participant_2)
+        else:
+            field_event_participant_2 = None
+        if row.field_staff_3_contact:
+            field_event_participant_3 = FieldEventParticipant(
+                field_event=field_event,
+                participant=row.field_staff_3_contact,
+                participant_role="Participant",
+            )
+            session.add(field_event_participant_3)
+        else:
+            field_event_participant_3 = None
+
+        # Sample
+        if row.measuring_person_field_staff_index == 1:
+            sample_field_event_participant = field_event_participant_1
+        elif row.measuring_person_field_staff_index == 2:
+            sample_field_event_participant = field_event_participant_2
+        else:
+            sample_field_event_participant = field_event_participant_3
+
         sample = Sample(
             field_activity=field_activity,
-            sample_date=row.measurement_dt,
+            field_event_participant=sample_field_event_participant,
+            sample_date=row.water_level_date_time,
             sample_name=f"wl-{uuid.uuid4()}",
             sample_matrix="water",
-            sample_method=row.sample_method_term,
+            sample_method=row.sample_method,
             qc_type="Normal",
-            notes=row.water_level_notes,
         )
+        session.add(sample)
+
+        # Observation
         observation = Observation(
             sample=sample,
-            observation_datetime=row.measurement_dt,
+            observation_datetime=row.water_level_date_time,
             parameter_id=parameter_id,
             value=row.depth_to_water_ft,
             unit="ft",
             measuring_point_height=row.mp_height,
-            groundwater_level_reason=None,
-            notes=_build_observation_notes(row),
+            groundwater_level_reason=row.level_status,
+            groundwater_level_accuracy=row.data_quality,
+            notes=row.water_level_notes,
         )
-        session.add(field_event)
-        session.add(field_activity)
-        session.add(sample)
         session.add(observation)
         session.flush()
 
         created.append(
-            {
-                "well_name_point_id": row.raw["well_name_point_id"],
-                "field_event_id": field_event.id,
-                "field_activity_id": field_activity.id,
-                "sample_id": sample.id,
-                "observation_id": observation.id,
-                "measurement_date_time": row.raw["measurement_date_time"],
-                "level_status": row.level_status,
-                "data_quality": row.data_quality,
-            }
+            WaterLevelCreatedRow(
+                well_name_point_id=row.well_name_point_id,
+                field_event_id=field_event.id,
+                field_activity_id=field_activity.id,
+                field_event_participant_1_id=field_event_participant_1.id,
+                field_event_participant_2_id=(
+                    field_event_participant_2.id if field_event_participant_2 else None
+                ),
+                field_event_participant_3_id=(
+                    field_event_participant_3.id if field_event_participant_3 else None
+                ),
+                sample_id=sample.id,
+                observation_id=observation.id,
+                groundwater_level_reason=row.level_status,
+                groundwater_level_accuracy=row.data_quality,
+            )
         )
 
     return created
 
 
-def _build_field_event_notes(row: _ValidatedRow) -> str | None:
-    parts = [f"Field staff: {row.field_staff}"]
-    if row.water_level_notes:
-        parts.append(row.water_level_notes)
-    notes = " | ".join(part for part in parts if part)
-    return notes or None
-
-
-def _build_observation_notes(row: _ValidatedRow) -> str | None:
-    parts = [f"Level status: {row.level_status}", f"Data quality: {row.data_quality}"]
-    notes = " | ".join(parts)
-    return notes or None
-
-
 def _get_groundwater_level_parameter_id(session: Session) -> int:
     sql = select(Parameter.id).where(Parameter.parameter_name == "groundwater level")
     parameter_id = session.scalars(sql).one_or_none()
diff --git a/tests/conftest.py b/tests/conftest.py
index e0d9eff9..68d3202c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1082,3 +1082,24 @@ def second_lexicon_triple(third_lexicon_term, fourth_lexicon_term):
     yield triple
     session.delete(triple)
     session.commit()
+
+
+@pytest.fixture()
+def water_level_bulk_upload_data(water_well_thing, contact, second_contact):
+    data = {
+        "well_name_point_id": water_well_thing.name,
+        "field_event_date_time": "2025-02-15T08:00:00",
+        "field_staff": contact.name,
+        "field_staff_2": second_contact.name,
+        "field_staff_3": "",
+        "water_level_date_time": "2025-02-15T10:30:00",
+        "measuring_person": contact.name,
+        "sample_method": "Electric tape measurement (E-probe)",
+        "mp_height": "1.5",
+        "level_status": "Water level not affected",
+        "depth_to_water_ft": "7",
+        "data_quality": "Water level accurate to within two hundreths of a foot",
+        "water_level_notes": "Initial measurement",
+    }
+    yield data
+    del data
diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py
index 268de7f7..ad4d0aef 100644
--- a/tests/services/test_water_level_csv.py
+++ b/tests/services/test_water_level_csv.py
@@ -4,14 +4,16 @@
 from db import (
     FieldEvent,
     FieldActivity,
-    # FieldEventParticipant,
+    FieldEventParticipant,
     Sample,
     Observation,
 )
 from services.water_level_csv import bulk_upload_water_levels
 
 
-def test_bulk_upload(water_well_thing, contact):
+def test_bulk_upload(
+    water_level_bulk_upload_data, water_well_thing, contact, second_contact
+):
     """
     The bulk upload function is used by both the API and the CLI, so it is tested
     separately here on the assumption that the behavior is the same for both.
@@ -22,37 +24,10 @@ def test_bulk_upload(water_well_thing, contact):
     # write to a CSV file in memory then delete it after processing
     # this is being done to avoid filesystem dependencies in tests and
     # to use the contact fixture for the field staff
-    csv_content = ",".join(
-        [
-            "field_staff",
-            "well_name_point_id",
-            "field_event_date_time",
-            "measurement_date_time",
-            "sampler",
-            "sample_method",
-            "mp_height",
-            "level_status",
-            "depth_to_water_ft",
-            "data_quality",
-            "water_level_notes",
-        ]
-    )
-    csv_content += "\n"
-    csv_content += ",".join(
-        [
-            contact.name,
-            water_well_thing.name,
-            "2025-02-15T08:00:00",
-            "2025-02-15T10:30:00",
-            "Groundwater Team",
-            "electric tape",
-            "1.5",
-            "stable",
-            "45.2",
-            "approved",
-            "Initial measurement",
-        ]
-    )
+    csv_headers = list(water_level_bulk_upload_data.keys())
+    csv_values = list(water_level_bulk_upload_data.values())
+
+    csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values)
 
     with tempfile.NamedTemporaryFile(
         mode="w+", encoding="utf-8", delete_on_close=True
@@ -65,8 +40,8 @@ def test_bulk_upload(water_well_thing, contact):
 
     assert results.exit_code == 0
 
-    assert len(results.payload["water_levels"]) == 1
-    created_records = results.payload["water_levels"][0]
+    assert len(results.payload.water_levels) == 1
+    created_records = results.payload.water_levels[0]
 
     # verify the data was inserted correctly and then clean up
     with session_ctx() as session:
@@ -75,55 +50,87 @@ def test_bulk_upload(water_well_thing, contact):
         # ----------
 
         # FieldEvent
-        field_event = session.get(FieldEvent, created_records["field_event_id"])
+        field_event = session.get(FieldEvent, created_records.field_event_id)
         assert field_event is not None
         assert field_event.thing_id == water_well_thing.id
         # TODO: uncomment after timezone handling is fixed
         # assert field_event.event_date.isoformat() == "2025-02-15T15:00:00+00:00"
-        assert field_event.event_date.isoformat() == "2025-02-15T08:00:00+00:00"
+        assert (
+            field_event.event_date.isoformat()
+            == water_level_bulk_upload_data["field_event_date_time"] + "+00:00"
+        )
 
         # FieldActivity
         field_activity = session.get(
-            FieldActivity, created_records["field_activity_id"]
+            FieldActivity, created_records.field_activity_id
         )
         assert field_activity is not None
         assert field_activity.activity_type == "groundwater level"
 
-        # FieldEventParticipant
+        # FieldEventParticipants
+        field_event_participant_1 = session.get(
+            FieldEventParticipant, created_records.field_event_participant_1_id
+        )
+        assert field_event_participant_1 is not None
+        assert field_event_participant_1.contact_id == contact.id
+        assert field_event_participant_1.field_event_id == field_event.id
+        assert field_event_participant_1.participant_role == "Lead"
+
+        field_event_participant_2 = session.get(
+            FieldEventParticipant, created_records.field_event_participant_2_id
+        )
+        assert field_event_participant_2 is not None
+        assert field_event_participant_2.contact_id == second_contact.id
+        assert field_event_participant_2.field_event_id == field_event.id
+        assert field_event_participant_2.participant_role == "Participant"
+
+        assert created_records.field_event_participant_3_id is None
 
         # Sample
-        sample = session.get(Sample, created_records["sample_id"])
+        sample = session.get(Sample, created_records.sample_id)
         assert sample is not None
         assert sample.field_activity_id == field_activity.id
         # TODO: uncomment after timezone handling is fixed
         # assert sample.sample_date.isoformat() == "2025-02-15T17:30:00+00:00"
-        assert sample.sample_date.isoformat() == "2025-02-15T10:30:00+00:00"
+        assert (
+            sample.sample_date.isoformat()
+            == water_level_bulk_upload_data["water_level_date_time"] + "+00:00"
+        )
         assert sample.sample_name[0:3] == "wl-"
         assert sample.sample_matrix == "water"
-        assert sample.sample_method == "Electric tape measurement (E-probe)"
+        assert sample.sample_method == water_level_bulk_upload_data["sample_method"]
         assert sample.qc_type == "Normal"
         assert sample.depth_top is None
         assert sample.depth_bottom is None
-        # TODO: uncomment after notes handling is fixed
-        # assert sample.notes is None
 
         # Observation
-        observation = session.get(Observation, created_records["observation_id"])
+        observation = session.get(Observation, created_records.observation_id)
         assert observation is not None
         assert observation.sample_id == sample.id
         # TODO: uncomment after timezone handling is fixed
         # assert observation.observation_datetime.isoformat() == "2025-02-15T17:30:00+00:00"
         assert (
             observation.observation_datetime.isoformat()
-            == "2025-02-15T10:30:00+00:00"
+            == water_level_bulk_upload_data["water_level_date_time"] + "+00:00"
+        )
+        assert observation.value == float(
+            water_level_bulk_upload_data["depth_to_water_ft"]
         )
-        assert observation.value == 45.2
         assert observation.unit == "ft"
-        assert observation.measuring_point_height == 1.5
-        assert observation.groundwater_level_reason is None
-        assert observation.groundwater_level_accuracy is None
-        # TODO: uncomment after notes handling is fixed
-        # assert observation.notes == "Initial measurement"
+        assert observation.measuring_point_height == float(
+            water_level_bulk_upload_data["mp_height"]
+        )
+        assert (
+            observation.groundwater_level_reason
+            == water_level_bulk_upload_data["level_status"]
+        )
+        assert (
+            observation.groundwater_level_accuracy
+            == water_level_bulk_upload_data["data_quality"]
+        )
+        assert (
+            observation.notes == water_level_bulk_upload_data["water_level_notes"]
+        )
 
         # ----------
         # CLEAN UP
@@ -132,5 +139,7 @@ def test_bulk_upload(water_well_thing, contact):
         session.delete(observation)
         session.delete(sample)
         session.delete(field_activity)
+        session.delete(field_event_participant_1)
+        session.delete(field_event_participant_2)
         session.delete(field_event)
         session.commit()
diff --git a/tests/test_observation.py b/tests/test_observation.py
index 4ba03402..b7170282 100644
--- a/tests/test_observation.py
+++ b/tests/test_observation.py
@@ -25,7 +25,7 @@
     amp_editor_function,
     viewer_function,
 )
-from db import Observation, FieldEvent, FieldActivity, Sample
+from db import Observation, FieldEvent, FieldActivity, Sample, FieldEventParticipant
 from db.engine import session_ctx
 from main import app
 from schemas import DT_FMT
@@ -121,38 +121,11 @@ def test_add_groundwater_level_observation(groundwater_level_sample, sensor):
     cleanup_post_test(Observation, data["id"])
 
 
-def test_bulk_upload_groundwater_levels_api(water_well_thing, contact):
-    csv_content = ",".join(
-        [
-            "field_staff",
-            "well_name_point_id",
-            "field_event_date_time",
-            "measurement_date_time",
-            "sampler",
-            "sample_method",
-            "mp_height",
-            "level_status",
-            "depth_to_water_ft",
-            "data_quality",
-            "water_level_notes",
-        ]
-    )
-    csv_content += "\n"
-    csv_content += ",".join(
-        [
-            contact.name,
-            water_well_thing.name,
-            "2025-02-15T08:00:00",
-            "2025-02-15T10:30:00",
-            "Groundwater Team",
-            "electric tape",
-            "1.5",
-            "stable",
-            "45.2",
-            "approved",
-            "Initial measurement",
-        ]
-    )
+def test_bulk_upload_groundwater_levels_api(water_level_bulk_upload_data):
+    csv_headers = list(water_level_bulk_upload_data.keys())
+    csv_values = list(water_level_bulk_upload_data.values())
+
+    csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values)
 
     files = {
         "file": ("water_levels.csv", csv_content, "text/csv"),
@@ -160,26 +133,50 @@ def test_bulk_upload_groundwater_levels_api(water_well_thing, contact):
     response = client.post("/observation/groundwater-level/bulk-upload", files=files)
     data = response.json()
 
-    assert response.status_code == 200
+    assert response.status_code == 201
     assert data["summary"]["total_rows_imported"] == 1
     assert data["summary"]["total_rows_processed"] == 1
-    assert data["summary"]["validation_errors_or_warnings"] == 0
+    assert data["summary"]["total_validation_errors_or_warnings"] == 0
    assert data["validation_errors"] == []
 
     row = data["water_levels"][0]
-    assert row["well_name_point_id"] == water_well_thing.name
+    assert (
+        row["well_name_point_id"] == water_level_bulk_upload_data["well_name_point_id"]
+    )
 
     with session_ctx() as session:
-        observation = session.get(Observation, row["observation_id"])
-        assert observation is not None
         # cleanup in reverse dependency order
+        observation = session.get(Observation, row["observation_id"])
         if observation:
             session.delete(observation)
+
         sample = session.get(Sample, row["sample_id"])
         if sample:
             session.delete(sample)
+
+        field_event_participant_1 = session.get(
+            FieldEventParticipant, row["field_event_participant_1_id"]
+        )
+        if field_event_participant_1:
+            session.delete(field_event_participant_1)
+
+        if row["field_event_participant_2_id"]:
+            field_event_participant_2 = session.get(
+                FieldEventParticipant, row["field_event_participant_2_id"]
+            )
+            if field_event_participant_2:
+                session.delete(field_event_participant_2)
+
+        if row["field_event_participant_3_id"]:
+            field_event_participant_3 = session.get(
+                FieldEventParticipant, row["field_event_participant_3_id"]
+            )
+            if field_event_participant_3:
+                session.delete(field_event_participant_3)
+
         field_activity = session.get(FieldActivity, row["field_activity_id"])
         if field_activity:
             session.delete(field_activity)
+
         field_event = session.get(FieldEvent, row["field_event_id"])
         if field_event:
             session.delete(field_event)
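[A reading aid, not part of the series: patch 05 centralizes every non-database check in WaterLevelCsvRow, so a single row can be validated in isolation. A minimal sketch, assuming the enum strings used by the conftest fixture above; "MW-01" is a hypothetical well name and "A Lopez" is the placeholder name the old test used:

    from pydantic import ValidationError
    from schemas.water_level_csv import WaterLevelCsvRow

    row = {
        "well_name_point_id": "MW-01",  # hypothetical well name
        "field_event_date_time": "2025-02-15T08:00:00",
        "field_staff": "A Lopez",
        "water_level_date_time": "2025-02-15T10:30:00",
        "measuring_person": "A Lopez",  # must match one of the field staff
        "sample_method": "Electric tape measurement (E-probe)",
        "mp_height": "1.5",
        "level_status": "Water level not affected",
        "depth_to_water_ft": "7",
        "data_quality": "Water level accurate to within two hundreths of a foot",
    }
    try:
        model = WaterLevelCsvRow(**row)  # types, enums, required fields
    except ValidationError as exc:
        # _validate_rows turns each error into "Row {idx}: {loc} - {msg}"
        for err in exc.errors():
            print(".".join(str(p) for p in err["loc"]), "-", err["msg"])

Database-dependent checks (well existence, contacts, well depth versus depth to water) stay in _validate_rows, which appends the same "Row {idx}: ..." strings to the error list.]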
From 70d0e31966d2707fa7ac741dc31f2dccb84b572f Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 5 Jan 2026 08:49:56 -0700
Subject: [PATCH 06/29] refactor: remove redundant water level test in CLI

The logic to test if the bulk upload function works correctly is
already covered in its own unit tests. The CLI test was redundant and
has been removed to streamline the test suite.
---
 tests/test_cli_commands.py | 99 ++++----------------------------------
 1 file changed, 9 insertions(+), 90 deletions(-)

diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 13e99114..8c648015 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -15,16 +15,11 @@
 # ===============================================================================
 from __future__ import annotations
 
-import textwrap
-import uuid
 from pathlib import Path
 
 from click.testing import CliRunner
-from sqlalchemy import select
 
 from cli.cli import cli
-from db import FieldActivity, FieldEvent, Observation, Sample
-from db.engine import session_ctx
 
 
 def test_initialize_lexicon_invokes_initializer(monkeypatch):
@@ -103,6 +98,15 @@ def fake_upload(file_path, *, pretty_json=False):
 
 
 def test_water_levels_bulk_upload_json_output(monkeypatch, tmp_path):
+    """
+    Developer's note
+
+    The function that uploads water levels from CSV files has its own unit tests that
+    verify that the database rows are created correctly. Since the CLI command simply
+    calls that function, an end-to-end test here is not needed. The test here verifies
+    that the CLI can be invoked and that the response is as expected.
+    """
+
     csv_file = tmp_path / "water_levels.csv"
     csv_file.write_text("col\nvalue\n")
     captured = {}
@@ -132,89 +136,4 @@ def fake_upload(file_path, *, pretty_json=False):
     assert captured["pretty_json"] is True
 
 
-def test_water_levels_cli_persists_observations(tmp_path, water_well_thing):
-    """
-    End-to-end CLI invocation should create FieldEvent, Sample, and Observation rows.
-    """
-
-    def _write_csv(path: Path, *, well_name: str, notes: str):
-        csv_text = textwrap.dedent(
-            f"""\
-            field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes
-            CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes}
-            """
-        )
-        path.write_text(csv_text)
-
-    unique_notes = f"pytest-{uuid.uuid4()}"
-    csv_file = tmp_path / "water_levels.csv"
-    _write_csv(csv_file, well_name=water_well_thing.name, notes=unique_notes)
-
-    runner = CliRunner()
-    result = runner.invoke(
-        cli, ["water-levels", "bulk-upload", "--file", str(csv_file)]
-    )
-
-    assert result.exit_code == 0, result.output
-
-    created_ids: dict[str, int] = {}
-    with session_ctx() as session:
-        stmt = (
-            select(Observation)
-            .join(Observation.sample)
-            .join(Sample.field_activity)
-            .join(FieldActivity.field_event)
-            .where(Sample.notes == unique_notes)
-        )
-        observations = session.scalars(stmt).all()
-        assert len(observations) == 1, "Expected one observation for the uploaded CSV"
-
-        observation = observations[0]
-        sample = observation.sample
-        field_activity = sample.field_activity
-        field_event = field_activity.field_event
-
-        assert field_event.thing_id == water_well_thing.id
-        assert sample.sample_method == "Electric tape measurement (E-probe)"
-        assert sample.sample_matrix == "water"
-        assert observation.value == 42.5
-        assert observation.measuring_point_height == 1.5
-        assert observation.notes == "Level status: stable | Data quality: approved"
-        assert (
-            field_event.notes == f"Field staff: CLI Tester | {unique_notes}"
-        ), "Field event notes should capture field staff and notes"
-
-        created_ids = {
-            "observation_id": observation.id,
-            "sample_id": sample.id,
-            "field_activity_id": field_activity.id,
-            "field_event_id": field_event.id,
-        }
-
-    if created_ids:
-        # Clean up committed rows so other tests see a pristine database.
-        with session_ctx() as session:
-            observation = session.get(Observation, created_ids["observation_id"])
-            sample = session.get(Sample, created_ids["sample_id"])
-            field_activity = session.get(
-                FieldActivity, created_ids["field_activity_id"]
-            )
-            field_event = session.get(FieldEvent, created_ids["field_event_id"])
-
-            if observation:
-                session.delete(observation)
-                session.flush()
-            if sample:
-                session.delete(sample)
-                session.flush()
-            if field_activity:
-                session.delete(field_activity)
-                session.flush()
-            if field_event:
-                session.delete(field_event)
-                session.flush()
-
-            session.commit()
-
-
 # ============= EOF =============================================

From 19d597dd28fe4818178907a81c9f93eb8c063e27 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 5 Jan 2026 11:27:05 -0700
Subject: [PATCH 07/29] feat: test file not found scenario in water level CSV
 upload

This commit adds a unit test to verify that the water level CSV upload
service correctly handles the scenario where the specified source file
does not exist. The test checks that the appropriate error message is
returned in the response when a FileNotFoundError is raised.
---
 services/water_level_csv.py            |  6 +++++-
 tests/services/test_water_level_csv.py | 27 ++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/services/water_level_csv.py b/services/water_level_csv.py
index 24bcfce8..423b2934 100644
--- a/services/water_level_csv.py
+++ b/services/water_level_csv.py
@@ -63,7 +63,11 @@ def bulk_upload_water_levels(
     except FileNotFoundError:
         msg = f"File not found: {source_file}"
         payload = WaterLevelBulkUploadPayload(
-            summary=WaterLevelBulkUploadSummary(0, 0, 0),
+            summary=WaterLevelBulkUploadSummary(
+                total_rows_processed=0,
+                total_rows_imported=0,
+                total_validation_errors_or_warnings=0,
+            ),
             water_levels=[],
             validation_errors=[],
         )
diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py
index ad4d0aef..90c36a12 100644
--- a/tests/services/test_water_level_csv.py
+++ b/tests/services/test_water_level_csv.py
@@ -8,6 +8,10 @@
     Sample,
     Observation,
 )
+from schemas.water_level_csv import (
+    WaterLevelBulkUploadSummary,
+    WaterLevelBulkUploadPayload,
+)
 from services.water_level_csv import bulk_upload_water_levels
 
 
@@ -143,3 +147,26 @@
         session.delete(field_event_participant_2)
         session.delete(field_event)
         session.commit()
+
+
+def test_bulk_upload_file_not_found():
+    """
+    Test the bulk upload function with a non-existent file path.
+ """ + + results = bulk_upload_water_levels("non_existent_file.csv") + + assert results.exit_code == 1 + assert ( + results.stdout + == '{"summary": {"total_rows_processed": 0, "total_rows_imported": 0, "total_validation_errors_or_warnings": 0}, "water_levels": [], "validation_errors": []}' + ) + assert results.stderr == "File not found: non_existent_file.csv" + assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert results.payload.summary == WaterLevelBulkUploadSummary( + total_rows_processed=0, + total_rows_imported=0, + total_validation_errors_or_warnings=0, + ) + assert results.payload.water_levels == [] + assert results.payload.validation_errors == [] From fad9f984b93d366dd06b274808a2d4586f809604 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 12:26:19 -0700 Subject: [PATCH 08/29] feat: implement water level csv unit test for nonexistent well --- tests/services/test_water_level_csv.py | 155 ++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 5 deletions(-) diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py index 90c36a12..ea3e7f6c 100644 --- a/tests/services/test_water_level_csv.py +++ b/tests/services/test_water_level_csv.py @@ -163,10 +163,155 @@ def test_bulk_upload_file_not_found(): ) assert results.stderr == "File not found: non_existent_file.csv" assert isinstance(results.payload, WaterLevelBulkUploadPayload) - assert results.payload.summary == WaterLevelBulkUploadSummary( - total_rows_processed=0, - total_rows_imported=0, - total_validation_errors_or_warnings=0, - ) + assert isinstance(results.payload.summary, WaterLevelBulkUploadSummary) + assert results.payload.summary.total_rows_imported == 0 + assert results.payload.summary.total_rows_processed == 0 + assert results.payload.summary.total_validation_errors_or_warnings == 0 assert results.payload.water_levels == [] assert results.payload.validation_errors == [] + + +def test_bulk_upload_nonexistent_well(water_level_bulk_upload_data): + """ + Test the bulk upload function with a nonexistent well name. 
+ """ + bad_water_level_bulk_upload_data = water_level_bulk_upload_data.copy() + bad_water_level_bulk_upload_data["well_name_point_id"] = "NonExistentWell" + + # write to a CSV file in memory then delete it after processing + csv_headers = list(bad_water_level_bulk_upload_data.keys()) + csv_values = list(bad_water_level_bulk_upload_data.values()) + + csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + + with tempfile.NamedTemporaryFile( + mode="w+", encoding="utf-8", delete_on_close=True + ) as temp_csv: + temp_csv.write(csv_content) + temp_csv.flush() + + results = bulk_upload_water_levels(temp_csv.name) + + assert results.exit_code == 1 + assert ( + results.stdout + == '{"summary": {"total_rows_processed": 1, "total_rows_imported": 0, "total_validation_errors_or_warnings": 1}, "water_levels": [], "validation_errors": ["Row 1: Unknown well_name_point_id \'NonExistentWell\'"]}' + ) + assert results.stderr == "Row 1: Unknown well_name_point_id 'NonExistentWell'" + assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert isinstance(results.payload.summary, WaterLevelBulkUploadSummary) + assert results.payload.summary.total_rows_imported == 0 + assert results.payload.summary.total_rows_processed == 1 + assert results.payload.summary.total_validation_errors_or_warnings == 1 + assert results.payload.water_levels == [] + assert results.payload.validation_errors == [ + "Row 1: Unknown well_name_point_id 'NonExistentWell'" + ] + + +def test_bulk_upload_bad_dtw_bgs(water_level_bulk_upload_data, water_well_thing): + """ + Test the bulk upload function with a non-numeric depth to water below ground surface. + """ + pass + # # Update the depth_to_water_ft to a non-numeric value + # water_level_bulk_upload_data["depth_to_water_ft"] = "not_a_number" + + # # write to a CSV file in memory then delete it after processing + # csv_headers = list(water_level_bulk_upload_data.keys()) + # csv_values = list(water_level_bulk_upload_data.values()) + + # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + + # with tempfile.NamedTemporaryFile( + # mode="w+", encoding="utf-8", delete_on_close=True + # ) as temp_csv: + # temp_csv.write(csv_content) + # temp_csv.flush() + + # results = bulk_upload_water_levels(temp_csv.name) + + # assert results.exit_code == 1 + # assert "Invalid depth_to_water_ft value: not_a_number" in results.stderr + # assert isinstance(results.payload, WaterLevelBulkUploadPayload) + + +def test_bulk_upload_bad_field_staff(water_level_bulk_upload_data, water_well_thing): + """ + Test the bulk upload function with a non-existent field staff name. 
+ """ + pass + # # Update the field_staff_1 to a non-existent contact name + # water_level_bulk_upload_data["field_staff_1"] = "NonExistentContact" + + # # write to a CSV file in memory then delete it after processing + # csv_headers = list(water_level_bulk_upload_data.keys()) + # csv_values = list(water_level_bulk_upload_data.values()) + + # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + + # with tempfile.NamedTemporaryFile( + # mode="w+", encoding="utf-8", delete_on_close=True + # ) as temp_csv: + # temp_csv.write(csv_content) + # temp_csv.flush() + + # results = bulk_upload_water_levels(temp_csv.name) + + # assert results.exit_code == 1 + # assert "Field staff not found: NonExistentContact" in results.stderr + # assert isinstance(results.payload, WaterLevelBulkUploadPayload) + + +def test_bulk_upload_bad_field_staff_2(water_level_bulk_upload_data, water_well_thing): + """ + Test the bulk upload function with a non-existent second field staff name. + """ + pass + # # Update the field_staff_2 to a non-existent contact name + # water_level_bulk_upload_data["field_staff_2"] = "NonExistentContact2" + + # # write to a CSV file in memory then delete it after processing + # csv_headers = list(water_level_bulk_upload_data.keys()) + # csv_values = list(water_level_bulk_upload_data.values()) + + # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + + # with tempfile.NamedTemporaryFile( + # mode="w+", encoding="utf-8", delete_on_close=True + # ) as temp_csv: + # temp_csv.write(csv_content) + # temp_csv.flush() + + # results = bulk_upload_water_levels(temp_csv.name) + + # assert results.exit_code == 1 + # assert "Field staff not found: NonExistentContact2" in results.stderr + # assert isinstance(results.payload, WaterLevelBulkUploadPayload + + +def test_bulk_upload_bad_field_staff_3(water_level_bulk_upload_data, water_well_thing): + """ + Test the bulk upload function with a non-existent third field staff name. + """ + pass + # # Update the field_staff_3 to a non-existent contact name + # water_level_bulk_upload_data["field_staff_3"] = "NonExistentContact3" + + # # write to a CSV file in memory then delete it after processing + # csv_headers = list(water_level_bulk_upload_data.keys()) + # csv_values = list(water_level_bulk_upload_data.values()) + + # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + + # with tempfile.NamedTemporaryFile( + # mode="w+", encoding="utf-8", delete_on_close=True + # ) as temp_csv: + # temp_csv.write(csv_content) + # temp_csv.flush() + + # results = bulk_upload_water_levels(temp_csv.name) + + # assert results.exit_code == 1 + # assert "Field staff not found: NonExistentContact3" in results.stderr + # assert isinstance(results.payload, WaterLevelBulkUploadPayload) From 08e7a60ee4725f7bdbe74100a30e70335071836d Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 12:48:47 -0700 Subject: [PATCH 09/29] feat: implement water level csv unit test for bad dtw bgs --- tests/services/test_water_level_csv.py | 35 ++++++++++++++------------ 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py index ea3e7f6c..f97d55ab 100644 --- a/tests/services/test_water_level_csv.py +++ b/tests/services/test_water_level_csv.py @@ -213,27 +213,30 @@ def test_bulk_upload_bad_dtw_bgs(water_level_bulk_upload_data, water_well_thing) """ Test the bulk upload function with a non-numeric depth to water below ground surface. 
""" - pass - # # Update the depth_to_water_ft to a non-numeric value - # water_level_bulk_upload_data["depth_to_water_ft"] = "not_a_number" + bad_water_level_bulk_upload_data = water_level_bulk_upload_data.copy() + bad_water_level_bulk_upload_data["depth_to_water_ft"] = ( + f"{water_well_thing.well_depth+200}" + ) - # # write to a CSV file in memory then delete it after processing - # csv_headers = list(water_level_bulk_upload_data.keys()) - # csv_values = list(water_level_bulk_upload_data.values()) + # write to a CSV file in memory then delete it after processing + csv_headers = list(bad_water_level_bulk_upload_data.keys()) + csv_values = list(bad_water_level_bulk_upload_data.values()) - # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) - # with tempfile.NamedTemporaryFile( - # mode="w+", encoding="utf-8", delete_on_close=True - # ) as temp_csv: - # temp_csv.write(csv_content) - # temp_csv.flush() + with tempfile.NamedTemporaryFile( + mode="w+", encoding="utf-8", delete_on_close=True + ) as temp_csv: + temp_csv.write(csv_content) + temp_csv.flush() - # results = bulk_upload_water_levels(temp_csv.name) + results = bulk_upload_water_levels(temp_csv.name) - # assert results.exit_code == 1 - # assert "Invalid depth_to_water_ft value: not_a_number" in results.stderr - # assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert results.exit_code == 1 + assert ( + results.stdout + == f"{{\"summary\": {{\"total_rows_processed\": 1, \"total_rows_imported\": 0, \"total_validation_errors_or_warnings\": 1}}, \"water_levels\": [], \"validation_errors\": [\"Row 1: well_depth ({water_well_thing.well_depth} ft) must be greater than depth_to_water_ft ({bad_water_level_bulk_upload_data['depth_to_water_ft']} ft) minus mp_height ({bad_water_level_bulk_upload_data['mp_height']} ft)\"]}}" + ) def test_bulk_upload_bad_field_staff(water_level_bulk_upload_data, water_well_thing): From f0b6b2f6bf099b935eb7a152ce25d0e4b26a99a6 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 12:53:43 -0700 Subject: [PATCH 10/29] feat: implement water level csv unit test for bad dtw bgs --- tests/services/test_water_level_csv.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py index f97d55ab..d8f6ab84 100644 --- a/tests/services/test_water_level_csv.py +++ b/tests/services/test_water_level_csv.py @@ -237,9 +237,22 @@ def test_bulk_upload_bad_dtw_bgs(water_level_bulk_upload_data, water_well_thing) results.stdout == f"{{\"summary\": {{\"total_rows_processed\": 1, \"total_rows_imported\": 0, \"total_validation_errors_or_warnings\": 1}}, \"water_levels\": [], \"validation_errors\": [\"Row 1: well_depth ({water_well_thing.well_depth} ft) must be greater than depth_to_water_ft ({bad_water_level_bulk_upload_data['depth_to_water_ft']} ft) minus mp_height ({bad_water_level_bulk_upload_data['mp_height']} ft)\"]}}" ) + assert ( + results.stderr + == f"Row 1: well_depth ({water_well_thing.well_depth} ft) must be greater than depth_to_water_ft ({bad_water_level_bulk_upload_data['depth_to_water_ft']} ft) minus mp_height ({bad_water_level_bulk_upload_data['mp_height']} ft)" + ) + assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert isinstance(results.payload.summary, WaterLevelBulkUploadSummary) + assert results.payload.summary.total_rows_imported == 0 + assert 
results.payload.summary.total_rows_processed == 1 + assert results.payload.summary.total_validation_errors_or_warnings == 1 + assert results.payload.water_levels == [] + assert results.payload.validation_errors == [ + f"Row 1: well_depth ({water_well_thing.well_depth} ft) must be greater than depth_to_water_ft ({bad_water_level_bulk_upload_data['depth_to_water_ft']} ft) minus mp_height ({bad_water_level_bulk_upload_data['mp_height']} ft)" + ] -def test_bulk_upload_bad_field_staff(water_level_bulk_upload_data, water_well_thing): +def test_bulk_upload_bad_field_staff(water_level_bulk_upload_data): """ Test the bulk upload function with a non-existent field staff name. """ From 797dee46f84c2b48df001f857aa63c6ac3a106de Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 13:29:16 -0700 Subject: [PATCH 11/29] feat: enable all errors in a row to be captured instead of just the first one. This allows users to see all issues with a row at once instead of having to fix them one by one. --- services/water_level_csv.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/services/water_level_csv.py b/services/water_level_csv.py index 423b2934..c3d32aa1 100644 --- a/services/water_level_csv.py +++ b/services/water_level_csv.py @@ -173,6 +173,9 @@ def _validate_rows( # Normalize whitespace in all fields normalized_row = {k: (v or "").strip() for k, v in raw_row.items()} + # allow all errors for a row to be logged at once instead of just the first one encountered + error_in_row = False + """ Developer's note @@ -189,7 +192,7 @@ def _validate_rows( location = ".".join(str(part) for part in err["loc"]) message = err["msg"] errors.append(f"Row {idx}: {location} - {message}") - continue + error_in_row = True # Verify that the well exists in the database well_name = model.well_name_point_id @@ -199,15 +202,16 @@ def _validate_rows( well = session.scalars(sql).one_or_none() if well is None: errors.append(f"Row {idx}: Unknown well_name_point_id '{well_name}'") - continue - wells_by_name_cache[well_name] = well + error_in_row = True + else: + wells_by_name_cache[well_name] = well # verify that the well depth is greater than the water level depth bgs - if well.well_depth <= model.depth_to_water_ft - model.mp_height: + if well and well.well_depth <= model.depth_to_water_ft - model.mp_height: errors.append( f"Row {idx}: well_depth ({well.well_depth} ft) must be greater than depth_to_water_ft ({model.depth_to_water_ft} ft) minus mp_height ({model.mp_height} ft)" ) - continue + error_in_row = True # Verify that the field staff are in the database """ @@ -222,8 +226,9 @@ def _validate_rows( field_staff_contact = session.scalars(sql).one_or_none() if field_staff_contact is None: errors.append(f"Row {idx}: Unknown field_staff '{field_staff_name}'") - continue - contacts_by_name_cache[field_staff_name] = field_staff_contact + error_in_row = True + else: + contacts_by_name_cache[field_staff_name] = field_staff_contact if model.field_staff_2: field_staff_2_name = model.field_staff_2 @@ -235,8 +240,9 @@ def _validate_rows( errors.append( f"Row {idx}: Unknown field_staff_2 '{field_staff_2_name}'" ) - continue - contacts_by_name_cache[field_staff_2_name] = field_staff_2_contact + error_in_row = True + else: + contacts_by_name_cache[field_staff_2_name] = field_staff_2_contact else: field_staff_2_contact = None @@ -250,11 +256,15 @@ def _validate_rows( errors.append( f"Row {idx}: Unknown field_staff_3 '{field_staff_3_name}'" ) - continue - 
contacts_by_name_cache[field_staff_3_name] = field_staff_3_contact + error_in_row = True + else: + contacts_by_name_cache[field_staff_3_name] = field_staff_3_contact else: field_staff_3_contact = None + if error_in_row: + continue + # The Pydantic schema ensures that measuring_person is one of the field staff if model.measuring_person == model.field_staff: measuring_person_field_staff_index = 1 From d7f38e853a420ecc84ed664f3cb6bfecb61c5344 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 13:34:58 -0700 Subject: [PATCH 12/29] feat: test invalid field staff names --- tests/services/test_water_level_csv.py | 109 +++++++++---------------- 1 file changed, 38 insertions(+), 71 deletions(-) diff --git a/tests/services/test_water_level_csv.py b/tests/services/test_water_level_csv.py index d8f6ab84..b4a5532e 100644 --- a/tests/services/test_water_level_csv.py +++ b/tests/services/test_water_level_csv.py @@ -254,80 +254,47 @@ def test_bulk_upload_bad_dtw_bgs(water_level_bulk_upload_data, water_well_thing) def test_bulk_upload_bad_field_staff(water_level_bulk_upload_data): """ - Test the bulk upload function with a non-existent field staff name. + Test the bulk upload function with nonexistent field staff names. """ - pass - # # Update the field_staff_1 to a non-existent contact name - # water_level_bulk_upload_data["field_staff_1"] = "NonExistentContact" - - # # write to a CSV file in memory then delete it after processing - # csv_headers = list(water_level_bulk_upload_data.keys()) - # csv_values = list(water_level_bulk_upload_data.values()) - - # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) - - # with tempfile.NamedTemporaryFile( - # mode="w+", encoding="utf-8", delete_on_close=True - # ) as temp_csv: - # temp_csv.write(csv_content) - # temp_csv.flush() - - # results = bulk_upload_water_levels(temp_csv.name) - - # assert results.exit_code == 1 - # assert "Field staff not found: NonExistentContact" in results.stderr - # assert isinstance(results.payload, WaterLevelBulkUploadPayload) - - -def test_bulk_upload_bad_field_staff_2(water_level_bulk_upload_data, water_well_thing): - """ - Test the bulk upload function with a non-existent second field staff name. - """ - pass - # # Update the field_staff_2 to a non-existent contact name - # water_level_bulk_upload_data["field_staff_2"] = "NonExistentContact2" - - # # write to a CSV file in memory then delete it after processing - # csv_headers = list(water_level_bulk_upload_data.keys()) - # csv_values = list(water_level_bulk_upload_data.values()) - - # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) - - # with tempfile.NamedTemporaryFile( - # mode="w+", encoding="utf-8", delete_on_close=True - # ) as temp_csv: - # temp_csv.write(csv_content) - # temp_csv.flush() - - # results = bulk_upload_water_levels(temp_csv.name) - - # assert results.exit_code == 1 - # assert "Field staff not found: NonExistentContact2" in results.stderr - # assert isinstance(results.payload, WaterLevelBulkUploadPayload - - -def test_bulk_upload_bad_field_staff_3(water_level_bulk_upload_data, water_well_thing): - """ - Test the bulk upload function with a non-existent third field staff name. 
- """ - pass - # # Update the field_staff_3 to a non-existent contact name - # water_level_bulk_upload_data["field_staff_3"] = "NonExistentContact3" + bad_water_level_bulk_upload_data = water_level_bulk_upload_data.copy() + bad_water_level_bulk_upload_data["field_staff"] = "NonExistentContact" + bad_water_level_bulk_upload_data["field_staff_2"] = "NonExistentContact2" + bad_water_level_bulk_upload_data["field_staff_3"] = "NonExistentContact3" + bad_water_level_bulk_upload_data["measuring_person"] = ( + bad_water_level_bulk_upload_data["field_staff"] + ) - # # write to a CSV file in memory then delete it after processing - # csv_headers = list(water_level_bulk_upload_data.keys()) - # csv_values = list(water_level_bulk_upload_data.values()) + # write to a CSV file in memory then delete it after processing + csv_headers = list(bad_water_level_bulk_upload_data.keys()) + csv_values = list(bad_water_level_bulk_upload_data.values()) - # csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) + csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) - # with tempfile.NamedTemporaryFile( - # mode="w+", encoding="utf-8", delete_on_close=True - # ) as temp_csv: - # temp_csv.write(csv_content) - # temp_csv.flush() + with tempfile.NamedTemporaryFile( + mode="w+", encoding="utf-8", delete_on_close=True + ) as temp_csv: + temp_csv.write(csv_content) + temp_csv.flush() - # results = bulk_upload_water_levels(temp_csv.name) + results = bulk_upload_water_levels(temp_csv.name) - # assert results.exit_code == 1 - # assert "Field staff not found: NonExistentContact3" in results.stderr - # assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert results.exit_code == 1 + assert ( + results.stdout + == '{"summary": {"total_rows_processed": 1, "total_rows_imported": 0, "total_validation_errors_or_warnings": 3}, "water_levels": [], "validation_errors": ["Row 1: Unknown field_staff \'NonExistentContact\'", "Row 1: Unknown field_staff_2 \'NonExistentContact2\'", "Row 1: Unknown field_staff_3 \'NonExistentContact3\'"]}' + ) + assert ( + results.stderr + == "Row 1: Unknown field_staff 'NonExistentContact'\nRow 1: Unknown field_staff_2 'NonExistentContact2'\nRow 1: Unknown field_staff_3 'NonExistentContact3'" + ) + assert isinstance(results.payload, WaterLevelBulkUploadPayload) + assert isinstance(results.payload.summary, WaterLevelBulkUploadSummary) + assert results.payload.summary.total_rows_imported == 0 + assert results.payload.summary.total_rows_processed == 1 + assert results.payload.summary.total_validation_errors_or_warnings == 3 + assert results.payload.water_levels == [] + assert results.payload.validation_errors == [ + "Row 1: Unknown field_staff 'NonExistentContact'", + "Row 1: Unknown field_staff_2 'NonExistentContact2'", + "Row 1: Unknown field_staff_3 'NonExistentContact3'", + ] From ededb61423a6a113d7c4cb65bb77985003a61fcb Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 13:44:30 -0700 Subject: [PATCH 13/29] fix: remove outdated TODO note --- cli/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cli/cli.py b/cli/cli.py index 50625434..4fedfa21 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -83,7 +83,6 @@ def water_levels_bulk_upload(file_path: str, output_format: str | None): """ parse and upload a csv """ - # TODO: use the same helper function used by api to parse and upload a WL csv from cli.service_adapter import water_levels_csv pretty_json = (output_format or "").lower() == "json" From c4ae5693fdaf425ca64929b89d2363931af6d5fe Mon Sep 17 
00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 16:02:13 -0700 Subject: [PATCH 14/29] refactor: update positive givens for water level csv feature tests --- tests/features/environment.py | 7 +-- tests/features/steps/water-levels-csv.py | 60 ++++++++++++------------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tests/features/environment.py b/tests/features/environment.py index 530ca453..8d8401e9 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -221,9 +221,9 @@ def add_spring(context, session, location, name_num): @add_context_object_container("contacts") -def add_contact(context, session): +def add_contact(context, name, session): contact = Contact( - name="Test Contact", + name=name, role="Software Developer", organization="NMBGMR", release_status="draft", @@ -534,7 +534,8 @@ def before_all(context): add_well_casing_material(context, session, well_1) - contact = add_contact(context, session) + for i in range(4): + add_contact(context, f"Test Contact {i}", session) for permission in [ "Datalogger Installation", diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py index 06901f74..c0353ed3 100644 --- a/tests/features/steps/water-levels-csv.py +++ b/tests/features/steps/water-levels-csv.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +from datetime import datetime import json import tempfile from pathlib import Path @@ -23,26 +24,9 @@ from db import Observation from db.engine import session_ctx +from schemas.water_level_csv import WaterLevelCsvRow from services.water_level_csv import bulk_upload_water_levels -REQUIRED_FIELDS: List[str] = [ - "field_staff", - "well_name_point_id", - "field_event_date_time", - "measurement_date_time", - "sampler", - "sample_method", - "mp_height", - "level_status", - "depth_to_water_ft", - "data_quality", -] -OPTIONAL_FIELDS = ["water_level_notes"] -VALID_SAMPLERS = ["Groundwater Team", "Consultant"] -VALID_SAMPLE_METHODS = ["electric tape", "steel tape"] -VALID_LEVEL_STATUSES = ["stable", "rising", "falling"] -VALID_DATA_QUALITIES = ["approved", "provisional"] - def _available_well_names(context: Context) -> list[str]: if not hasattr(context, "well_names"): @@ -50,21 +34,33 @@ def _available_well_names(context: Context) -> list[str]: return context.well_names +def _available_field_staff(context: Context) -> list[str]: + if not hasattr(context, "contact_names"): + context.contact_names = [ + contact.name for contact in context.objects["contacts"] + ] + return context.contact_names + + def _base_row(context: Context, index: int) -> Dict[str, str]: well_names = _available_well_names(context) well_name = well_names[(index - 1) % len(well_names)] + + contact_names = _available_field_staff(context) measurement_day = 14 + index return { - "field_staff": "A Lopez" if index == 1 else "B Chen", "well_name_point_id": well_name, - "field_event_date_time": f"2025-02-{measurement_day:02d}T08:00:00-07:00", - "measurement_date_time": f"2025-02-{measurement_day:02d}T10:30:00-07:00", - "sampler": VALID_SAMPLERS[(index - 1) % len(VALID_SAMPLERS)], - "sample_method": VALID_SAMPLE_METHODS[(index - 1) % len(VALID_SAMPLE_METHODS)], + "field_event_date_time": f"2025-02-{measurement_day:02d}T08:00:00", + "field_staff": contact_names[(index - 1) % len(contact_names)], + "field_staff_2": contact_names[(index - 2) % 
len(contact_names)], + "field_staff_3": contact_names[(index - 3) % len(contact_names)], + "water_level_date_time": f"2025-02-{measurement_day:02d}T10:30:00", + "measuring_person": contact_names[(index - 1) % len(contact_names)], + "sample_method": "Steel-tape measurement", # lexicon value "mp_height": "1.5" if index == 1 else "1.8", - "level_status": VALID_LEVEL_STATUSES[(index - 1) % len(VALID_LEVEL_STATUSES)], - "depth_to_water_ft": "45.2" if index == 1 else "47.0", - "data_quality": VALID_DATA_QUALITIES[(index - 1) % len(VALID_DATA_QUALITIES)], + "level_status": "Water level not affected", # maps to groundwater_level_reason + "depth_to_water_ft": "9" if index == 1 else "8", + "data_quality": "Water level accurate to within two hundreths of a foot", # maps to groundwater_level_accuracy "water_level_notes": "Initial measurement" if index == 1 else "Follow-up", } @@ -101,7 +97,7 @@ def _set_rows( elif rows: context.csv_headers = list(rows[0].keys()) else: - context.csv_headers = list(REQUIRED_FIELDS) + context.csv_headers = [field for field in WaterLevelCsvRow.model_fields.keys()] _write_csv_to_context(context) context.stdout_json = None @@ -151,12 +147,16 @@ def step_impl(context: Context): @given( - '"measurement_date_time" values are valid ISO 8601 timestamps with timezone offsets (e.g. "2025-02-15T10:30:00-08:00")' + '"water_level_date_time" values are valid ISO 8601 timezone-naive datetime strings (e.g. "2025-02-15T10:30:00")' ) def step_impl(context: Context): for row in context.csv_rows: - assert row["measurement_date_time"].startswith("2025-02") - assert "T" in row["measurement_date_time"] + assert row["water_level_date_time"].startswith("2025-02") + assert "T" in row["water_level_date_time"] + dt_naive = datetime.strptime(row["water_level_date_time"], "%Y-%m-%dT%H:%M:%S") + assert ( + dt_naive.tzinfo is None + ), f"Expected timezone-naive datetime but got {row['water_level_date_time']}" @given("the CSV includes optional fields when available:") From d8d0aa3c5e6856e04826660da420bd9efdc7b035 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 16:21:50 -0700 Subject: [PATCH 15/29] fix: only process DB validations after Pydantic validation has passed the model needs to be validated and created before any comparisons can be made against the database --- services/water_level_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/water_level_csv.py b/services/water_level_csv.py index c3d32aa1..f1abead3 100644 --- a/services/water_level_csv.py +++ b/services/water_level_csv.py @@ -192,7 +192,8 @@ def _validate_rows( location = ".".join(str(part) for part in err["loc"]) message = err["msg"] errors.append(f"Row {idx}: {location} - {message}") - error_in_row = True + # the model needs valid data to be serialized and processed/validated against the database, so we skip to the next row if there are validation errors from Pydantic + continue # Verify that the well exists in the database well_name = model.well_name_point_id From ba2782098bb4e30ff23afdc504a9582a3e25d94f Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 5 Jan 2026 16:53:21 -0700 Subject: [PATCH 16/29] feat: add water_level_date_time to water level csv upload response this is required in the feature file --- schemas/water_level_csv.py | 1 + services/water_level_csv.py | 1 + tests/features/steps/water-levels-csv.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/schemas/water_level_csv.py b/schemas/water_level_csv.py index f1479933..302f26d4 100644 --- 
a/schemas/water_level_csv.py +++ b/schemas/water_level_csv.py @@ -125,6 +125,7 @@ class WaterLevelCreatedRow(BaseModel): field_event_participant_3_id: int | None sample_id: int observation_id: int + water_level_date_time: str groundwater_level_reason: str groundwater_level_accuracy: str diff --git a/services/water_level_csv.py b/services/water_level_csv.py index f1abead3..95d1b0b5 100644 --- a/services/water_level_csv.py +++ b/services/water_level_csv.py @@ -381,6 +381,7 @@ def _create_records( ), sample_id=sample.id, observation_id=observation.id, + water_level_date_time=row.water_level_date_time.isoformat(), groundwater_level_reason=row.level_status, groundwater_level_accuracy=row.data_quality, ) diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py index c0353ed3..5570870d 100644 --- a/tests/features/steps/water-levels-csv.py +++ b/tests/features/steps/water-levels-csv.py @@ -212,7 +212,7 @@ def step_impl(context: Context): with session_ctx() as session: for row in rows: assert "well_name_point_id" in row - assert "measurement_date_time" in row + assert "water_level_date_time" in row obs = session.get(Observation, row["observation_id"]) assert obs is not None, "Observation missing from database" From 03f0b3e932cd139b07ba83cbf87d12444ba626cf Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 6 Jan 2026 08:56:02 -0700 Subject: [PATCH 17/29] fix: update seeding data --- tests/features/steps/water-levels-csv.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py index 5570870d..37fc8cab 100644 --- a/tests/features/steps/water-levels-csv.py +++ b/tests/features/steps/water-levels-csv.py @@ -48,21 +48,21 @@ def _base_row(context: Context, index: int) -> Dict[str, str]: contact_names = _available_field_staff(context) measurement_day = 14 + index - return { - "well_name_point_id": well_name, - "field_event_date_time": f"2025-02-{measurement_day:02d}T08:00:00", - "field_staff": contact_names[(index - 1) % len(contact_names)], - "field_staff_2": contact_names[(index - 2) % len(contact_names)], - "field_staff_3": contact_names[(index - 3) % len(contact_names)], - "water_level_date_time": f"2025-02-{measurement_day:02d}T10:30:00", - "measuring_person": contact_names[(index - 1) % len(contact_names)], - "sample_method": "Steel-tape measurement", # lexicon value - "mp_height": "1.5" if index == 1 else "1.8", - "level_status": "Water level not affected", # maps to groundwater_level_reason - "depth_to_water_ft": "9" if index == 1 else "8", - "data_quality": "Water level accurate to within two hundreths of a foot", # maps to groundwater_level_accuracy - "water_level_notes": "Initial measurement" if index == 1 else "Follow-up", - } + row = WaterLevelCsvRow( + well_name_point_id=well_name, + field_event_date_time=f"2025-02-{measurement_day:02d}T08:00:00", + field_staff=contact_names[(index - 1) % len(contact_names)], + field_staff_2=contact_names[(index - 2) % len(contact_names)], + field_staff_3=contact_names[(index - 3) % len(contact_names)], + water_level_date_time=f"2025-02-{measurement_day:02d}T10:30:00", + measuring_person=contact_names[(index - 1) % len(contact_names)], + sample_method="Steel-tape measurement", + mp_height=1.5 if index == 1 else 1.8, + level_status="Water level not affected", + depth_to_water_ft=9 if index == 1 else 8, + data_quality="Water level accurate to within two hundreths of a foot", + ) + return 
row.model_dump(mode="json")


 def _build_valid_rows(context: Context, count: int = 2) -> List[Dict[str, str]]:

From 64f5ea39216091cb33f3d5efc9e61a80ecc8f46e Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Tue, 6 Jan 2026 09:18:46 -0700
Subject: [PATCH 18/29] fix: remove measuring_person from scenario since that
 is not a lexicon value

This reflects the updates to the feature test
---
 tests/features/steps/water-levels-csv.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py
index 37fc8cab..9f073726 100644
--- a/tests/features/steps/water-levels-csv.py
+++ b/tests/features/steps/water-levels-csv.py
@@ -342,17 +342,15 @@ def step_impl(context: Context):
 # Scenario: Upload fails due to invalid lexicon values
 # ============================================================================
 @given(
-    'my CSV file contains invalid lexicon values for "sampler", "sample_method", "level_status", or "data_quality"'
+    'my CSV file contains invalid lexicon values for "sample_method", "level_status", or "data_quality"'
 )
 def step_impl(context: Context):
     rows = _build_valid_rows(context, count=1)
-    rows[0]["sampler"] = "Unknown Team"
     rows[0]["sample_method"] = "mystery"
     rows[0]["level_status"] = "supercharged"
     rows[0]["data_quality"] = "bad"
     _set_rows(context, rows)
     context.invalid_fields = [
-        "sampler",
         "sample_method",
         "level_status",
         "data_quality",

From 50cfa8d2e746a0a2a707fd9c97c4e7263cee30b8 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Tue, 6 Jan 2026 09:23:24 -0700
Subject: [PATCH 19/29] fix: update test steps to reflect changes in field
 names

It is now called water_level_date_time instead of measurement_date_time
---
 tests/features/steps/water-levels-csv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py
index 9f073726..2599cb0e 100644
--- a/tests/features/steps/water-levels-csv.py
+++ b/tests/features/steps/water-levels-csv.py
@@ -306,13 +306,13 @@ def step_impl(context: Context, required_field: str):
 # Scenario: Upload fails due to invalid date formats
 # ============================================================================
 @given(
-    'my CSV file contains invalid ISO 8601 date values in the "measurement_date_time" field'
+    'my CSV file contains invalid ISO 8601 date values in the "water_level_date_time" field'
 )
 def step_impl(context: Context):
     rows = _build_valid_rows(context, count=1)
-    rows[0]["measurement_date_time"] = "02/15/2025 10:30"
+    rows[0]["water_level_date_time"] = "02/15/2025 10:30"
     _set_rows(context, rows)
-    context.invalid_fields = ["measurement_date_time"]
+    context.invalid_fields = ["water_level_date_time"]


 @then("stderr should contain validation errors identifying the invalid field and row")

From 19306c91be6d26117779f2de206792c1fe9edddb Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Tue, 6 Jan 2026 10:19:28 -0700
Subject: [PATCH 20/29] refactor: move common CSV givens to common.py

The required/optional givens are common between well inventory and water
level CSVs, so this commit moves them to common.py to avoid duplication and
make them reusable across different feature files.
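For reference, a minimal sketch of the shared-step pattern this commit relies
on: Behave matches steps by their Gherkin text rather than by module, so a
step defined once in steps/common.py (as in the diff below) is picked up by
every feature file. The body mirrors the moved step; context.rows is assumed
to have been populated by an earlier given.

    # sketch -- shared step living in tests/features/steps/common.py
    from behave import given
    from behave.runner import Context

    @given("the CSV includes required fields:")
    def step_impl_csv_includes_required_fields(context: Context):
        # the Gherkin table from the feature file arrives as context.table
        context.required_fields = [row[0] for row in context.table]
        keys = context.rows[0].keys()
        for field in context.required_fields:
            assert field in keys, f"Missing required field: {field}"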
--- tests/features/steps/common.py | 20 +++++++ tests/features/steps/water-levels-csv.py | 68 +++++++++++++++++++--- tests/features/steps/well-inventory-csv.py | 19 ------ 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/tests/features/steps/common.py b/tests/features/steps/common.py index 79d8433c..1e67c045 100644 --- a/tests/features/steps/common.py +++ b/tests/features/steps/common.py @@ -14,6 +14,7 @@ # limitations under the License. # =============================================================================== from behave import then, given, when +from behave.runner import Context from starlette.testclient import TestClient from core.dependencies import ( @@ -152,4 +153,23 @@ def step_impl(context): assert data["page"] == 1, f'Unexpected page {data["page"]}' +@given("the CSV includes required fields:") +def step_impl_csv_includes_required_fields(context: Context): + """Sets up the CSV file with multiple rows of well inventory data.""" + context.required_fields = [row[0] for row in context.table] + keys = context.rows[0].keys() + for field in context.required_fields: + assert field in keys, f"Missing required field: {field}" + + +@given("the CSV includes optional fields when available:") +def step_impl(context: Context): + optional_fields = [row[0] for row in context.table] + keys = context.rows[0].keys() + + for key in keys: + if key not in context.required_fields: + assert key in optional_fields, f"Unexpected field found: {key}" + + # ============= EOF ============================================= diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py index d748eb30..c7e462b9 100644 --- a/tests/features/steps/water-levels-csv.py +++ b/tests/features/steps/water-levels-csv.py @@ -13,16 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -from datetime import datetime +from datetime import datetime, timedelta import json import tempfile from pathlib import Path from typing import Any, Dict, Iterable, List +from zoneinfo import ZoneInfo from behave import given, when, then from behave.runner import Context -from db import Observation +from db import Observation, FieldEvent, Sample from db.engine import session_ctx from schemas.water_level_csv import WaterLevelCsvRow from services.water_level_csv import bulk_upload_water_levels @@ -86,6 +87,7 @@ def _write_csv_to_context(context: Context) -> None: temp_file.close() context.csv_file = str(Path(temp_file.name)) context.csv_raw_text = csv_text + context.file_content = csv_text # file_context needs to be set for shared given def _set_rows( @@ -101,6 +103,9 @@ def _set_rows( _write_csv_to_context(context) context.stdout_json = None + # set context.rows to be all rows and the header for optional step + context.rows = rows + def _ensure_stdout_json(context: Context) -> Dict[str, Any]: if not hasattr(context, "stdout_json") or context.stdout_json is None: @@ -130,6 +135,8 @@ def step_impl(context: Context): missing = [field for field in expected_fields if field not in headers] assert not missing, f"Missing required headers: {missing}" + context.required_fields = expected_fields + @given('each "well_name_point_id" value matches an existing well') def step_impl(context: Context): @@ -153,13 +160,13 @@ def step_impl(context: Context): ), f"Expected timezone-naive datetime but got {row['water_level_date_time']}" -# @given("the water level CSV includes optional fields when available:") -# def step_impl(context: Context): -# field_name = context.table.headings[0] -# optional_fields = [row[field_name].strip() for row in context.table] -# headers = set(context.csv_headers) -# missing = [field for field in optional_fields if field not in headers] -# assert not missing, f"Missing optional headers: {missing}" +@given("the water level CSV includes optional fields when available:") +def step_impl(context: Context): + field_name = context.table.headings[0] + optional_fields = [row[field_name].strip() for row in context.table] + headers = set(context.csv_headers) + missing = [field for field in optional_fields if field not in headers] + assert not missing, f"Missing optional headers: {missing}" @when("I run the CLI command:") @@ -174,6 +181,49 @@ def step_impl(context: Context): context.stdout_json = None +@then( + "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value. 
" +) +def step_impl(context: Context): + with session_ctx() as session: + for field in ["field_event_date_time", "water_level_date_time"]: + for i, row in enumerate(context.csv_rows): + dt_str = row[field] + dt_naive = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S") + tz = ZoneInfo("America/Denver") + dt_aware = dt_naive.replace(tzinfo=tz) + + if field == "field_event_date_time": + field_event = session.query(FieldEvent).one_or_none( + FieldEvent.id + == context.cli_result.payload.water_levels[i].field_event_id + ) + assert ( + field_event.event_date == dt_aware + ), f"Expected {dt_aware} but got {field_event.event_date} for row {i+1}" + assert field_event.utcoffset() == timedelta(hours=-7) + else: + observation = session.query(Observation).one_or_none( + Observation.id + == context.cli_result.payload.water_levels[i].observation_id + ) + assert ( + observation.observation_datetime == dt_aware + ), f"Expected {dt_aware} but got {observation.observation_datetime} for row {i+1}" + assert observation.observation_datetime.utcoffset() == timedelta( + hours=-7 + ) + + sample = session.query(Sample).one_or_none( + Sample.id + == context.cli_result.payload.water_levels[i].sample_id + ) + assert ( + sample.sample_date == dt_aware + ), f"Expected {dt_aware} but got {sample.sample_date} for row {i+1}" + assert sample.sample_date.utcoffset() == timedelta(hours=-7) + + @then("the command exits with code 0") def step_impl(context: Context): assert context.cli_result.exit_code == 0, context.cli_result.stderr diff --git a/tests/features/steps/well-inventory-csv.py b/tests/features/steps/well-inventory-csv.py index 8cd69b03..31f87d54 100644 --- a/tests/features/steps/well-inventory-csv.py +++ b/tests/features/steps/well-inventory-csv.py @@ -16,15 +16,6 @@ def step_impl_valid_lexicon_values(context: Context): assert response.status_code == 200, f"Invalid lexicon category: {row[0]}" -@given("the CSV includes required fields:") -def step_impl_csv_includes_required_fields(context: Context): - """Sets up the CSV file with multiple rows of well inventory data.""" - context.required_fields = [row[0] for row in context.table] - keys = context.rows[0].keys() - for field in context.required_fields: - assert field in keys, f"Missing required field: {field}" - - @given('each "well_name_point_id" value is unique per row') def step_impl(context: Context): """Verifies that each "well_name_point_id" value is unique per row.""" @@ -37,16 +28,6 @@ def step_impl(context: Context): seen_ids.add(row["well_name_point_id"]) -@given("the CSV includes optional fields when available:") -def step_impl(context: Context): - optional_fields = [row[0] for row in context.table] - keys = context.rows[0].keys() - - for key in keys: - if key not in context.required_fields: - assert key in optional_fields, f"Unexpected field found: {key}" - - @given("the csv includes optional water level entry fields when available:") def step_impl(context: Context): optional_fields = [row[0] for row in context.table] From 55ca16fc40b62540b80ee65c98e1f6e4ef7a5af3 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 4 Feb 2026 13:07:16 -0700 Subject: [PATCH 21/29] fix: fix cli test for refactored/updated water level csv handling The previous implementation was for an outdated version of the water level csv handling. This commit updates the test to align with the new implementation, ensuring that the CLI commands are tested correctly with the current water level csv processing logic. 
--- tests/test_cli_commands.py | 246 +++++++++++++++++++++++++------------ 1 file changed, 170 insertions(+), 76 deletions(-) diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py index 4abd837d..c2697601 100644 --- a/tests/test_cli_commands.py +++ b/tests/test_cli_commands.py @@ -15,15 +15,14 @@ # =============================================================================== from __future__ import annotations -import textwrap -import uuid +import tempfile from pathlib import Path from click.testing import CliRunner from sqlalchemy import select from cli.cli import cli -from db import FieldActivity, FieldEvent, Observation, Sample +from db import FieldActivity, FieldEvent, FieldEventParticipant, Observation, Sample from db.engine import session_ctx @@ -141,89 +140,184 @@ def fake_upload(file_path, *, pretty_json=False): assert captured["pretty_json"] is True -def test_water_levels_cli_persists_observations(tmp_path, water_well_thing): +def test_water_levels_cli_persists_observations( + water_level_bulk_upload_data, water_well_thing, contact, second_contact +): """ End-to-end CLI invocation should create FieldEvent, Sample, and Observation rows. + + This is essentially the same test in tests/services/test_water_level_service.py::test_bulk_upload, + but it works by invoking the command line rather than just the function directly. """ - def _write_csv(path: Path, *, well_name: str, notes: str): - csv_text = textwrap.dedent( - f"""\ - field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes - CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes} - """ - ) - path.write_text(csv_text) + # write to a CSV file in memory then delete it after processing + # this is being done to avoid filesystem dependencies in tests and + # to use the contact fixture for the field staff + csv_headers = list(water_level_bulk_upload_data.keys()) + csv_values = list(water_level_bulk_upload_data.values()) - unique_notes = f"pytest-{uuid.uuid4()}" - csv_file = tmp_path / "water_levels.csv" - _write_csv(csv_file, well_name=water_well_thing.name, notes=unique_notes) + csv_content = ",".join(csv_headers) + "\n" + ",".join(csv_values) - runner = CliRunner() - result = runner.invoke( - cli, ["water-levels", "bulk-upload", "--file", str(csv_file)] - ) - - assert result.exit_code == 0, result.output + with tempfile.NamedTemporaryFile( + mode="w+", encoding="utf-8", delete_on_close=True + ) as temp_csv: + temp_csv.write(csv_content) + temp_csv.flush() - created_ids: dict[str, int] = {} - with session_ctx() as session: - stmt = ( - select(Observation) - .join(Observation.sample) - .join(Sample.field_activity) - .join(FieldActivity.field_event) - .where(Sample.notes == unique_notes) + runner = CliRunner() + result = runner.invoke( + cli, ["water-levels", "bulk-upload", "--file", str(temp_csv.name)] ) - observations = session.scalars(stmt).all() - assert len(observations) == 1, "Expected one observation for the uploaded CSV" - - observation = observations[0] - sample = observation.sample - field_activity = sample.field_activity - field_event = field_activity.field_event - - assert field_event.thing_id == water_well_thing.id - assert sample.sample_method == "Electric tape measurement (E-probe)" - assert sample.sample_matrix == "water" - assert observation.value == 42.5 - assert observation.measuring_point_height == 1.5 - 
assert observation.notes == "Level status: stable | Data quality: approved" - assert ( - field_event.notes == f"Field staff: CLI Tester | {unique_notes}" - ), "Field event notes should capture field staff and notes" - - created_ids = { - "observation_id": observation.id, - "sample_id": sample.id, - "field_activity_id": field_activity.id, - "field_event_id": field_event.id, - } - - if created_ids: - # Clean up committed rows so other tests see a pristine database. + + assert result.exit_code == 0, result.output + + created_ids: dict[str, int] = {} with session_ctx() as session: - observation = session.get(Observation, created_ids["observation_id"]) - sample = session.get(Sample, created_ids["sample_id"]) - field_activity = session.get( - FieldActivity, created_ids["field_activity_id"] + stmt = ( + select(Observation) + .join(Observation.sample) + .join(Sample.field_activity) + .join(FieldActivity.field_event) + .where( + Observation.notes + == water_level_bulk_upload_data["water_level_notes"] + ) ) - field_event = session.get(FieldEvent, created_ids["field_event_id"]) - - if observation: - session.delete(observation) - session.flush() - if sample: - session.delete(sample) - session.flush() - if field_activity: - session.delete(field_activity) - session.flush() - if field_event: - session.delete(field_event) - session.flush() - - session.commit() + observations = session.scalars(stmt).all() + print(observations) + assert ( + len(observations) == 1 + ), "Expected one observation for the uploaded CSV" + + observation = observations[0] + sample = observation.sample + field_activity = sample.field_activity + field_event = field_activity.field_event + # contact is created before second_contact so will have a lower id + field_event_participants = sorted( + field_event.field_event_participants, key=lambda fep: fep.contact_id + ) + field_event_participant_1 = field_event_participants[0] + field_event_participant_2 = field_event_participants[1] + + # ---------- + # INSERTION VERIFICATION + # ---------- + + # FieldEvent + assert field_event is not None + assert field_event.thing_id == water_well_thing.id + # TODO: uncomment after timezone handling is fixed + # assert field_event.event_date.isoformat() == "2025-02-15T15:00:00+00:00" + assert ( + field_event.event_date.isoformat() + == water_level_bulk_upload_data["field_event_date_time"] + "+00:00" + ) + + # FieldActivity + assert field_activity is not None + assert field_activity.activity_type == "groundwater level" + + # FieldEventParticipants + assert field_event_participant_1 is not None + assert field_event_participant_1.contact_id == contact.id + assert field_event_participant_1.field_event_id == field_event.id + assert field_event_participant_1.participant_role == "Lead" + + assert field_event_participant_2 is not None + assert field_event_participant_2.contact_id == second_contact.id + assert field_event_participant_2.field_event_id == field_event.id + assert field_event_participant_2.participant_role == "Participant" + + # Sample + assert sample is not None + assert sample.field_activity_id == field_activity.id + # TODO: uncomment after timezone handling is fixed + # assert sample.sample_date.isoformat() == "2025-02-15T17:30:00+00:00" + assert ( + sample.sample_date.isoformat() + == water_level_bulk_upload_data["water_level_date_time"] + "+00:00" + ) + assert sample.sample_name[0:3] == "wl-" + assert sample.sample_matrix == "water" + assert sample.sample_method == water_level_bulk_upload_data["sample_method"] + assert sample.qc_type == 
"Normal" + assert sample.depth_top is None + assert sample.depth_bottom is None + + # Observation + assert observation is not None + assert observation.sample_id == sample.id + # TODO: uncomment after timezone handling is fixed + # assert observation.observation_datetime.isoformat() == "2025-02-15T17:30:00+00:00" + assert ( + observation.observation_datetime.isoformat() + == water_level_bulk_upload_data["water_level_date_time"] + "+00:00" + ) + assert observation.value == float( + water_level_bulk_upload_data["depth_to_water_ft"] + ) + assert observation.unit == "ft" + assert observation.measuring_point_height == float( + water_level_bulk_upload_data["mp_height"] + ) + assert ( + observation.groundwater_level_reason + == water_level_bulk_upload_data["level_status"] + ) + assert ( + observation.groundwater_level_accuracy + == water_level_bulk_upload_data["data_quality"] + ) + assert ( + observation.notes == water_level_bulk_upload_data["water_level_notes"] + ) + + created_ids = { + "observation_id": observation.id, + "sample_id": sample.id, + "field_activity_id": field_activity.id, + "field_event_id": field_event.id, + "field_participant_ids": [fep.id for fep in field_event.participants], + } + + if created_ids: + # Clean up committed rows so other tests see a pristine database. + with session_ctx() as session: + observation = session.get(Observation, created_ids["observation_id"]) + sample = session.get(Sample, created_ids["sample_id"]) + field_activity = session.get( + FieldActivity, created_ids["field_activity_id"] + ) + field_event = session.get(FieldEvent, created_ids["field_event_id"]) + field_participants = ( + session.query(FieldEventParticipant) + .filter( + FieldEventParticipant.id.in_( + created_ids["field_participant_ids"] + ) + ) + .all() + ) + + if observation: + session.delete(observation) + session.flush() + if sample: + session.delete(sample) + session.flush() + if field_participants: + for participant in field_participants: + session.delete(participant) + session.flush() + if field_activity: + session.delete(field_activity) + session.flush() + if field_event: + session.delete(field_event) + session.flush() + + session.commit() # ============= EOF ============================================= From 64bace059087b313452e80fe8179d151ddb1a2bd Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 4 Feb 2026 20:08:15 +0000 Subject: [PATCH 22/29] Formatting changes --- .../versions/ff5701e6b9f5_add_groundwater_level_accuracy_to_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alembic/versions/ff5701e6b9f5_add_groundwater_level_accuracy_to_.py b/alembic/versions/ff5701e6b9f5_add_groundwater_level_accuracy_to_.py index 54aa374f..e01b3851 100644 --- a/alembic/versions/ff5701e6b9f5_add_groundwater_level_accuracy_to_.py +++ b/alembic/versions/ff5701e6b9f5_add_groundwater_level_accuracy_to_.py @@ -11,7 +11,6 @@ from alembic import op import sqlalchemy as sa - # revision identifiers, used by Alembic. 
revision: str = "ff5701e6b9f5" down_revision: Union[str, Sequence[str], None] = "7b8c9d0e1f2a" From d4acffe98411982747f4fd6c8db9be623b1f1425 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 5 Feb 2026 10:00:22 -0700 Subject: [PATCH 23/29] formatting changes --- tests/test_cli_commands.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py index c2697601..754f106a 100644 --- a/tests/test_cli_commands.py +++ b/tests/test_cli_commands.py @@ -184,7 +184,6 @@ def test_water_levels_cli_persists_observations( ) ) observations = session.scalars(stmt).all() - print(observations) assert ( len(observations) == 1 ), "Expected one observation for the uploaded CSV" From 74a06a43c76f3f7493918f6b29cf24627d6d75e1 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 5 Feb 2026 10:00:59 -0700 Subject: [PATCH 24/29] fix: move datetime naive-to-aware test to common.py Both water-level-csv and well-inventory-csv features have the same step to convert datetime fields from naive to aware and confirm the correct timezone offset. This commit moves that step implementation to common.py to avoid code duplication and ensure consistent testing of datetime handling across both features. --- tests/features/data/water-levels.csv | 4 +- tests/features/steps/common.py | 41 ++++++ tests/features/steps/water-levels-csv.py | 85 +++++------- tests/features/steps/well-inventory-csv.py | 145 +++++++++++---------- 4 files changed, 153 insertions(+), 122 deletions(-) diff --git a/tests/features/data/water-levels.csv b/tests/features/data/water-levels.csv index db510e89..a22f50b7 100644 --- a/tests/features/data/water-levels.csv +++ b/tests/features/data/water-levels.csv @@ -1,3 +1,3 @@ field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes -Alice Lopez,AR0001,2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,45.2,approved,Initial measurement after irrigation shutdown -Bernardo Chen,AR0002,2025-03-05T09:15:00-07:00,2025-03-05T11:10:00-07:00,Consultant,steel tape,1.8,rising,47.0,provisional,Follow-up visit; pump was off for 24h +Alice Lopez,AR0001,2025-02-15T08:00:00,2025-02-15T10:30:00,Groundwater Team,electric tape,1.5,stable,45.2,approved,Initial measurement after irrigation shutdown +Bernardo Chen,AR0002,2025-03-05T09:15:00,2025-03-05T11:10:00,Consultant,steel tape,1.8,rising,47.0,provisional,Follow-up visit; pump was off for 24h diff --git a/tests/features/steps/common.py b/tests/features/steps/common.py index 1e67c045..641ccb31 100644 --- a/tests/features/steps/common.py +++ b/tests/features/steps/common.py @@ -15,6 +15,7 @@ # =============================================================================== from behave import then, given, when from behave.runner import Context +from datetime import datetime, timedelta from starlette.testclient import TestClient from core.dependencies import ( @@ -25,6 +26,7 @@ amp_admin_function, ) from core.initializers import register_routes +from services.util import convert_dt_tz_naive_to_tz_aware @given("a functioning api") @@ -172,4 +174,43 @@ def step_impl(context: Context): assert key in optional_fields, f"Unexpected field found: {key}" +@then( + "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value." 
+)
+def step_impl(context: Context):
+    """
+    In the @given steps that precede this step, a list of datetime fields
+    needs to be added to the context object so that they can be checked here. This way
+    we can test datetime fields with different names, such as 'date_time' in well-inventory-csv
+    and `water_level_date_time` in water-level-csv.
+    """
+
+    for i, row in enumerate(context.rows):
+
+        for datetime_field in context.datetime_fields:
+            # Convert date_time field
+            date_time_naive = datetime.fromisoformat(row[datetime_field])
+            print(date_time_naive)
+            date_time_aware = convert_dt_tz_naive_to_tz_aware(
+                date_time_naive, "America/Denver"
+            )
+            row[datetime_field] = date_time_aware.isoformat()
+            # confirm correct time zone and offset
+            if date_time_aware.dst() == timedelta(0):
+                # MST, offset -07:00
+                assert date_time_aware.utcoffset() == timedelta(
+                    hours=-7
+                ), "date_time offset is not -07:00"
+            else:
+                # MDT, offset -06:00
+                assert date_time_aware.utcoffset() == timedelta(
+                    hours=-6
+                ), "date_time offset is not -06:00"
+
+            # confirm the time was not changed from what was provided
+            assert (
+                date_time_aware.replace(tzinfo=None) == date_time_naive
+            ), "date_time value was changed during timezone assignment"
+
+
 # ============= EOF =============================================
diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py
index c7e462b9..83591325 100644
--- a/tests/features/steps/water-levels-csv.py
+++ b/tests/features/steps/water-levels-csv.py
@@ -13,17 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from datetime import datetime, timedelta
+from datetime import datetime
 import json
 import tempfile
 from pathlib import Path
 from typing import Any, Dict, Iterable, List
-from zoneinfo import ZoneInfo

 from behave import given, when, then
 from behave.runner import Context

-from db import Observation, FieldEvent, Sample
+from db import Observation
 from db.engine import session_ctx
 from schemas.water_level_csv import WaterLevelCsvRow
 from services.water_level_csv import bulk_upload_water_levels
@@ -80,7 +79,7 @@ def _serialize_csv(rows: List[Dict[str, Any]], headers: Iterable[str]) -> str:


 def _write_csv_to_context(context: Context) -> None:
-    csv_text = _serialize_csv(context.csv_rows, context.csv_headers)
+    csv_text = _serialize_csv(context.rows, context.csv_headers)
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
     temp_file.write(csv_text.encode("utf-8"))
     temp_file.flush()
@@ -93,7 +92,7 @@ def _write_csv_to_context(context: Context) -> None:
 def _set_rows(
     context: Context, rows: List[Dict[str, str]], headers: List[str] | None = None
 ) -> None:
-    context.csv_rows = rows
+    context.rows = rows
     if headers is not None:
         context.csv_headers = headers
     elif rows:
@@ -103,9 +102,6 @@ def _set_rows(
     _write_csv_to_context(context)
     context.stdout_json = None

-    # set context.rows to be all rows and the header for optional step
-    context.rows = rows
-

 def _ensure_stdout_json(context: Context) -> Dict[str, Any]:
     if not hasattr(context, "stdout_json") or context.stdout_json is None:
@@ -121,10 +117,18 @@ def step_impl(context: Context):
     rows = _build_valid_rows(context)
     _set_rows(context, rows)

+    if (
+        not hasattr(context, "datetime_fields")
+        or "water_level_date_time" not in context.datetime_fields
+    ):
+        context.datetime_fields = ["water_level_date_time"]
+    else:
+        context.datetime_fields.append("water_level_date_time")
+

 @given("my CSV file contains multiple rows of water level entry data")
 def step_impl(context: Context):
-    assert len(context.csv_rows) >= 2
+    assert len(context.rows) >= 2

 @given("the water level CSV includes required fields:")
@@ -141,7 +145,7 @@ def step_impl(context: Context):
 @given('each "well_name_point_id" value matches an existing well')
 def step_impl(context: Context):
     available = set(_available_well_names(context))
-    for row in context.csv_rows:
+    for row in context.rows:
         assert (
             row["well_name_point_id"] in available
         ), f"Unknown well identifier {row['well_name_point_id']}"
@@ -151,7 +155,7 @@ def step_impl(context: Context):
     '"water_level_date_time" values are valid ISO 8601 timezone-naive datetime strings (e.g. "2025-02-15T10:30:00")'
 )
 def step_impl(context: Context):
-    for row in context.csv_rows:
+    for row in context.rows:
         assert row["water_level_date_time"].startswith("2025-02")
         assert "T" in row["water_level_date_time"]
         dt_naive = datetime.strptime(row["water_level_date_time"], "%Y-%m-%dT%H:%M:%S")
@@ -181,49 +185,6 @@ def step_impl(context: Context):
     context.stdout_json = None

-@then(
-    "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value. "
-)
-def step_impl(context: Context):
-    with session_ctx() as session:
-        for field in ["field_event_date_time", "water_level_date_time"]:
-            for i, row in enumerate(context.csv_rows):
-                dt_str = row[field]
-                dt_naive = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S")
-                tz = ZoneInfo("America/Denver")
-                dt_aware = dt_naive.replace(tzinfo=tz)
-
-                if field == "field_event_date_time":
-                    field_event = session.query(FieldEvent).one_or_none(
-                        FieldEvent.id
-                        == context.cli_result.payload.water_levels[i].field_event_id
-                    )
-                    assert (
-                        field_event.event_date == dt_aware
-                    ), f"Expected {dt_aware} but got {field_event.event_date} for row {i+1}"
-                    assert field_event.utcoffset() == timedelta(hours=-7)
-                else:
-                    observation = session.query(Observation).one_or_none(
-                        Observation.id
-                        == context.cli_result.payload.water_levels[i].observation_id
-                    )
-                    assert (
-                        observation.observation_datetime == dt_aware
-                    ), f"Expected {dt_aware} but got {observation.observation_datetime} for row {i+1}"
-                    assert observation.observation_datetime.utcoffset() == timedelta(
-                        hours=-7
-                    )
-
-                    sample = session.query(Sample).one_or_none(
-                        Sample.id
-                        == context.cli_result.payload.water_levels[i].sample_id
-                    )
-                    assert (
-                        sample.sample_date == dt_aware
-                    ), f"Expected {dt_aware} but got {sample.sample_date} for row {i+1}"
-                    assert sample.sample_date.utcoffset() == timedelta(hours=-7)
-
-
 @then("the command exits with code 0")
 def step_impl(context: Context):
     assert context.cli_result.exit_code == 0, context.cli_result.stderr
@@ -278,6 +239,14 @@ def step_impl(context: Context):
     _set_rows(context, rows, headers=headers)
     assert headers != list(rows[0].keys())

+    if (
+        not hasattr(context, "datetime_fields")
+        or "water_level_date_time" not in context.datetime_fields
+    ):
+        context.datetime_fields = ["water_level_date_time"]
+    else:
+        context.datetime_fields.append("water_level_date_time")
+

 @then("all water level entries are imported")
 def step_impl(context: Context):
@@ -299,6 +268,14 @@ def step_impl(context: Context):
     _set_rows(context, rows, headers=headers)
     assert "custom_note" in context.csv_headers

+    if (
+        not hasattr(context, "datetime_fields")
+        or "water_level_date_time" not in context.datetime_fields
+    ):
+        context.datetime_fields = ["water_level_date_time"]
+    else:
+        context.datetime_fields.append("water_level_date_time")
+

 # ============================================================================
 # Scenario: No entries imported when any row fails validation
diff --git a/tests/features/steps/well-inventory-csv.py b/tests/features/steps/well-inventory-csv.py
index 31f87d54..1d38fe86 100644
--- a/tests/features/steps/well-inventory-csv.py
+++ b/tests/features/steps/well-inventory-csv.py
@@ -1,10 +1,8 @@
-from datetime import datetime, timedelta
+from datetime import datetime

 from behave import given, when, then
 from behave.runner import Context

-from services.util import convert_dt_tz_naive_to_tz_aware
-

 @given("valid lexicon values exist for:")
 def step_impl_valid_lexicon_values(context: Context):
@@ -48,23 +46,38 @@ def step_impl(context: Context):
         except ValueError as e:
             raise ValueError(f"Invalid date_time: {row['date_time']}") from e

+        if (
+            not hasattr(context, "datetime_fields")
+            or "date_time" not in context.datetime_fields
+        ):
+            context.datetime_fields = ["date_time"]
+        else:
+            context.datetime_fields.append("date_time")
+

+# TODO: implement when optional water levels are added to the well inventory csv testing data
 @given(
     'the optional "water_level_date_time" values are valid ISO 8601 timezone-naive datetime strings (e.g. "2025-02-15T10:30:00") when provided'
 )
 def step_impl(context: Context):
     """Verifies that "water_level_date_time" values are valid ISO 8601 timezone-naive datetime strings."""
-    for row in context.rows:
-        if row.get("water_level_date_time", None):
-            try:
-                date_time = datetime.fromisoformat(row["water_level_date_time"])
-                assert (
-                    date_time.tzinfo is None
-                ), f"water_level_date_time should be timezone-naive: {row['water_level_date_time']}"
-            except ValueError as e:
-                raise ValueError(
-                    f"Invalid water_level_date_time: {row['water_level_date_time']}"
-                ) from e
+    pass
+    # for row in context.rows:
+    #     if row.get("water_level_date_time", None):
+    #         try:
+    #             date_time = datetime.fromisoformat(row["water_level_date_time"])
+    #             assert (
+    #                 date_time.tzinfo is None
+    #             ), f"water_level_date_time should be timezone-naive: {row['water_level_date_time']}"
+    #         except ValueError as e:
+    #             raise ValueError(
+    #                 f"Invalid water_level_date_time: {row['water_level_date_time']}"
+    #             ) from e

+    # if not hasattr(context, "datetime_fields") or "water_level_date_time" not in context.datetime_fields:
+    #     context.datetime_fields = ["water_level_date_time"]
+    # else:
+    #     context.datetime_fields.append("water_level_date_time")


 @when("I upload the file to the bulk upload endpoint")
 def step_impl(context: Context):
     response = (
     )


-@then(
-    "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value."
-)
-def step_impl(context: Context):
-    """Converts all datetime strings in the CSV rows to timezone-aware datetime objects with Mountain Time offset."""
-    for i, row in enumerate(context.rows):
-        # Convert date_time field
-        date_time_naive = datetime.fromisoformat(row["date_time"])
-        date_time_aware = convert_dt_tz_naive_to_tz_aware(
-            date_time_naive, "America/Denver"
-        )
-        row["date_time"] = date_time_aware.isoformat()
-
-        # confirm correct time zone and offset
-        if i == 0:
-            # MST, offset -07:00
-            assert date_time_aware.utcoffset() == timedelta(
-                hours=-7
-            ), "date_time offset is not -07:00"
-        else:
-            # MDT, offset -06:00
-            assert date_time_aware.utcoffset() == timedelta(
-                hours=-6
-            ), "date_time offset is not -06:00"
-
-        # confirm the time was not changed from what was provided
-        assert (
-            date_time_aware.replace(tzinfo=None) == date_time_naive
-        ), "date_time value was changed during timezone assignment"
-
-        # Convert water_level_date_time field if it exists
-        if row.get("water_level_date_time", None):
-            wl_date_time_naive = datetime.fromisoformat(row["water_level_date_time"])
-            wl_date_time_aware = convert_dt_tz_naive_to_tz_aware(
-                wl_date_time_naive, "America/Denver"
-            )
-            row["water_level_date_time"] = wl_date_time_aware.isoformat()
-
-            if wl_date_time_aware.dst():
-                # MDT, offset -06:00
-                assert wl_date_time_aware.utcoffset() == timedelta(
-                    hours=-6
-                ), "water_level_date_time offset is not -06:00"
-            else:
-                # MST, offset -07:00
-                assert wl_date_time_aware.utcoffset() == timedelta(
-                    hours=-7
-                ), "water_level_date_time offset is not -07:00"
-
-            assert (
-                wl_date_time_aware.replace(tzinfo=None) == wl_date_time_naive
-            ), "water_level_date_time value was changed during timezone assignment"
+# @then(
+#     "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value."
+# )
+# def step_impl(context: Context):
+#     """Converts all datetime strings in the CSV rows to timezone-aware datetime objects with Mountain Time offset."""
+#     for i, row in enumerate(context.rows):
+#         # Convert date_time field
+#         date_time_naive = datetime.fromisoformat(row["date_time"])
+#         date_time_aware = convert_dt_tz_naive_to_tz_aware(
+#             date_time_naive, "America/Denver"
+#         )
+#         row["date_time"] = date_time_aware.isoformat()
+
+#         # confirm correct time zone and offset
+#         if i == 0:
+#             # MST, offset -07:00
+#             assert date_time_aware.utcoffset() == timedelta(
+#                 hours=-7
+#             ), "date_time offset is not -07:00"
+#         else:
+#             # MDT, offset -06:00
+#             assert date_time_aware.utcoffset() == timedelta(
+#                 hours=-6
+#             ), "date_time offset is not -06:00"
+
+#         # confirm the time was not changed from what was provided
+#         assert (
+#             date_time_aware.replace(tzinfo=None) == date_time_naive
+#         ), "date_time value was changed during timezone assignment"
+
+#         # Convert water_level_date_time field if it exists
+#         if row.get("water_level_date_time", None):
+#             wl_date_time_naive = datetime.fromisoformat(row["water_level_date_time"])
+#             wl_date_time_aware = convert_dt_tz_naive_to_tz_aware(
+#                 wl_date_time_naive, "America/Denver"
+#             )
+#             row["water_level_date_time"] = wl_date_time_aware.isoformat()
+
+#             if wl_date_time_aware.dst():
+#                 # MDT, offset -06:00
+#                 assert wl_date_time_aware.utcoffset() == timedelta(
+#                     hours=-6
+#                 ), "water_level_date_time offset is not -06:00"
+#             else:
+#                 # MST, offset -07:00
+#                 assert wl_date_time_aware.utcoffset() == timedelta(
+#                     hours=-7
+#                 ), "water_level_date_time offset is not -07:00"
+
+#             assert (
+#                 wl_date_time_aware.replace(tzinfo=None) == wl_date_time_naive
+#             ), "water_level_date_time value was changed during timezone assignment"
 
 
 @then("the response includes a summary containing:")

From a1052a49b9f3323f877953dbd499bf9df1e932ce Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Thu, 5 Feb 2026 10:10:17 -0700
Subject: [PATCH 25/29] feat: test invalid "measuring_person" and standardize test verbiage

This commit adds a feature test for invalid "measuring_person" values in
the water level CSV upload. It also standardizes the feature test verbiage
so that the same step implementation can be reused.
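
For illustration, a minimal sketch of the shared step implementation that
this standardized wording maps onto (a sketch only, not part of this diff;
it assumes earlier steps set `context.invalid_fields`, as the new @given
step below does, and that the CLI result is stored on `context.cli_result`):

    from behave import then
    from behave.runner import Context

    @then("stderr should contain validation errors identifying the invalid field and row")
    def step_impl(context: Context):
        # every field a @given step flagged as invalid must be named in stderr
        for field in context.invalid_fields:
            assert field in context.cli_result.stderr, (
                f"expected a validation error naming {field!r}"
            )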
---
 tests/features/steps/water-levels-csv.py | 13 +++++++++++++
 tests/features/water-level-csv.feature | 2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tests/features/steps/water-levels-csv.py b/tests/features/steps/water-levels-csv.py
index 83591325..eb840292 100644
--- a/tests/features/steps/water-levels-csv.py
+++ b/tests/features/steps/water-levels-csv.py
@@ -382,4 +382,17 @@ def step_impl(context: Context):
     ]
 
 
+# ============================================================================
+# Scenario: Upload fails when "measuring_person" does not match "field_staff," "field_staff_2," or "field_staff_3"
+# ============================================================================
+@given(
+    'my CSV file contains a "measuring_person" value that does not match any of the provided "field_staff" values'
+)
+def step_impl(context: Context):
+    rows = _build_valid_rows(context, count=1)
+    rows[0]["measuring_person"] = "Unknown Person"
+    _set_rows(context, rows)
+    context.invalid_fields = ["measuring_person"]
+
+
 # ============= EOF =============================================

diff --git a/tests/features/water-level-csv.feature b/tests/features/water-level-csv.feature
index 063f3c2e..9ac74e64 100644
--- a/tests/features/water-level-csv.feature
+++ b/tests/features/water-level-csv.feature
@@ -173,5 +173,5 @@ Feature: Bulk upload water level entries from CSV via CLI
       oco water-levels bulk-upload --file ./water_levels.csv
       """
     Then the command exits with a non-zero exit code
-    And stderr should contain a validation error for the "measuring_person" field
+    And stderr should contain validation errors identifying the invalid field and row
     And no water level entries are imported
\ No newline at end of file

From 6aced6ce2622f445b341cb18c8c4d8ef2b2edd6c Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Thu, 5 Feb 2026 10:36:25 -0700
Subject: [PATCH 26/29] fix: remove commented-out code

---
 tests/features/steps/well-inventory-csv.py | 54 ----------------------
 1 file changed, 54 deletions(-)

diff --git a/tests/features/steps/well-inventory-csv.py b/tests/features/steps/well-inventory-csv.py
index 1d38fe86..7710500a 100644
--- a/tests/features/steps/well-inventory-csv.py
+++ b/tests/features/steps/well-inventory-csv.py
@@ -88,60 +88,6 @@ def step_impl(context: Context):
     )
 
 
-# @then(
-#     "all datetime objects are assigned the correct Mountain Time timezone offset based on the date value."
-# )
-# def step_impl(context: Context):
-#     """Converts all datetime strings in the CSV rows to timezone-aware datetime objects with Mountain Time offset."""
-#     for i, row in enumerate(context.rows):
-#         # Convert date_time field
-#         date_time_naive = datetime.fromisoformat(row["date_time"])
-#         date_time_aware = convert_dt_tz_naive_to_tz_aware(
-#             date_time_naive, "America/Denver"
-#         )
-#         row["date_time"] = date_time_aware.isoformat()
-
-#         # confirm correct time zone and offset
-#         if i == 0:
-#             # MST, offset -07:00
-#             assert date_time_aware.utcoffset() == timedelta(
-#                 hours=-7
-#             ), "date_time offset is not -07:00"
-#         else:
-#             # MDT, offset -06:00
-#             assert date_time_aware.utcoffset() == timedelta(
-#                 hours=-6
-#             ), "date_time offset is not -06:00"
-
-#         # confirm the time was not changed from what was provided
-#         assert (
-#             date_time_aware.replace(tzinfo=None) == date_time_naive
-#         ), "date_time value was changed during timezone assignment"
-
-#         # Convert water_level_date_time field if it exists
-#         if row.get("water_level_date_time", None):
-#             wl_date_time_naive = datetime.fromisoformat(row["water_level_date_time"])
-#             wl_date_time_aware = convert_dt_tz_naive_to_tz_aware(
-#                 wl_date_time_naive, "America/Denver"
-#             )
-#             row["water_level_date_time"] = wl_date_time_aware.isoformat()
-
-#             if wl_date_time_aware.dst():
-#                 # MDT, offset -06:00
-#                 assert wl_date_time_aware.utcoffset() == timedelta(
-#                     hours=-6
-#                 ), "water_level_date_time offset is not -06:00"
-#             else:
-#                 # MST, offset -07:00
-#                 assert wl_date_time_aware.utcoffset() == timedelta(
-#                     hours=-7
-#                 ), "water_level_date_time offset is not -07:00"
-
-#             assert (
-#                 wl_date_time_aware.replace(tzinfo=None) == wl_date_time_naive
-#             ), "water_level_date_time value was changed during timezone assignment"
-
-
 @then("the response includes a summary containing:")
 def step_impl(context: Context):
     response_json = context.response.json()

From 360495a67f2d0c85107b23057886bcebeb2bd5ab Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Thu, 5 Feb 2026 10:41:20 -0700
Subject: [PATCH 27/29] fix: remove print debugging statement

---
 tests/features/steps/common.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/features/steps/common.py b/tests/features/steps/common.py
index 641ccb31..d3618a96 100644
--- a/tests/features/steps/common.py
+++ b/tests/features/steps/common.py
@@ -190,7 +190,6 @@ def step_impl(context: Context):
         for datetime_field in context.datetime_fields:
             # Convert date_time field
             date_time_naive = datetime.fromisoformat(row[datetime_field])
-            print(date_time_naive)
             date_time_aware = convert_dt_tz_naive_to_tz_aware(
                 date_time_naive, "America/Denver"
             )

From 7debe732146fa08957eb53e89f35705eab9c8854 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Thu, 5 Feb 2026 10:44:23 -0700
Subject: [PATCH 28/29] fix: fix spelling typo

---
 tests/features/steps/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/features/steps/common.py b/tests/features/steps/common.py
index d3618a96..c3016de0 100644
--- a/tests/features/steps/common.py
+++ b/tests/features/steps/common.py
@@ -179,7 +179,7 @@ def step_impl(context: Context):
 )
 def step_impl(context: Context):
     """
-    In the @given steps that prececeed this step, a list of datetime fields
+    In the @given steps that precede this step, a list of datetime fields
     needs to be added to the context object so that they can be checked here.
    This way we can test datetime fields with different names, such as
     'date_time' in well-inventory-csv and `water_level_date_time` in
     water-level-csv.

From 077b9da570835947dd128069c3db895962412fec Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Fri, 6 Feb 2026 15:37:47 -0700
Subject: [PATCH 29/29] fix: use 'groundwater' for 'groundwater level' matrix

---
 core/parameter.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/parameter.json b/core/parameter.json
index b65bd042..5d9b7db7 100644
--- a/core/parameter.json
+++ b/core/parameter.json
@@ -1,14 +1,14 @@
 [
     {
         "parameter_name": "groundwater level",
-        "matrix": "water",
+        "matrix": "groundwater",
         "parameter_type": "Field Parameter",
         "cas_number": null,
         "default_unit": "ft"
     },
     {
         "parameter_name": "pH",
-        "matrix": "water",
+        "matrix": "groundwater",
         "parameter_type": "Field Parameter",
         "cas_number": null,
         "default_unit": "dimensionless"
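
A quick consistency guard suggested by the back-and-forth on matrix values
in this series: a minimal sketch that cross-checks each parameter's matrix
against the lexicon's sample_matrix terms. The file paths match this
series' diffs, but treating the lexicon as the authority for matrix values
is an assumption, not something these patches state:

    import json

    # load the two core files edited in this series
    # (paths assumed relative to the repo root)
    with open("core/lexicon.json") as f:
        lexicon = json.load(f)
    with open("core/parameter.json") as f:
        parameters = json.load(f)

    # gather every term the lexicon tags with the sample_matrix category;
    # assumes the lexicon file is a flat JSON array of term entries
    matrices = {
        entry["term"]
        for entry in lexicon
        if "sample_matrix" in entry.get("categories", [])
    }

    # report any parameter whose matrix has no matching lexicon term
    for parameter in parameters:
        if parameter["matrix"] not in matrices:
            print(
                f"{parameter['parameter_name']}: matrix "
                f"{parameter['matrix']!r} has no sample_matrix lexicon term"
            )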