diff --git a/admin/views/radionuclides.py b/admin/views/radionuclides.py index f1bd2799..27c240ae 100644 --- a/admin/views/radionuclides.py +++ b/admin/views/radionuclides.py @@ -63,7 +63,6 @@ def can_delete(self, request: Request) -> bool: "chemistry_sample_info_id", "nma_sample_pt_id", "nma_sample_point_id", - "thing_id", "analyte", "symbol", "sample_value", @@ -85,7 +84,6 @@ def can_delete(self, request: Request) -> bool: "chemistry_sample_info_id", "nma_sample_pt_id", "nma_sample_point_id", - "thing_id", "analyte", "symbol", "sample_value", @@ -127,7 +125,6 @@ def can_delete(self, request: Request) -> bool: "chemistry_sample_info_id", "nma_sample_pt_id", "nma_sample_point_id", - "thing_id", "analyte", "symbol", "sample_value", @@ -149,7 +146,6 @@ def can_delete(self, request: Request) -> bool: "chemistry_sample_info_id": "Chemistry Sample Info ID", "nma_sample_pt_id": "NMA SamplePtID (Legacy)", "nma_sample_point_id": "NMA SamplePointID (Legacy)", - "thing_id": "Thing ID", "analyte": "Analyte", "symbol": "Symbol", "sample_value": "Sample Value", diff --git a/alembic/versions/c7f8a9b0c1d2_add_thing_id_to_nma_surface_water_data.py b/alembic/versions/c7f8a9b0c1d2_add_thing_id_to_nma_surface_water_data.py new file mode 100644 index 00000000..8a359768 --- /dev/null +++ b/alembic/versions/c7f8a9b0c1d2_add_thing_id_to_nma_surface_water_data.py @@ -0,0 +1,58 @@ +"""add thing_id to NMA_SurfaceWaterData + +Revision ID: c7f8a9b0c1d2 +Revises: d9f1e2c3b4a5 +Create Date: 2026-02-04 12:03:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "c7f8a9b0c1d2" +down_revision: Union[str, Sequence[str], None] = "d9f1e2c3b4a5" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column( + "NMA_SurfaceWaterData", + sa.Column("thing_id", sa.Integer(), nullable=True), + ) + op.create_foreign_key( + "fk_surface_water_data_thing_id", + "NMA_SurfaceWaterData", + "thing", + ["thing_id"], + ["id"], + ondelete="CASCADE", + ) + # Backfill thing_id based on LocationId -> Thing.nma_pk_location + op.execute(""" + UPDATE "NMA_SurfaceWaterData" sw + SET thing_id = t.id + FROM thing t + WHERE t.nma_pk_location IS NOT NULL + AND sw."LocationId" IS NOT NULL + AND t.nma_pk_location = sw."LocationId"::text + """) + # Remove any rows that cannot be linked to a Thing, then enforce NOT NULL + op.execute('DELETE FROM "NMA_SurfaceWaterData" WHERE thing_id IS NULL') + op.alter_column( + "NMA_SurfaceWaterData", "thing_id", existing_type=sa.Integer(), nullable=False + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_constraint( + "fk_surface_water_data_thing_id", + "NMA_SurfaceWaterData", + type_="foreignkey", + ) + op.drop_column("NMA_SurfaceWaterData", "thing_id") diff --git a/alembic/versions/d9f1e2c3b4a5_drop_thing_id_from_nma_radionuclides.py b/alembic/versions/d9f1e2c3b4a5_drop_thing_id_from_nma_radionuclides.py new file mode 100644 index 00000000..3ace8f52 --- /dev/null +++ b/alembic/versions/d9f1e2c3b4a5_drop_thing_id_from_nma_radionuclides.py @@ -0,0 +1,60 @@ +"""Drop thing_id from NMA_Radionuclides + +Revision ID: d9f1e2c3b4a5 +Revises: 71a4c6b3d2e8 +Create Date: 2026-02-04 15:32:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "d9f1e2c3b4a5" +down_revision: Union[str, Sequence[str], None] = "71a4c6b3d2e8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _drop_thing_id_fk_and_indexes(inspector) -> None: + fks = inspector.get_foreign_keys("NMA_Radionuclides") + for fk in fks: + if "thing_id" in (fk.get("constrained_columns") or []): + op.drop_constraint(fk["name"], "NMA_Radionuclides", type_="foreignkey") + + indexes = inspector.get_indexes("NMA_Radionuclides") + for idx in indexes: + if "thing_id" in (idx.get("column_names") or []): + op.drop_index(idx["name"], table_name="NMA_Radionuclides") + + +def upgrade() -> None: + """Upgrade schema.""" + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = [col["name"] for col in inspector.get_columns("NMA_Radionuclides")] + if "thing_id" in columns: + _drop_thing_id_fk_and_indexes(inspector) + op.drop_column("NMA_Radionuclides", "thing_id") + + +def downgrade() -> None: + """Downgrade schema.""" + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = [col["name"] for col in inspector.get_columns("NMA_Radionuclides")] + if "thing_id" not in columns: + op.add_column( + "NMA_Radionuclides", + sa.Column("thing_id", sa.Integer(), nullable=True), + ) + op.create_foreign_key( + "fk_nma_radionuclides_thing_id", + "NMA_Radionuclides", + "thing", + ["thing_id"], + ["id"], + ondelete="CASCADE", + ) diff --git a/db/nma_legacy.py b/db/nma_legacy.py index 4b32fd06..557c415a 100644 --- a/db/nma_legacy.py +++ b/db/nma_legacy.py @@ -80,23 +80,30 @@ class NMA_WaterLevelsContinuous_Pressure_Daily(Base): data and mirrors the original column names/types closely so transfer scripts can operate without further schema mapping. - Note: This table is OUT OF SCOPE for the UUID->Integer PK refactoring since - it's not a Thing child table. """ __tablename__ = "NMA_WaterLevelsContinuous_Pressure_Daily" + # PK global_id: Mapped[uuid.UUID] = mapped_column( "GlobalID", UUID(as_uuid=True), primary_key=True ) - object_id: Mapped[Optional[int]] = mapped_column( - "OBJECTID", Integer, autoincrement=True - ) - well_id: Mapped[Optional[uuid.UUID]] = mapped_column("WellID", UUID(as_uuid=True)) + # FK to Thing table - required for all WaterLevelsContinuous_Pressure_Daily records thing_id: Mapped[int] = mapped_column( Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False ) + + # Legacy PK + # Current `global_id` is also the original PK in the legacy DB + + # Legacy FK (not officially assigned as FK in legacy DB, but was used to link to wells) + well_id: Mapped[Optional[uuid.UUID]] = mapped_column("WellID", UUID(as_uuid=True)) + + # Additional columns + object_id: Mapped[Optional[int]] = mapped_column( + "OBJECTID", Integer, autoincrement=True + ) point_id: Mapped[Optional[str]] = mapped_column("PointID", String(50)) date_measured: Mapped[datetime] = mapped_column( "DateMeasured", DateTime, nullable=False @@ -143,7 +150,19 @@ class NMA_view_NGWMN_WellConstruction(Base): __tablename__ = "NMA_view_NGWMN_WellConstruction" + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # FK + # FK is undefined, but not needed for view tables such as this. + + # Legacy PK (for audit) + # Legacy PK does not exist. This is expected for view tables such as this + + # Legacy FK (for audit) + # Legacy FK does not exist. This is expected for view tables such as this. + + # Additional columns point_id: Mapped[str] = mapped_column("PointID", String(50)) casing_top: Mapped[Optional[float]] = mapped_column("CasingTop", Float) casing_bottom: Mapped[Optional[float]] = mapped_column("CasingBottom", Float) @@ -172,8 +191,20 @@ class NMA_view_NGWMN_WaterLevels(Base): __tablename__ = "NMA_view_NGWMN_WaterLevels" + # PK point_id: Mapped[str] = mapped_column("PointID", String(50), primary_key=True) date_measured: Mapped[date] = mapped_column("DateMeasured", Date, primary_key=True) + + # FK + # FK is undefined, but not needed for view tables such as this. + + # Legacy PK (for audit) + # Legacy PK does not exist. This is expected for view tables such as this + + # Legacy FK (for audit) + # Legacy FK does not exist. This is expected for view tables such as this. + + # Additional columns depth_to_water_bgs: Mapped[Optional[float]] = mapped_column( "DepthToWaterBGS", Float ) @@ -194,7 +225,19 @@ class NMA_view_NGWMN_Lithology(Base): __tablename__ = "NMA_view_NGWMN_Lithology" + # PK object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True) + + # FK + # FK is undefined, but not needed for view tables such as this. + + # Legacy PK (for audit) + # Legacy PK does not exist. This is expected for view tables such as this + + # Legacy FK (for audit) + # Legacy FK does not exist. This is expected for view tables such as this. + + # Additional columns point_id: Mapped[str] = mapped_column("PointID", String(50)) lithology: Mapped[Optional[str]] = mapped_column("Lithology", String(50)) term: Mapped[Optional[str]] = mapped_column("TERM", String(100)) @@ -221,29 +264,30 @@ class NMA_HydraulicsData(Base): __tablename__ = "NMA_HydraulicsData" - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) + # FK to Thing - required for all HydraulicsData records + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False + ) + + # Legacy PK (for audit) nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy FK (for audit) nma_well_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_WellID", UUID(as_uuid=True) ) + + # Additional columns nma_point_id: Mapped[Optional[str]] = mapped_column("nma_PointID", String(50)) nma_object_id: Mapped[Optional[int]] = mapped_column( "nma_OBJECTID", Integer, unique=True ) - - # Data columns data_source: Mapped[Optional[str]] = mapped_column("Data Source", String(255)) - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) - cs_gal_d_ft: Mapped[Optional[float]] = mapped_column("Cs (gal/d/ft)", Float) hd_ft2_d: Mapped[Optional[float]] = mapped_column("HD (ft2/d)", Float) hl_day_1: Mapped[Optional[float]] = mapped_column("HL (day-1)", Float) @@ -270,6 +314,7 @@ class NMA_HydraulicsData(Base): "Hydraulic Remarks", String(200) ) + # Relationships thing: Mapped["Thing"] = relationship("Thing", back_populates="hydraulics_data") @validates("thing_id") @@ -302,28 +347,29 @@ class NMA_Stratigraphy(Base): ), ) - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) + # FK to Thing table - required for all Stratigraphy records + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False + ) + + # Legacy PK (for audit) nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy FK (for audit) nma_well_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_WellID", UUID(as_uuid=True) ) + + # Additional columns nma_point_id: Mapped[str] = mapped_column("nma_PointID", String(10), nullable=False) nma_object_id: Mapped[Optional[int]] = mapped_column( "nma_OBJECTID", Integer, unique=True ) - - # FK to Thing - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) - strat_top: Mapped[int] = mapped_column("StratTop", SmallInteger, nullable=False) strat_bottom: Mapped[int] = mapped_column( "StratBottom", SmallInteger, nullable=False @@ -370,15 +416,25 @@ class NMA_Chemistry_SampleInfo(Base): __tablename__ = "NMA_Chemistry_SampleInfo" - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) + # FK to Thing - required for all ChemistrySampleInfo records + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False + ) + + # Legacy PK (for audit) nma_sample_pt_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_SamplePtID", UUID(as_uuid=True), unique=True, nullable=True ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy FK (for audit) + nma_location_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "nma_LocationId", UUID(as_uuid=True) + ) + + # Additional columns nma_wclab_id: Mapped[Optional[str]] = mapped_column("nma_WCLab_ID", String(18)) nma_sample_point_id: Mapped[str] = mapped_column( "nma_SamplePointID", String(10), nullable=False @@ -386,16 +442,6 @@ class NMA_Chemistry_SampleInfo(Base): nma_object_id: Mapped[Optional[int]] = mapped_column( "nma_OBJECTID", Integer, unique=True ) - # Legacy LocationId UUID - kept for audit trail - nma_location_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "nma_LocationId", UUID(as_uuid=True) - ) - - # FK to Thing - required for all ChemistrySampleInfo records - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) - collection_date: Mapped[Optional[datetime]] = mapped_column( "CollectionDate", DateTime ) @@ -481,29 +527,33 @@ class NMA_AssociatedData(Base): __tablename__ = "NMA_AssociatedData" - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) + # FK to Thing - required for all AssociatedData records + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False + ) + + # Legacy PK (for audit) nma_assoc_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_AssocID", UUID(as_uuid=True), unique=True, nullable=True ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy FK (for audit) nma_location_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_LocationId", UUID(as_uuid=True), unique=True ) + + # Additional columns nma_point_id: Mapped[Optional[str]] = mapped_column("nma_PointID", String(10)) nma_object_id: Mapped[Optional[int]] = mapped_column( "nma_OBJECTID", Integer, unique=True ) - notes: Mapped[Optional[str]] = mapped_column("Notes", String(255)) formation: Mapped[Optional[str]] = mapped_column("Formation", String(15)) - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) + # Relationships thing: Mapped["Thing"] = relationship("Thing", back_populates="associated_data") @validates("thing_id") @@ -520,20 +570,32 @@ class NMA_SurfaceWaterData(Base): """ Legacy SurfaceWaterData table from AMPAPI. - Note: This table is OUT OF SCOPE for refactoring (not a Thing child). + Note: This table is a Thing child (linked via LocationId -> Thing.nma_pk_location). """ __tablename__ = "NMA_SurfaceWaterData" - location_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "LocationId", UUID(as_uuid=True) + # PK + object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True) + + # FK + # FK to Thing - required for all SurfaceWaterData records + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False ) + + # Legacy PK (for audit) surface_id: Mapped[uuid.UUID] = mapped_column( "SurfaceID", UUID(as_uuid=True), nullable=False ) - point_id: Mapped[str] = mapped_column("PointID", String(10)) - object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True) + # Legacy FK (for audit) + location_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "LocationId", UUID(as_uuid=True) + ) + + # Additional columns + point_id: Mapped[str] = mapped_column("PointID", String(10)) discharge: Mapped[Optional[str]] = mapped_column("Discharge", String(50)) discharge_method: Mapped[Optional[str]] = mapped_column( "DischargeMethod", String(50) @@ -553,6 +615,18 @@ class NMA_SurfaceWaterData(Base): source_notes: Mapped[Optional[str]] = mapped_column("SourceNotes", String(200)) data_source: Mapped[Optional[str]] = mapped_column("DataSource", String(255)) + # Relationships + thing: Mapped["Thing"] = relationship("Thing", back_populates="surface_water_data") + + @validates("thing_id") + def validate_thing_id(self, key, value): + """Prevent orphan NMA_SurfaceWaterData - must have a parent Thing.""" + if value is None: + raise ValueError( + "NMA_SurfaceWaterData requires a parent Thing (thing_id cannot be None)" + ) + return value + class NMA_SurfaceWaterPhotos(Base): """ @@ -563,15 +637,26 @@ class NMA_SurfaceWaterPhotos(Base): __tablename__ = "NMA_SurfaceWaterPhotos" + # PK + global_id: Mapped[uuid.UUID] = mapped_column( + "GlobalID", UUID(as_uuid=True), primary_key=True + ) + + # FK + # FK not assigned. + + # Legacy PK (for audit) + # Current `global_id` is also the original PK in the legacy DB + + # Legacy FK (for audit) surface_id: Mapped[Optional[uuid.UUID]] = mapped_column( "SurfaceID", UUID(as_uuid=True) ) + + # Additional columns point_id: Mapped[str] = mapped_column("PointID", String(50), nullable=False) ole_path: Mapped[Optional[str]] = mapped_column("OLEPath", String(50)) object_id: Mapped[Optional[int]] = mapped_column("OBJECTID", Integer, unique=True) - global_id: Mapped[uuid.UUID] = mapped_column( - "GlobalID", UUID(as_uuid=True), primary_key=True - ) class NMA_WeatherData(Base): @@ -583,14 +668,24 @@ class NMA_WeatherData(Base): __tablename__ = "NMA_WeatherData" + # PK + object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True) + + # FK + # FK not assigned. + + # Legacy PK (for audit) + weather_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "WeatherID", UUID(as_uuid=True) + ) + + # Legacy FK (for audit) location_id: Mapped[Optional[uuid.UUID]] = mapped_column( "LocationId", UUID(as_uuid=True) ) + + # Additional columns point_id: Mapped[str] = mapped_column("PointID", String(10)) - weather_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "WeatherID", UUID(as_uuid=True) - ) - object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True) class NMA_WeatherPhotos(Base): @@ -602,15 +697,26 @@ class NMA_WeatherPhotos(Base): __tablename__ = "NMA_WeatherPhotos" + # PK: + global_id: Mapped[uuid.UUID] = mapped_column( + "GlobalID", UUID(as_uuid=True), primary_key=True + ) + + # FK: + # FK not assigned. + + # Legacy PK (for audit): + # Current `global_id` is also the original PK in the legacy DB + + # Legacy FK (for audit): weather_id: Mapped[Optional[uuid.UUID]] = mapped_column( "WeatherID", UUID(as_uuid=True) ) + + # Additional columns point_id: Mapped[str] = mapped_column("PointID", String(50), nullable=False) ole_path: Mapped[Optional[str]] = mapped_column("OLEPath", String(50)) object_id: Mapped[Optional[int]] = mapped_column("OBJECTID", Integer, unique=True) - global_id: Mapped[uuid.UUID] = mapped_column( - "GlobalID", UUID(as_uuid=True), primary_key=True - ) class NMA_Soil_Rock_Results(Base): @@ -623,17 +729,28 @@ class NMA_Soil_Rock_Results(Base): __tablename__ = "NMA_Soil_Rock_Results" + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # FK to Thing + thing_id: Mapped[int] = mapped_column( + Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False + ) + + # Legacy PK (for audit) + # Legacy PK does not exist. + + # Legacy FK (for audit) (not officially assigned as FK in legacy DB, but was used to link to wells) nma_point_id: Mapped[Optional[str]] = mapped_column("nma_Point_ID", String(255)) + + # Additional columns sample_type: Mapped[Optional[str]] = mapped_column("Sample Type", String(255)) date_sampled: Mapped[Optional[str]] = mapped_column("Date Sampled", String(255)) d13c: Mapped[Optional[float]] = mapped_column("d13C", Float) d18o: Mapped[Optional[float]] = mapped_column("d18O", Float) sampled_by: Mapped[Optional[str]] = mapped_column("Sampled by", String(255)) - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) + # Relationships thing: Mapped["Thing"] = relationship("Thing", back_populates="soil_rock_results") @validates("thing_id") @@ -669,27 +786,27 @@ class NMA_MinorTraceChemistry(Base): ), ) - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) - nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True - ) - - # New Integer FK to ChemistrySampleInfo + # FK to ChemistrySampleInfo table - required for all MinorTraceChemistry records chemistry_sample_info_id: Mapped[int] = mapped_column( Integer, ForeignKey("NMA_Chemistry_SampleInfo.id", ondelete="CASCADE"), nullable=False, ) - # Legacy UUID FK (for audit) + # Legacy PK (for audit) + nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True + ) + + # Legacy FK (for audit) nma_chemistry_sample_info_uuid: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_chemistry_sample_info_uuid", UUID(as_uuid=True), nullable=True ) - # Legacy columns (sizes match database schema) + # Additional columns analyte: Mapped[Optional[str]] = mapped_column("analyte", String(50)) symbol: Mapped[Optional[str]] = mapped_column("symbol", String(10)) sample_value: Mapped[Optional[float]] = mapped_column("sample_value", Float) @@ -712,6 +829,14 @@ class NMA_MinorTraceChemistry(Base): "NMA_Chemistry_SampleInfo", back_populates="minor_trace_chemistries" ) + @validates("chemistry_sample_info_id") + def validate_chemistry_sample_info_id(self, key, value): + if value is None: + raise ValueError( + "NMA_MinorTraceChemistry requires a chemistry_sample_info_id" + ) + return value + class NMA_Radionuclides(Base): """ @@ -729,30 +854,27 @@ class NMA_Radionuclides(Base): __tablename__ = "NMA_Radionuclides" - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) - nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True - ) - - # FK to Thing - thing_id: Mapped[int] = mapped_column( - Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False - ) - - # New Integer FK to ChemistrySampleInfo + # FK to ChemistrySampleInfo table - required for all Radionuclides records chemistry_sample_info_id: Mapped[int] = mapped_column( Integer, ForeignKey("NMA_Chemistry_SampleInfo.id", ondelete="CASCADE"), nullable=False, ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy PK (for audit) + nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True + ) + + # Legacy FK (for audit) nma_sample_pt_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_SamplePtID", UUID(as_uuid=True), nullable=True ) + + # Additional columns nma_sample_point_id: Mapped[Optional[str]] = mapped_column( "nma_SamplePointID", String(10) ) @@ -760,8 +882,6 @@ class NMA_Radionuclides(Base): "nma_OBJECTID", Integer, unique=True ) nma_wclab_id: Mapped[Optional[str]] = mapped_column("nma_WCLab_ID", String(25)) - - # Data columns analyte: Mapped[Optional[str]] = mapped_column("Analyte", String(50)) symbol: Mapped[Optional[str]] = mapped_column("Symbol", String(50)) sample_value: Mapped[Optional[float]] = mapped_column( @@ -782,20 +902,10 @@ class NMA_Radionuclides(Base): volume_unit: Mapped[Optional[str]] = mapped_column("VolumeUnit", String(50)) analyses_agency: Mapped[Optional[str]] = mapped_column("AnalysesAgency", String(50)) - thing: Mapped["Thing"] = relationship("Thing", back_populates="radionuclides") + # Relationships chemistry_sample_info: Mapped["NMA_Chemistry_SampleInfo"] = relationship( "NMA_Chemistry_SampleInfo", back_populates="radionuclides" ) - thing: Mapped["Thing"] = relationship("Thing") - - @validates("thing_id") - def validate_thing_id(self, key, value): - """Prevent orphan NMA_Radionuclides - must have a parent Thing.""" - if value is None: - raise ValueError( - "NMA_Radionuclides requires a parent Thing (thing_id cannot be None)" - ) - return value @validates("chemistry_sample_info_id") def validate_chemistry_sample_info_id(self, key, value): @@ -820,25 +930,27 @@ class NMA_MajorChemistry(Base): __tablename__ = "NMA_MajorChemistry" - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) - nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True - ) - - # New Integer FK to ChemistrySampleInfo + # FK to ChemistrySampleInfo table - required for all MajorChemistry records chemistry_sample_info_id: Mapped[int] = mapped_column( Integer, ForeignKey("NMA_Chemistry_SampleInfo.id", ondelete="CASCADE"), nullable=False, ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy PK (for audit) + nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True + ) + + # Legacy FK (for audit) nma_sample_pt_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_SamplePtID", UUID(as_uuid=True), nullable=True ) + + # Additional columns nma_sample_point_id: Mapped[Optional[str]] = mapped_column( "nma_SamplePointID", String(10) ) @@ -846,8 +958,6 @@ class NMA_MajorChemistry(Base): "nma_OBJECTID", Integer, unique=True ) nma_wclab_id: Mapped[Optional[str]] = mapped_column("nma_WCLab_ID", String(25)) - - # Data columns analyte: Mapped[Optional[str]] = mapped_column("Analyte", String(50)) symbol: Mapped[Optional[str]] = mapped_column("Symbol", String(50)) sample_value: Mapped[Optional[float]] = mapped_column( @@ -866,6 +976,7 @@ class NMA_MajorChemistry(Base): volume_unit: Mapped[Optional[str]] = mapped_column("VolumeUnit", String(50)) analyses_agency: Mapped[Optional[str]] = mapped_column("AnalysesAgency", String(50)) + # Relationships chemistry_sample_info: Mapped["NMA_Chemistry_SampleInfo"] = relationship( "NMA_Chemistry_SampleInfo", back_populates="major_chemistries" ) @@ -909,15 +1020,10 @@ class NMA_FieldParameters(Base): Index("FieldParameters$nma_OBJECTID", "nma_OBJECTID", unique=True), ) - # New Integer PK + # PK id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Legacy UUID PK (now audit column) - nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( - "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True - ) - - # New Integer FK to ChemistrySampleInfo + # FK to ChemistrySampleInfo table - required for all FieldParameters records chemistry_sample_info_id: Mapped[int] = mapped_column( Integer, ForeignKey( @@ -928,10 +1034,17 @@ class NMA_FieldParameters(Base): nullable=False, ) - # Legacy ID columns (renamed with nma_ prefix) + # Legacy PK (for audit) + nma_global_id: Mapped[Optional[uuid.UUID]] = mapped_column( + "nma_GlobalID", UUID(as_uuid=True), unique=True, nullable=True + ) + + # Legacy FK (for audit) nma_sample_pt_id: Mapped[Optional[uuid.UUID]] = mapped_column( "nma_SamplePtID", UUID(as_uuid=True), nullable=True ) + + # Additional columns nma_sample_point_id: Mapped[Optional[str]] = mapped_column( "nma_SamplePointID", String(10) ) @@ -939,8 +1052,6 @@ class NMA_FieldParameters(Base): "nma_OBJECTID", Integer, Identity(start=1), nullable=False ) nma_wclab_id: Mapped[Optional[str]] = mapped_column("nma_WCLab_ID", String(25)) - - # Data columns field_parameter: Mapped[Optional[str]] = mapped_column("FieldParameter", String(50)) sample_value: Mapped[Optional[float]] = mapped_column( "SampleValue", Float, nullable=True diff --git a/db/thing.py b/db/thing.py index bdfff8e5..fb046d3e 100644 --- a/db/thing.py +++ b/db/thing.py @@ -51,9 +51,9 @@ NMA_AssociatedData, NMA_Chemistry_SampleInfo, NMA_HydraulicsData, - NMA_Radionuclides, NMA_Soil_Rock_Results, NMA_Stratigraphy, + NMA_SurfaceWaterData, NMA_WaterLevelsContinuous_Pressure_Daily, ) @@ -339,14 +339,6 @@ class Thing( passive_deletes=True, ) - # One-To-Many: A Thing can have many NMA_Radionuclides records (legacy NMA data). - radionuclides: Mapped[List["NMA_Radionuclides"]] = relationship( - "NMA_Radionuclides", - back_populates="thing", - cascade="all, delete-orphan", - passive_deletes=True, - ) - # One-To-Many: A Thing can have many NMA_AssociatedData records (legacy NMA data). associated_data: Mapped[List["NMA_AssociatedData"]] = relationship( "NMA_AssociatedData", @@ -370,6 +362,12 @@ class Thing( passive_deletes=True, ) ) + surface_water_data: Mapped[List["NMA_SurfaceWaterData"]] = relationship( + "NMA_SurfaceWaterData", + back_populates="thing", + cascade="all, delete-orphan", + passive_deletes=True, + ) # --- Association Proxies --- assets: AssociationProxy[list["Asset"]] = association_proxy( diff --git a/tests/test_surface_water_data_legacy.py b/tests/test_surface_water_data_legacy.py index d6650c20..3680edb9 100644 --- a/tests/test_surface_water_data_legacy.py +++ b/tests/test_surface_water_data_legacy.py @@ -39,6 +39,7 @@ from uuid import uuid4 from db.engine import session_ctx +from db.thing import Thing from db.nma_legacy import NMA_SurfaceWaterData @@ -47,12 +48,22 @@ def _next_object_id() -> int: return -(uuid4().int % 2_000_000_000) +def _attach_thing_with_location(session, water_well_thing): + location_id = uuid4() + thing = session.get(Thing, water_well_thing.id) + thing.nma_pk_location = str(location_id) + session.commit() + return thing, location_id + + # ===================== CREATE tests ========================== -def test_create_surface_water_data_all_fields(): +def test_create_surface_water_data_all_fields(water_well_thing): """Test creating a surface water data record with all fields.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( - location_id=uuid4(), + location_id=location_id, + thing_id=thing.id, surface_id=uuid4(), point_id="SW-1001", object_id=_next_object_id(), @@ -83,13 +94,16 @@ def test_create_surface_water_data_all_fields(): session.commit() -def test_create_surface_water_data_minimal(): +def test_create_surface_water_data_minimal(water_well_thing): """Test creating a surface water data record with minimal fields.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1002", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) session.add(record) session.commit() @@ -105,13 +119,16 @@ def test_create_surface_water_data_minimal(): # ===================== READ tests ========================== -def test_read_surface_water_data_by_object_id(): +def test_read_surface_water_data_by_object_id(water_well_thing): """Test reading a surface water data record by OBJECTID.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1003", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) session.add(record) session.commit() @@ -125,15 +142,16 @@ def test_read_surface_water_data_by_object_id(): session.commit() -def test_surface_water_data_stores_location_id(): +def test_surface_water_data_stores_location_id(water_well_thing): """Ensure location_id values persist in the legacy model.""" with session_ctx() as session: - location_id = uuid4() + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( location_id=location_id, surface_id=uuid4(), point_id="SW-1010", object_id=_next_object_id(), + thing_id=thing.id, ) session.add(record) session.commit() @@ -146,18 +164,23 @@ def test_surface_water_data_stores_location_id(): session.commit() -def test_query_surface_water_data_by_point_id(): +def test_query_surface_water_data_by_point_id(water_well_thing): """Test querying surface water data by point_id.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record1 = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1004", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) record2 = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1005", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) session.add_all([record1, record2]) session.commit() @@ -176,13 +199,16 @@ def test_query_surface_water_data_by_point_id(): # ===================== UPDATE tests ========================== -def test_update_surface_water_data(): +def test_update_surface_water_data(water_well_thing): """Test updating a surface water data record.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1006", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) session.add(record) session.commit() @@ -200,13 +226,16 @@ def test_update_surface_water_data(): # ===================== DELETE tests ========================== -def test_delete_surface_water_data(): +def test_delete_surface_water_data(water_well_thing): """Test deleting a surface water data record.""" with session_ctx() as session: + thing, location_id = _attach_thing_with_location(session, water_well_thing) record = NMA_SurfaceWaterData( surface_id=uuid4(), point_id="SW-1007", object_id=_next_object_id(), + location_id=location_id, + thing_id=thing.id, ) session.add(record) session.commit() diff --git a/transfers/surface_water_data.py b/transfers/surface_water_data.py index 2d745627..9821bf41 100644 --- a/transfers/surface_water_data.py +++ b/transfers/surface_water_data.py @@ -23,7 +23,8 @@ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session -from db import NMA_SurfaceWaterData +from db import NMA_SurfaceWaterData, Thing +from db.engine import session_ctx from transfers.logger import logger from transfers.transferer import Transferer from transfers.util import read_csv @@ -39,16 +40,43 @@ class SurfaceWaterDataTransferer(Transferer): def __init__(self, *args, batch_size: int = 1000, **kwargs): super().__init__(*args, **kwargs) self.batch_size = batch_size + self._thing_id_by_location_id: dict[str, int] = {} + self._build_thing_id_cache() + + def _build_thing_id_cache(self) -> None: + with session_ctx() as session: + things = session.query(Thing.id, Thing.nma_pk_location).all() + for thing_id, nma_pk_location in things: + if nma_pk_location: + key = self._normalize_location_id(nma_pk_location) + if key: + self._thing_id_by_location_id[key] = thing_id + logger.info( + "Built Thing cache with %s location ids", + len(self._thing_id_by_location_id), + ) def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: df = read_csv(self.source_table, parse_dates=["DateMeasured"]) return df, df def _transfer_hook(self, session: Session) -> None: - rows = self._dedupe_rows( - [self._row_dict(row) for row in self.cleaned_df.to_dict("records")], - key="OBJECTID", - ) + rows: list[dict[str, Any]] = [] + skipped_missing_thing = 0 + for raw in self.cleaned_df.to_dict("records"): + record = self._row_dict(raw) + if record is None: + skipped_missing_thing += 1 + continue + rows.append(record) + + rows = self._dedupe_rows(rows, key="OBJECTID") + + if skipped_missing_thing: + logger.warning( + "Skipped %s SurfaceWaterData rows without matching Thing", + skipped_missing_thing, + ) insert_stmt = insert(NMA_SurfaceWaterData) excluded = insert_stmt.excluded @@ -61,6 +89,7 @@ def _transfer_hook(self, session: Session) -> None: stmt = insert_stmt.values(chunk).on_conflict_do_update( index_elements=["OBJECTID"], set_={ + "thing_id": excluded["thing_id"], "LocationId": excluded.LocationId, "PointID": excluded.PointID, "OBJECTID": excluded.OBJECTID, @@ -82,7 +111,7 @@ def _transfer_hook(self, session: Session) -> None: session.commit() session.expunge_all() - def _row_dict(self, row: dict[str, Any]) -> dict[str, Any]: + def _row_dict(self, row: dict[str, Any]) -> Optional[dict[str, Any]]: def val(key: str) -> Optional[Any]: v = row.get(key) if pd.isna(v): @@ -102,8 +131,17 @@ def to_uuid(v: Any) -> Optional[uuid.UUID]: if hasattr(dt, "to_pydatetime"): dt = dt.to_pydatetime() + location_id = to_uuid(val("LocationId")) + thing_id = self._resolve_thing_id(location_id) + if thing_id is None: + logger.warning( + "Skipping SurfaceWaterData LocationId=%s - Thing not found", + location_id, + ) + return None + return { - "LocationId": to_uuid(val("LocationId")), + "LocationId": location_id, "SurfaceID": to_uuid(val("SurfaceID")), "PointID": val("PointID"), "OBJECTID": val("OBJECTID"), @@ -119,6 +157,7 @@ def to_uuid(v: Any) -> Optional[uuid.UUID]: "AqClass": val("AqClass"), "SourceNotes": val("SourceNotes"), "DataSource": val("DataSource"), + "thing_id": thing_id, } def _dedupe_rows( @@ -138,6 +177,16 @@ def _dedupe_rows( deduped[row_key] = row return list(deduped.values()) + passthrough + def _resolve_thing_id(self, location_id: Optional[uuid.UUID]) -> Optional[int]: + if location_id is None: + return None + key = self._normalize_location_id(str(location_id)) + return self._thing_id_by_location_id.get(key) + + @staticmethod + def _normalize_location_id(value: str) -> str: + return value.strip().lower() + def run(batch_size: int = 1000) -> None: """Entrypoint to execute the transfer.""" diff --git a/transfers/thing_transfer.py b/transfers/thing_transfer.py index 5d4456db..6c78cc8e 100644 --- a/transfers/thing_transfer.py +++ b/transfers/thing_transfer.py @@ -77,6 +77,7 @@ def transfer_thing(session: Session, site_type: str, make_payload, limit=None) - payload = make_payload(row) thing_type = payload.pop("thing_type") + payload["nma_pk_location"] = row.LocationId thing = add_thing(session, payload, thing_type=thing_type) assoc = LocationThingAssociation() assoc.location = location diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 77ab09b2..31c28db5 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -342,6 +342,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ) well_data["thing_type"] = "water well" well_data["nma_pk_welldata"] = row.WellID + well_data["nma_pk_location"] = row.LocationId well_data.pop("notes") well = Thing(**well_data) @@ -718,6 +719,7 @@ def _persist_well( ) well_data["thing_type"] = "water well" well_data["nma_pk_welldata"] = row.WellID + well_data["nma_pk_location"] = row.LocationId well_data.pop("notes", None) well = Thing(**well_data)