feat: optimize well transfer process by removing redundant checks and enhancing logging

jirhiker · jirhiker · commit ac0357380b37 · 2025-12-04T18:46:06.000-07:00
diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py
@@ -38,11 +38,6 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
 
     # 4. Process each row
     for i, row in enumerate(cleaned_df.itertuples()):
-        # check if limit is reached
-        if limit and i >= limit:
-            logger.info(f"Reached limit of {limit} rows. Stopping migration.")
-            break
-
         # Log progress every 'step' rows
         if i and not i % step:
             logger.info(
@@ -67,18 +62,18 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
             continue
 
         # Check if this formation already exists
-        existing = (
-            session.query(GeologicFormation)
-            .filter(GeologicFormation.formation_code == formation_code)
-            .first()
-        )
-
-        if existing:
-            logger.info(
-                f"Skipping row {i}: Formation code {formation_code} already exists"
-            )
-            skipped_count += 1
-            continue
+        # existing = (
+        #     session.query(GeologicFormation)
+        #     .filter(GeologicFormation.formation_code == formation_code)
+        #     .first()
+        # )
+        #
+        # if existing:
+        #     logger.info(
+        #         f"Skipping row {i}: Formation code {formation_code} already exists"
+        #     )
+        #     skipped_count += 1
+        #     continue
 
         # 6. Prepare data for creation
         # Note: We only store the formation_code. Formation names will be mapped by the API using a
diff --git a/transfers/util.py b/transfers/util.py
@@ -382,7 +382,7 @@ def convert_mt_to_utc(dt_record: datetime) -> datetime:
     return dt_record
 
 
-def chunk_by_size(df: pd.DataFrame | list, chunk_size: int) -> pd.DataFrame:
+def chunk_by_size(df: pd.DataFrame | list, chunk_size: int = 100) -> pd.DataFrame:
     if isinstance(df, list):
         df = pd.DataFrame(df)
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py