From 38595b4133dd6c5c47aa81898bff7c96a1d48e5f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 14:30:19 +0000
Subject: [PATCH 1/3] Initial plan


From f5db33c40457e986d2441d405679b1754c402862 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 14:35:15 +0000
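Subject: [PATCH 2/3] Fix patient pipeline: stale column names, Python version,
 product ingestion skip

In create_table_longitudinal_data(), select "support_level" instead of the
stale "support_from_a4d" column and stop selecting "updated_2022_date".

In scripts/R/run_pipeline.R, comment out the ingestion of product_data.parquet
into the product_data table until the product pipeline is finalized.

In scripts/python/pyproject.toml, widen the supported Python range from
">=3.10,<3.13" to ">=3.10,<3.14".

Co-authored-by: pmayd <9614291+pmayd@users.noreply.github.com>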
---
 ...ript3_create_table_patient_data_changes_only.R |  3 +--
 scripts/R/run_pipeline.R                          | 15 ++++++++-------
 scripts/python/pyproject.toml                     |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/script3_create_table_patient_data_changes_only.R b/R/script3_create_table_patient_data_changes_only.R
index 92a2dcc..99300a3 100644
--- a/R/script3_create_table_patient_data_changes_only.R
+++ b/R/script3_create_table_patient_data_changes_only.R
@@ -43,12 +43,11 @@ create_table_longitudinal_data <-
                 "patient_id",
                 "sheet_name",
                 "status",
-                "support_from_a4d",
+                "support_level",
                 "testing_frequency",
                 "tracker_date",
                 "tracker_month",
                 "tracker_year",
-                "updated_2022_date",
                 "weight"
             )
 
diff --git a/scripts/R/run_pipeline.R b/scripts/R/run_pipeline.R
index 5c161da..cb879c5 100644
--- a/scripts/R/run_pipeline.R
+++ b/scripts/R/run_pipeline.R
@@ -109,13 +109,14 @@ ingest_data(
     table = "patient_data_hba1c",
     source = file.path(table_dir, "longitudinal_data_hba1c.parquet")
 )
-ingest_data(
-    project_id = config$project_id,
-    cluster_fields = "clinic_id,product_released_to,product_table_year,product_table_month",
-    dataset = config$dataset,
-    table = "product_data",
-    source = file.path(table_dir, "product_data.parquet")
-)
+# NOTE: product data ingestion is deliberately skipped until the product pipeline is finalized
+# ingest_data(
+#     project_id = config$project_id,
+#     cluster_fields = "clinic_id,product_released_to,product_table_year,product_table_month",
+#     dataset = config$dataset,
+#     table = "product_data",
+#     source = file.path(table_dir, "product_data.parquet")
+# )
 ingest_data(
     project_id = config$project_id,
     cluster_fields = "clinic_id",
diff --git a/scripts/python/pyproject.toml b/scripts/python/pyproject.toml
index a21275c..67b264f 100644
--- a/scripts/python/pyproject.toml
+++ b/scripts/python/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
 package-mode = false
 
 [tool.poetry.dependencies]
-python = ">=3.10,<3.13"
+python = ">=3.10,<3.14"
 pandas = "^2.2.1"
 openpyxl = "^3.1.5"
 click = "^8.1.7"

From 72c7be9f01666838f3516cc9b87a67d34ae22733 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 14:43:04 +0000
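Subject: [PATCH 3/3] Remove longitudinal data table creation and all related
 code

Delete R/script3_create_table_patient_data_changes_only.R, which defined
create_table_longitudinal_data().

Remove its only call site (the "table_longitudinal_data_hba1c" step) from
scripts/R/run_script_3_create_tables.R, and remove the ingestion of
longitudinal_data_hba1c.parquet into the patient_data_hba1c table from
scripts/R/run_pipeline.R.

Co-authored-by: pmayd <9614291+pmayd@users.noreply.github.com>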
---
 ...3_create_table_patient_data_changes_only.R | 89 -------------------
 scripts/R/run_pipeline.R                      |  7 --
 scripts/R/run_script_3_create_tables.R        | 42 ---------
 3 files changed, 138 deletions(-)
 delete mode 100644 R/script3_create_table_patient_data_changes_only.R

diff --git a/R/script3_create_table_patient_data_changes_only.R b/R/script3_create_table_patient_data_changes_only.R
deleted file mode 100644
index 99300a3..0000000
--- a/R/script3_create_table_patient_data_changes_only.R
+++ /dev/null
@@ -1,89 +0,0 @@
-#' @title Create CSV with longitudinal patient data for a single variable.
-#'
-#' @description
-#' Read in all cleaned patient data CSV and create a single data.frame.
-#' Group this data by id and take only the months when there is a change in the medical data.
-#'
-#'
-#' @param patient_data_files list of CSV files with cleaned patient data from step 2.
-#' @param input_root root directory of the input CSV files.
-#' @param output_root root directory of the output folder.
-#' @param variable name of the column that should be exported.
-#' @param name name used to create the export file name.
-create_table_longitudinal_data <-
-    function(patient_data_files,
-             input_root,
-             output_root,
-             variable,
-             name) {
-        dynamic_patient_columns <-
-            c(
-                "blood_pressure_dias_mmhg",
-                "blood_pressure_sys_mmhg",
-                "bmi",
-                "bmi_date",
-                "clinic_id",
-                "fbg_updated_date",
-                "fbg_updated_mg",
-                "fbg_updated_mmol",
-                "file_name",
-                "hba1c_updated",
-                "hba1c_updated_exceeds",
-                "hba1c_updated_date",
-                "height",
-                "hospitalisation_cause",
-                "hospitalisation_date",
-                "insulin_regimen",
-                "insulin_type",
-                "insulin_subtype",
-                "last_clinic_visit_date",
-                "last_remote_followup_date",
-                "observations",
-                "observations_category",
-                "patient_id",
-                "sheet_name",
-                "status",
-                "support_level",
-                "testing_frequency",
-                "tracker_date",
-                "tracker_month",
-                "tracker_year",
-                "weight"
-            )
-
-        patient_data <- read_cleaned_patient_data(input_root, patient_data_files) %>%
-            dplyr::select(tidyselect::all_of(dynamic_patient_columns))
-
-        # get latest static patient data overall
-        variable_lag <- paste0(variable, "_lag")
-        longitudinal_data <- patient_data %>%
-            tidyr::drop_na(!!variable) %>%
-            dplyr::filter(get(variable) != ERROR_VAL_NUMERIC) %>%
-            dplyr::group_by(patient_id) %>%
-            dplyr::arrange(tracker_year, tracker_month) %>%
-            dplyr::filter(
-                get(variable) != tidyr::replace_na(
-                    dplyr::lag(get(variable), default = NULL),
-                    ERROR_VAL_NUMERIC
-                )
-            ) %>%
-            dplyr::ungroup() %>%
-            dplyr::arrange(patient_id, tracker_year, tracker_month)
-
-        logInfo(
-            log_to_json(
-                message = "longitudinal_data dim: {values['dim']}.",
-                values = list(dim = dim(longitudinal_data)),
-                script = "script3",
-                file = "create_table_patient_data_changes_only.log",
-                functionName = "create_table_longitudinal_data"
-            )
-        )
-
-        export_data_as_parquet(
-            data = longitudinal_data,
-            filename = paste0("longitudinal_data_", name),
-            output_root = output_root,
-            suffix = ""
-        )
-    }
diff --git a/scripts/R/run_pipeline.R b/scripts/R/run_pipeline.R
index cb879c5..d81a906 100644
--- a/scripts/R/run_pipeline.R
+++ b/scripts/R/run_pipeline.R
@@ -102,13 +102,6 @@ ingest_data(
     table = "patient_data_static",
     source = file.path(table_dir, "patient_data_static.parquet")
 )
-ingest_data(
-    project_id = config$project_id,
-    cluster_fields = "clinic_id,patient_id,tracker_date",
-    dataset = config$dataset,
-    table = "patient_data_hba1c",
-    source = file.path(table_dir, "longitudinal_data_hba1c.parquet")
-)
 # NOTE: product data ingestion is deliberately skipped until the product pipeline is finalized
 # ingest_data(
 #     project_id = config$project_id,
diff --git a/scripts/R/run_script_3_create_tables.R b/scripts/R/run_script_3_create_tables.R
index 8a27014..9b86568 100644
--- a/scripts/R/run_script_3_create_tables.R
+++ b/scripts/R/run_script_3_create_tables.R
@@ -100,48 +100,6 @@ main <- function() {
         output_root = paths$output_root
     )
 
-    logfile <- "table_longitudinal_data_hba1c"
-    with_file_logger(logfile,
-        {
-            tryCatch(
-                {
-                    create_table_longitudinal_data(
-                        patient_data_files,
-                        file.path(paths$output_root, "patient_data_cleaned"),
-                        paths$tables,
-                        "hba1c_updated",
-                        "hba1c"
-                    )
-                },
-                error = function(e) {
-                    logError(
-                        log_to_json(
-                            "Could not create table for longitudinal patient data. Error = {values['e']}.",
-                            values = list(e = e$message),
-                            script = "script3",
-                            file = "run_script_3_create_tables.R",
-                            errorCode = "critical_abort",
-                            functionName = "create_table_longitudinal_data"
-                        )
-                    )
-                },
-                warning = function(w) {
-                    logWarn(
-                        log_to_json(
-                            "Could not create table for longitudinal patient data. Warning = {values['w']}.",
-                            values = list(w = w$message),
-                            script = "script3",
-                            file = "run_script_3_create_tables.R",
-                            warningCode = "critical_abort",
-                            functionName = "create_table_longitudinal_data"
-                        )
-                    )
-                }
-            )
-        },
-        output_root = paths$output_root
-    )
-
     logfile <- "table_patient_data_annual"
     with_file_logger(logfile,
         {