NYCPlanning · damonmcc · Dec 29, 2025 · Dec 29, 2025 · Dec 29, 2025 · damonmcc
diff --git a/.github/workflows/ceqr_schools.yml b/.github/workflows/ceqr_schools.yml
@@ -0,0 +1,46 @@
+name: CEQR - Schools
+on:
+  workflow_dispatch:
+    inputs:
+      dataset:
+        description: "Dataset to build"
+        type: choice
+        required: true
+        options:
+        - sca_capacity_projects
+        - sca_e_projections_by_boro
+        - sca_e_projections_by_sd
+        - ceqr_school_buildings
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    defaults:
+      run:
+        shell: bash
+        working-directory: products/ceqr/ceqr_app
+    container:
+      image: nycplanning/build-geosupport:${{ inputs.image_tag || 'latest' }}
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Load Secrets
+      uses: 1password/load-secrets-action@v1
+      with:
+        export-env: true
+      env:
+        OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
+        BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
+        AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
+        AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
+        AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
+
+    - name: Setup build environment
+      working-directory: ./
+      run: ./bash/docker_container_setup.sh
+
+    - name: Run recipe  # runs the ceqr recipe for the dataset chosen at dispatch
+      run: |
+        export RECIPE_ENGINE=$BUILD_ENGINE_SERVER/recipe
+        export EDM_DATA=$BUILD_ENGINE_SERVER/defaultdb
+        ./ceqr run recipe ${{ inputs.dataset }}
diff --git a/dcpy/library/archive.py b/dcpy/library/archive.py
@@ -34,7 +34,7 @@ def __call__(
         Parameters
         ----------
         path: path to the configutation file
-        output_format: currently supported formats: `'csv'`, `'geojson'`, `'shapefile'`, `'postgres'`
+        output_format: see ingest.Ingestor translator methods for currently supported formats
         push: if `True` then push to s3
         clean: if `True`, the temporary files created under `.library` will be removed
         latest: if `True` then tag this current version we are processing to be the `latest`

diff --git a/dcpy/library/cli.py b/dcpy/library/cli.py
@@ -19,7 +19,7 @@
 @app.command()
 def archive(
     path: str = typer.Option(None, "--path", "-f", help="Path to config yml"),
-    output_formats: list[str] = typer.Option(["pgdump", "parquet", "csv"], "--output-format", "-o", help="csv, geojson, shapefile, pgdump and parquet"),
+    output_formats: list[str] = typer.Option(["pgdump", "parquet", "csv", "shapefile", "postgres"], "--output-format", "-o", help="csv, geojson, shapefile, pgdump and parquet"),
     push: bool = typer.Option(False, "--s3", "-s", help="Push to s3"),
     clean: bool = typer.Option(False, "--clean", "-c", help="Remove temporary files"),
     latest: bool = typer.Option(False, "--latest", "-l", help="Tag with latest"),

diff --git a/dcpy/library/ingest.py b/dcpy/library/ingest.py
@@ -65,7 +65,7 @@ def format_field_names(
     else:
         geom_clause = ""
     query = f"""SELECT\n\t{select}{geom_clause}\nFROM {layer_name}"""
-    print(query)
+    print(f"Formatting field names in layer '{layer_name}' using SQL query:\n{query}")
     if not sql:
         return query
     else:
@@ -191,6 +191,7 @@ def wrapper(self: Ingestor, *args, **kwargs) -> tuple[list[str], library.Config]
             layerName = dataset.name
 
         # Initiate vector translate
+        print("Initiating vector translate ...")
         with Progress(
             SpinnerColumn(spinner_name="earth"),
             TextColumn("[progress.description]{task.description}"),

diff --git a/dcpy/library/templates/doe_lcgms.yml b/dcpy/library/templates/doe_lcgms.yml
@@ -0,0 +1,32 @@
+dataset:
+  name: doe_lcgms
+  acl: public-read
+  source:
+    url:
+      path: s3://edm-recipes/inbox/sca/{{ version }}/doe_lcgms.csv
+    options:
+      - AUTODETECT_TYPE=NO
+      - EMPTY_STRING_AS_NULL=YES
+    geometry:
+      SRS: null
+      type: NONE
+
+  destination:
+    geometry:
+      SRS: null
+      type: NONE
+    options:
+      - OVERWRITE=YES
+      - PRECISION=NO
+    fields: []
+    sql: null
+
+  info:
+    description: |
+      Provided by DCP Capital Planning team as an excel file
+      with a name like "LCGMS_SchoolData".
+
+      This is only needed for the legacy CEQR schools dataset ceqr_school_buildings
+      and is different from the doe_lcgms ingest source data used in FacDB.
+    url: ""
+    dependents: []
diff --git a/dcpy/library/templates/sca_bluebook.yml b/dcpy/library/templates/sca_bluebook.yml
@@ -24,6 +24,9 @@ dataset:
   info:
     description: |
       ### NYC School Construction Authority - Capacity Projects in Progress
-      Provided by DCP Capital Planning team as an excel file. This is the SCA's “Enrollment, Capacity, Utilization Report,” known as the “Blue Book”.
+      Provided by DCP Capital Planning team as an excel file
+      with a name like "20XX - 20XX Blue Book" and a sheet name like "XX-XX by Org".
+
+      This is the SCA's “Enrollment, Capacity, Utilization Report,” known as the “Blue Book”.
     url: ""
     dependents: []
diff --git a/dcpy/library/templates/sca_capacity_projects_current.yml b/dcpy/library/templates/sca_capacity_projects_current.yml
@@ -24,6 +24,7 @@ dataset:
   info:
     description: |
       ### NYC School Construction Authority - Capacity Projects in Progress
-      Provided by DCP Capital Planning team as an excel file.
+      Provided by DCP Capital Planning team as an excel file
+      with a name like "Section 6 Capacity Projects in Process".
     url: ""
     dependents: []
diff --git a/dcpy/library/templates/sca_e_pct.yml b/dcpy/library/templates/sca_e_pct.yml
@@ -24,6 +24,7 @@ dataset:
   info:
     description: |
       ### NYC School Construction Authority - Enrollment Percentages by Zone
-      Provided by DCP Capital Planning team as an excel file.
+      Provided by DCP Capital Planning team as an excel file
+      with a name like "20XX ENROLLMENT _ by Zone".
     url: ""
     dependents: []
diff --git a/dcpy/library/templates/sca_e_projections.yml b/dcpy/library/templates/sca_e_projections.yml
@@ -24,6 +24,7 @@ dataset:
   info:
     description: |
       ### NYC School Construction Authority - Enrollment Projections by Grade
-      Provided by DCP Capital Planning team as an excel file.
+      Provided by DCP Capital Planning team as an excel file
+      with a name like "20XX-20XX Enrollment Projection By Grade".
     url: ""
     dependents: []
diff --git a/ingest_templates/doe_lcgms.yml b/ingest_templates/doe_lcgms.yml
@@ -18,8 +18,9 @@ attributes:
 
 ingestion:
   source:
-    type: local_file
-    path: ./LCGMS_SchoolData.xls
+    type: s3
+    bucket: edm-recipes
+    key: inbox/doe/{{ version }}/LCGMS_SchoolData.xls
   file_format:
     type: html
     kwargs:

diff --git a/products/ceqr/ceqr_app/README.md b/products/ceqr/ceqr_app/README.md
@@ -28,6 +28,43 @@ This then gets passed to the EDM production database using `create.sql`, where f
 
 ## Build instructions
 
+> [!IMPORTANT]
+> This codebase is currently only used to build CEQR Schools datasets which are distributed to the Capital Planning and Support (CAPS) team and used in DCP's Schools Model excel workbook. This section is focused on building those datasets.
+
+All source data comes from the CAPS team and must be archived using `library` with the output as postgres. For example:
+
+```bash
+library archive --name sca_capacity_projects_current --version 20251120 --latest --output-format postgres --postgres-url $RECIPE_ENGINE
+```
+
+These are the four CEQR school datasets and their source data. See each source dataset's `library` template for details.
+
+`sca_capacity_projects`
+
+- `sca_capacity_projects_current`
+
+`sca_e_projections_by_boro`
+
+- `sca_e_projections`
+
+`sca_e_projections_by_sd`
+
+- `sca_e_pct`
+- `sca_e_projections`
+
+`ceqr_school_buildings`
+
+- `doe_lcgms`
+- `sca_bluebook`
+
+Outputs must be distributed to S3 file storage at `edm-publishing/ceqr-app-data-staging/`. Each dataset has its own folder containing all of its versions. Versions are based on the day the build was run and the `latest` folder has the latest version.
+
+### Diagram of legacy CEQR app data flow
+
+![Diagram of legacy CEQR app data flow](/docs/diagrams/dataflow_ceqr.drawio.png)
+
+## DEPRECATED BUILD NOTES
+
 ### To build using github (NYCPlanning Members Only)
 
 Running a recipe using github actions is easy! Simply open an