From 52bf591bab66d35799aa2f67cf588e1f855176bb Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 4 Mar 2026 14:11:37 -0500 Subject: [PATCH 1/4] make transitzone sql re-runnable --- products/pluto/pluto_build/sql/transitzone.sql | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/products/pluto/pluto_build/sql/transitzone.sql b/products/pluto/pluto_build/sql/transitzone.sql index a7cced1531..65fe8bec70 100644 --- a/products/pluto/pluto_build/sql/transitzone.sql +++ b/products/pluto/pluto_build/sql/transitzone.sql @@ -30,7 +30,7 @@ -- lot-by-lot calculation when block-level assignment would be misleading. -- Create decomposed transit zones table (break multipolygons into individual parts) -DROP TABLE IF EXISTS transit_zones_atomic_geoms; +DROP TABLE IF EXISTS transit_zones_atomic_geoms CASCADE; CREATE TABLE transit_zones_atomic_geoms AS WITH decomposed AS ( SELECT @@ -52,7 +52,7 @@ CREATE INDEX idx_transit_zones_atomic_geoms_gix ON transit_zones_atomic_geoms US -- AR Note: I tried a few approaches for this, and perhaps there's a more clever/performant -- way to accomplish this. Unfortunately, the recommend approach of ST_ClusterDBSCAN -- will `sometimes` accomplish this, but it errors out seemingly randomly. 
-DROP TABLE IF EXISTS transit_zones_tax_blocks; +DROP TABLE IF EXISTS transit_zones_tax_blocks CASCADE; CREATE TABLE transit_zones_tax_blocks AS WITH block_unions AS ( SELECT @@ -104,7 +104,7 @@ CREATE INDEX idx_transit_zones_tax_blocks_geom ON transit_zones_tax_blocks USING -- Step 1: Calculate coverage percentages for all tax blocks -DROP TABLE IF EXISTS transit_zones_block_to_tz_ranked; +DROP TABLE IF EXISTS transit_zones_block_to_tz_ranked CASCADE; CREATE TABLE transit_zones_block_to_tz_ranked AS WITH block_to_tz AS ( SELECT @@ -140,7 +140,7 @@ ANALYZE transit_zones_block_to_tz_ranked; -- For ambiguous blocks (those with competing transit zones), create lot-level assignments -DROP TABLE IF EXISTS transit_zones_bbl_to_tz_ranked; +DROP TABLE IF EXISTS transit_zones_bbl_to_tz_ranked CASCADE; CREATE TABLE transit_zones_bbl_to_tz_ranked AS WITH ambiguous_bbls AS ( SELECT @@ -225,9 +225,7 @@ FROM ( AND ambiguous.block = block_tz.block AND ambiguous.tz_rank = 2 ) - UNION ALL - -- Lot-level assignments for ambiguous blocks SELECT bbls[1] AS bbl, From b26e55aaa78e6b3201664cc0f8a66cc51b5ab01e Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 4 Mar 2026 14:11:53 -0500 Subject: [PATCH 2/4] fix bug in transitzone assignment Previously, when a block had multiple transit zones and one of them had between 0 and 10% coverage, the block's bbls weren't assigned to a tz at all. 
--- products/pluto/pluto_build/sql/transitzone.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/products/pluto/pluto_build/sql/transitzone.sql b/products/pluto/pluto_build/sql/transitzone.sql index 65fe8bec70..696bac3251 100644 --- a/products/pluto/pluto_build/sql/transitzone.sql +++ b/products/pluto/pluto_build/sql/transitzone.sql @@ -221,9 +221,9 @@ FROM ( AND NOT EXISTS ( SELECT 1 FROM transit_zones_block_to_tz_ranked AS ambiguous WHERE - ambiguous.borough = block_tz.borough - AND ambiguous.block = block_tz.block + ambiguous.id = block_tz.id AND ambiguous.tz_rank = 2 + AND ambiguous.pct_covered > 10 ) UNION ALL -- Lot-level assignments for ambiguous blocks From 5d1940f57e9cbede487395cffdfb9616b0d3ffef Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 4 Mar 2026 15:12:44 -0500 Subject: [PATCH 3/4] use point_on_surface, rather than centroid for linking back to blocks after a split --- products/pluto/pluto_build/sql/transitzone.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/pluto/pluto_build/sql/transitzone.sql b/products/pluto/pluto_build/sql/transitzone.sql index 696bac3251..e8ba4b225e 100644 --- a/products/pluto/pluto_build/sql/transitzone.sql +++ b/products/pluto/pluto_build/sql/transitzone.sql @@ -89,7 +89,7 @@ WITH block_unions AS ( ON np.borough = p.borough AND np.block = p.block - AND ST_WITHIN(ST_CENTROID(p.geom), np.geom) + AND ST_WITHIN(ST_POINTONSURFACE(p.geom), np.geom) GROUP BY np.borough, np.block, np.sub_block, np.geom ) SELECT From 7dd60344204fa125ad4cb70867f4ae84b7144aaf Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 4 Mar 2026 16:02:39 -0500 Subject: [PATCH 4/4] Convert MIH Option checks to accepted_values --- products/pluto/models/_sources.yml | 12 +++++++ .../tests/assert_only_valid_mih_options.sql | 36 ------------------- 2 files changed, 12 insertions(+), 36 deletions(-) delete mode 100644 products/pluto/tests/assert_only_valid_mih_options.sql diff --git 
a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index 7f855fd1d3..355ae04184 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -67,6 +67,18 @@ sources: description: Spatial overlaps between lots and MIH areas (from miharea.sql) - name: mih_distinct_options description: QAQC view of all distinct MIH options found in the data (from miharea.sql) + columns: + - name: option + description: MIH option name + data_tests: + - accepted_values: + values: ['Option 1', 'Option 2', 'Option 3', 'Deep Affordability Option', 'Workforce Option'] + config: + severity: error + tags: ['de_check', 'minor', 'major'] + meta: + description: Only the five valid MIH options should appear in the source data. Any additional options indicate a source data issue that needs to be investigated. + next_steps: Contact GIS to investigate unexpected MIH option values in source data - name: transit_zones_block_to_tz_ranked description: Ranked transit zone assignments by block (from transitzone.sql) - name: transit_zones_bbl_to_tz_ranked diff --git a/products/pluto/tests/assert_only_valid_mih_options.sql b/products/pluto/tests/assert_only_valid_mih_options.sql deleted file mode 100644 index f62e6c3483..0000000000 --- a/products/pluto/tests/assert_only_valid_mih_options.sql +++ /dev/null @@ -1,36 +0,0 @@ -{{ - config( - tags = ['de_check', 'minor', 'major'], - meta = { - 'description': ''' - This test checks that only the four valid MIH options appear in the source data: - - Option 1 - - Option 2 - - Deep Affordability Option - - Workforce Option - - Any additional options indicate a source data issue that needs to be investigated. 
- ''', - 'next_steps': 'Contact GIS to investigate unexpected MIH option values in source data' - } - ) -}} - -WITH valid_options AS ( - SELECT option FROM (VALUES - ('Option 1'), - ('Option 2'), - ('Option 3'), - ('Deep Affordability Option'), - ('Workforce Option') - ) AS t(option) -), - -actual_options AS ( - SELECT * FROM {{ source('build_sources', 'mih_distinct_options') }} -) - --- Return any options that are NOT in the valid list (test fails if any rows returned) -SELECT option -FROM actual_options -WHERE option NOT IN (SELECT option FROM valid_options)