Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions products/cscl/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ models:
+materialized: table
etl_dev_qa:
+materialized: table
log_files:
+materialized: table

on-run-start:
- '{{ create_pg_functions() }}'
Expand Down
12 changes: 10 additions & 2 deletions products/cscl/design_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,16 @@ Also, protosegments may be reversed as denoted by the field FROM_TO_INDICATOR eq

See model `int__protosegments` for implementation.

### Error reporting
This is a stub to be filled out when requirements for error/warning logging are finalized
#### Error reports

Errors are logged for the following conditions:
- If a preferred B7SC is not found in StreetName or in FeatureName. This is reflected/measured by segments missing face codes (`log__lion_segments_missing_facecode`)
- If a segment has an endpoint that is not joined to a Node (`log__lion_segments_missing_nodes`)
- If a segment does not join to an Atomic Polygon on either side (`log__lion_segments_missing_aps`)
- If a centerline or protosegment has a SEGLOCSTATUS in its source table that differs from its calculated Segment Locational Status (`log__lion_centerline_or_proto_seglocstatus_mismatch`)
- If a segment does not have a joined Atomic Polygon sharing the same borough code as the segment (`log__lion_segments_ap_boro_mismatch`)
- If a segment does not join to a NYPD BEAT polygon (`log__lion_segments_missing_nypd`)
- If a protosegment does not share a Segment ID with a geometry-modeled segment (`log__lion_protosegment_orphans`)


## Special Address Files (SAF)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ SELECT
co.globalid,
co.lionkey,
co.segmentid,
left(left_beat.post, 1) AS left_nypd_service_area,
left(right_beat.post, 1) AS right_nypd_service_area
left_beat.sector AS left_nypd_sector,
left_beat.geo_type AS left_beat_geo_type,
CASE WHEN left_beat.geo_type = 'HP' THEN left(left_beat.post, 1) END AS left_nypd_service_area,
right_beat.sector AS right_nypd_sector,
right_beat.geo_type AS right_beat_geo_type,
-- Right-side service area: first character of the RIGHT beat's post, populated only when that beat has geo_type 'HP'.
CASE WHEN right_beat.geo_type = 'HP' THEN left(right_beat.post, 1) END AS right_nypd_service_area
FROM segment_offsets AS co
-- wrapping the ref in a CTE can confuse the postgres planner into not using the index
LEFT JOIN {{ ref("stg__nypdbeat") }} AS left_beat
ON
st_within(co.left_offset_point, left_beat.geom)
AND left_beat.geo_type = 'HP'
ON st_within(co.left_offset_point, left_beat.geom)
LEFT JOIN {{ ref("stg__nypdbeat") }} AS right_beat
ON
st_within(co.right_offset_point, right_beat.geom)
AND right_beat.geo_type = 'HP'
ON st_within(co.right_offset_point, right_beat.geom)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{ config(
materialized = 'table',
indexes=[
{'columns': ['segmentid']},
{'columns': ['globalid']},
{'columns': ['nodeid']},
]
) }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,3 @@ segment_attributes AS (
)

SELECT * FROM segment_attributes
WHERE face_code IS NOT NULL -- TODO error report for this and maybe refactor to get this in a more logical place
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ feature_type_codes AS (
)
SELECT
proto.borough AS boroughcode,
facecode.face_code, -- TODO error report when null
facecode.face_code,
CASE
WHEN feature_type_codes.source_feature_class <> 'nonstreetfeatures' THEN proto.alt_segment_seqnum
ELSE seqnum.segment_seqnum
Expand Down Expand Up @@ -69,8 +69,7 @@ SELECT
proto.source_table,
proto.globalid
FROM proto
INNER JOIN primary_segments ON proto.segmentid = primary_segments.segmentid -- TODO error report for non-matches
LEFT JOIN primary_segments ON proto.segmentid = primary_segments.segmentid
LEFT JOIN facecode ON proto.b7sc = facecode.b7sc
LEFT JOIN seqnum ON proto.globalid = seqnum.globalid
LEFT JOIN feature_type_codes ON proto.feature_type_code IS NOT DISTINCT FROM feature_type_codes.code -- NULL -> centerline
WHERE facecode.face_code IS NOT NULL -- TODO - clean up in #2073
2 changes: 2 additions & 0 deletions products/cscl/models/intermediate/segments/int__segments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ WITH segments AS (
SELECT
{{ dbt_utils.star(ref('int__primary_segments')) }}
FROM {{ ref("int__protosegments") }}
WHERE geom IS NOT NULL -- proxy for joined to a segment
)
SELECT
CONCAT(boroughcode, face_code, segment_seqnum) AS lionkey,
*
FROM segments
WHERE face_code IS NOT NULL
ORDER BY lionkey
16 changes: 16 additions & 0 deletions products/cscl/models/log_files/log.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Combined log: stacks every individual log model listed below into one
-- relation via dbt_utils.union_relations.
--
-- The list is maintained by hand. It could instead be discovered with
-- dbt_utils.get_relations_by_pattern and then passed to
-- dbt_utils.union_relations, if we either remove the dash from db names
-- or if dbt fixes a bug.

{% set log_models = [
    ref("log__lion_centerline_boro_mismatch"),
    ref("log__lion_centerline_or_proto_seglocstatus_mismatch"),
    ref("log__lion_protosegment_orphans"),
    ref("log__lion_segment_lgc_count"),
    ref("log__lion_segments_ap_boro_mismatch"),
    ref("log__lion_segments_missing_nypd"),
    ref("log__lion_segments_missing_aps"),
    ref("log__lion_segments_missing_facecode"),
    ref("log__lion_segments_missing_nodes"),
] %}

{{ dbt_utils.union_relations(relations=log_models) }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Error log: centerline and protosegment LION records whose borough in the
-- source table differs from the borough carried on the LION record.
--
-- The source borough is looked up per source table (centerline by segmentid,
-- altsegmentdata by globalid) and that same value is used both in the filter
-- and in the message, so the reported values are always the ones compared.
WITH lion AS (
    SELECT
        lionkey,
        globalid,
        segmentid,
        boroughcode,
        source_table
    FROM {{ ref("int__lion") }}
    -- only these two source tables have a source borough to compare against
    WHERE source_table IN ('centerline', 'altsegmentdata')
),
centerline AS (
    SELECT
        segmentid,
        boroughcode
    FROM {{ ref("stg__centerline") }}
),
proto AS (
    SELECT
        globalid,
        borough
    FROM {{ ref("stg__altsegmentdata_proto") }}
),
-- one row per LION record, with the borough found in its own source table
lion_joined AS (
    SELECT
        lion.lionkey,
        lion.globalid,
        lion.segmentid,
        lion.boroughcode,
        CASE
            WHEN lion.source_table = 'centerline' THEN centerline.boroughcode
            WHEN lion.source_table = 'altsegmentdata' THEN proto.borough
        END AS source_boroughcode,
        lion.source_table
    FROM lion
    LEFT JOIN centerline
        ON lion.source_table = 'centerline' AND lion.segmentid = centerline.segmentid
    LEFT JOIN proto
        ON lion.source_table = 'altsegmentdata' AND lion.globalid = proto.globalid
)
SELECT
    'error' AS log_level,
    'borough mismatch' AS error_category,
    lion_joined.globalid,
    lion_joined.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    lion_joined.segmentid AS record_id,
    FORMAT(
        'The borough for the %s feature with an OID = "%s" (Borough = %s) does not '
        || 'match the borough currently being extracted (Borough = %s).',
        lion_joined.source_table,
        lion_joined.globalid,
        lion_joined.source_boroughcode,
        lion_joined.boroughcode
    ) AS message
FROM lion_joined
-- NULL-safe comparison: a record whose source row was not found (NULL source
-- borough) is reported as well.
WHERE lion_joined.source_boroughcode IS DISTINCT FROM lion_joined.boroughcode
19 changes: 19 additions & 0 deletions products/cscl/models/log_files/log__lion_centerline_curve.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Warning log: LION records whose calculated center of curvature falls
-- outside the range 0..9999999 on either axis. A NULL coordinate on its own
-- does not trigger the warning, because the range comparison is not true for NULL.
WITH lion AS (
    SELECT
        globalid,
        segmentid,
        source_table,
        center_of_curvature_x,
        center_of_curvature_y
    FROM {{ ref('int__lion') }}
)
SELECT
    'warning' AS log_level,
    'center of curvature' AS error_category,
    lion.globalid,
    lion.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    lion.segmentid AS record_id,
    FORMAT(
        'An invalid center of curvature was calculated for the %s feature with an OID = %s. '
        || 'Calculated X = %s, Calculated Y = %s.',
        lion.source_table,
        lion.globalid,
        lion.center_of_curvature_x,
        lion.center_of_curvature_y
    ) AS message
FROM lion
WHERE
    lion.center_of_curvature_x < 0
    OR lion.center_of_curvature_x > 9999999
    OR lion.center_of_curvature_y < 0
    OR lion.center_of_curvature_y > 9999999
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- Error log: centerline and protosegment LION records whose SEGLOCSTATUS in
-- the source table differs from the segment locational status on the LION record.
WITH lion AS (
    SELECT * FROM {{ ref("int__lion") }}
),
centerline AS (
    SELECT * FROM {{ ref("stg__centerline") }}
),
proto AS (
    SELECT * FROM {{ ref("stg__altsegmentdata_proto") }}
),
-- one row per LION record, with the SEGLOCSTATUS found in its own source table
joined AS (
    SELECT
        lion.lionkey,
        lion.globalid,
        lion.segmentid,
        lion.segment_locational_status,
        CASE
            WHEN lion.source_table = 'centerline' THEN centerline.seglocstatus
            WHEN lion.source_table = 'altsegmentdata' THEN proto.seglocstatus
        END AS source_segment_locational_status,
        lion.source_table
    FROM lion
    LEFT JOIN centerline
        ON lion.source_table = 'centerline' AND lion.segmentid = centerline.segmentid
    LEFT JOIN proto
        ON lion.source_table = 'altsegmentdata' AND lion.globalid = proto.globalid
    WHERE lion.source_table IN ('centerline', 'altsegmentdata')
)
SELECT
    'error' AS log_level,
    'seglocstatus mismatch' AS error_category,
    globalid,
    source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    segmentid AS record_id,
    -- PostgreSQL format() only substitutes %-style specifiers; every argument
    -- below needs its own %s or it is silently dropped from the message.
    FORMAT(
        'The %s feature with an OID = %s has a SEGLOCSTATUS value that is different than '
        || 'what was found using the LION ETL rules. Feature = ''%s'', ETL Rules = ''%s''.',
        source_table,
        globalid,
        source_segment_locational_status,
        segment_locational_status
    ) AS message
FROM joined
-- NULL-safe comparison: a status missing on only one side counts as a mismatch
WHERE segment_locational_status IS DISTINCT FROM source_segment_locational_status
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Warning log: non-street features whose line type is outside the range 1..6.
-- Rows with a NULL linetype are not reported (the range test is not true for NULL).
WITH nonstreetfeatures AS (
    SELECT
        globalid,
        segmentid,
        source_table,
        linetype
    FROM {{ ref('stg__nonstreetfeatures') }}
)
SELECT
    'warning' AS log_level,
    'Non-street feature invalid line type' AS error_category,
    nonstreetfeatures.globalid,
    nonstreetfeatures.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    nonstreetfeatures.segmentid AS record_id,
    FORMAT(
        'LineType appears to be set incorrectly for NonStreetFeature with an OID = ''%s''. '
        || 'The line type value is ''%s''.',
        nonstreetfeatures.globalid,
        nonstreetfeatures.linetype
    ) AS message
FROM nonstreetfeatures
WHERE
    nonstreetfeatures.linetype < 1
    OR nonstreetfeatures.linetype > 6
14 changes: 14 additions & 0 deletions products/cscl/models/log_files/log__lion_protosegment_orphans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Error log: protosegments that carry no geometry, i.e. rows of
-- int__protosegments where geom is NULL.
WITH orphans AS (
    SELECT
        globalid,
        segmentid,
        source_table
    FROM {{ ref('int__protosegments') }}
    WHERE geom IS NULL
)
SELECT
    'error' AS log_level,
    'protosegment without geometry-modeled segment' AS error_category,
    orphans.globalid,
    orphans.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    orphans.segmentid AS record_id,
    FORMAT(
        'Protosegment with globalid "%s" and segmentid "%s" has no corresponding geometry-modeled segment.',
        orphans.globalid,
        orphans.segmentid::INT
    ) AS message
FROM orphans
32 changes: 32 additions & 0 deletions products/cscl/models/log_files/log__lion_segment_lgc_count.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Error log: primary segments that have an LGC ranked above 9. The highest
-- lgc_rank found for the segment is reported as the number of LGC values.
WITH over_limit AS (
    -- segments with at least one lgc_rank above 9, and their highest rank
    SELECT
        segmentid,
        MAX(lgc_rank) AS lgc_count
    FROM {{ ref("int__lgc") }}
    WHERE lgc_rank > 9
    GROUP BY segmentid
),
primary_segments AS (
    SELECT
        segmentid,
        globalid,
        source_table
    FROM {{ ref("int__primary_segments") }}
)
SELECT
    'error' AS log_level,
    'more than 9 lgcs for a given segmentid' AS error_category,
    primary_segments.globalid,
    primary_segments.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    over_limit.segmentid AS record_id,
    FORMAT(
        '[LGC DATA ERROR] More than 9 LGC values were found for the "%s" '
        || 'feature with an OID = "%s" and segment id "%s". '
        || '"%s" values were found for the record.',
        primary_segments.source_table,
        primary_segments.globalid,
        over_limit.segmentid::INT,
        over_limit.lgc_count
    ) AS message
FROM primary_segments
INNER JOIN over_limit
    ON primary_segments.segmentid = over_limit.segmentid
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Error log: segments where neither the left nor the right joined atomic
-- polygon has the same borough code as the segment itself.
WITH seg AS (
    SELECT
        globalid,
        segmentid,
        source_table
    FROM {{ ref("int__segments") }}
),
ap AS (
    SELECT
        globalid,
        segment_borocode,
        left_atomicid,
        left_borocode,
        right_atomicid,
        right_borocode
    FROM {{ ref("int__segment_atomicpolygons") }}
)
SELECT
    'error' AS log_level,
    'neither joined atomic polygon matches segment''s borocode' AS error_category,
    seg.globalid,
    seg.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    seg.segmentid AS record_id,
    FORMAT(
        'Neither of the AtomicPolygon borough matches the segment borough for '
        || 'the %s feature with an OID = %s. Segment has boro ''%s'', '
        || 'left atomic polygon with atomicid ''%s'' has boro ''%s'', '
        || 'and right atomic polygon with atomicid ''%s'' has boro ''%s''.',
        seg.source_table,
        seg.globalid,
        ap.segment_borocode,
        ap.left_atomicid,
        ap.left_borocode,
        ap.right_atomicid,
        ap.right_borocode
    ) AS message
FROM seg
INNER JOIN ap
    ON seg.globalid = ap.globalid
WHERE
    -- at least one side has a borough; rows with no borough on either side
    -- are left to the other test so they are not duplicated here
    (ap.left_borocode IS NOT NULL OR ap.right_borocode IS NOT NULL)
    AND ap.left_borocode IS DISTINCT FROM ap.segment_borocode
    AND ap.right_borocode IS DISTINCT FROM ap.segment_borocode
10 changes: 10 additions & 0 deletions products/cscl/models/log_files/log__lion_segments_missing_aps.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Error log: LION records that have no atomic polygon id on either side.
WITH lion AS (
    SELECT
        globalid,
        segmentid,
        source_table,
        left_atomicid,
        right_atomicid
    FROM {{ ref('int__lion') }}
)
SELECT
    'error' AS log_level,
    'segment joined to no atomic polygon' AS error_category,
    lion.globalid,
    lion.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    lion.segmentid AS record_id,
    '' AS message
FROM lion
WHERE NOT (lion.left_atomicid IS NOT NULL OR lion.right_atomicid IS NOT NULL)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Error log: primary segments and protosegments that have no face code.
-- Both sources are filtered first, then stacked and de-duplicated.
WITH missing_facecode AS (
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref("int__primary_segments") }}
    WHERE face_code IS NULL
    UNION ALL
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref("int__protosegments") }}
    WHERE face_code IS NULL
)
SELECT DISTINCT
    'error' AS log_level,
    'segment missing facecode' AS error_category,
    missing_facecode.globalid,
    missing_facecode.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    missing_facecode.segmentid AS record_id,
    '' AS message
FROM missing_facecode
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Error log: LION records missing a node id on at least one end
-- (from_nodeid or to_nodeid is NULL).
WITH lion AS (
    SELECT
        globalid,
        segmentid,
        source_table,
        from_nodeid,
        to_nodeid
    FROM {{ ref('int__lion') }}
)
SELECT
    'error' AS log_level,
    'segment missing node' AS error_category,
    lion.globalid,
    lion.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    lion.segmentid AS record_id,
    '' AS message
FROM lion
WHERE NOT (lion.from_nodeid IS NOT NULL AND lion.to_nodeid IS NOT NULL)
17 changes: 17 additions & 0 deletions products/cscl/models/log_files/log__lion_segments_missing_nypd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Error log: segments whose NYPD beat row has no sector on either side.
WITH seg AS (
    SELECT
        lionkey,
        globalid,
        segmentid,
        source_table
    FROM {{ ref('int__segments') }}
),
beat AS (
    SELECT
        lionkey,
        left_nypd_sector,
        right_nypd_sector
    FROM {{ ref('int__segment_nypdbeat') }}
)
SELECT
    'error' AS log_level,
    'segment joined to no nypd beat' AS error_category,
    seg.globalid,
    seg.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    seg.segmentid AS record_id,
    '' AS message
FROM beat
INNER JOIN seg
    ON beat.lionkey = seg.lionkey
WHERE NOT (beat.left_nypd_sector IS NOT NULL OR beat.right_nypd_sector IS NOT NULL)
2 changes: 2 additions & 0 deletions products/cscl/poc_validation/prod_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class OutputDataset:
assert recipe.exports

for export in recipe.exports.datasets:
if export.name == "log":
continue
formatting = (export.custom or {}).get("formatting")
assert export.filename, "filename is required for export datasets"
assert export.format.value in ["dat", "csv"], "unsupported file format"
Expand Down
5 changes: 5 additions & 0 deletions products/cscl/recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,8 @@ exports:
filename: RPL.txt
format: dat
custom: { formatting: rpl }

# Log file
- name: log
filename: log.csv
format: csv
Loading