From 305ce5458a941f2e954691d4cf780c386113fb92 Mon Sep 17 00:00:00 2001 From: malice Date: Mon, 8 Jun 2026 00:30:13 -0600 Subject: [PATCH] v0.10.7: fix NWS SAME state-FIPS parse + 5-digit ANSI county form (#95) SAME format (FCC standard) is PSSCCC: P=area-type indicator, SS=state FIPS, CCC=county FIPS. Pre-v0.10.7 _build_regions read code[:2] (PS) as the state, so the leading P=0 collapsed every SAME code to state 01 = Alabama. The dumped CAP envelope from CENTRAL_WX#968 showed Bannock County Idaho alerts flagged as US-AL-FIPS016005 + primary_region=US-AL-FIPS016005, which then routed to central.wx.alert.us.al.county.fips016005 instead of central.wx.alert.us.id.county.fips16005. Fix: - Slice code[1:3] for the state FIPS, code[1:] for the 5-digit ANSI county FIPS (SSCCC -- standard interoperable form). Drops the P padding from emitted region/subject; P stays preserved verbatim in data.geocode.SAME for any power user that needs it. - Length guard tightened: ==6 + isdigit + isinstance str (was >= 2). Now malformed entries (too short, too long, non-digit, None) are silently skipped with no crash. - Deleted dead _extract_states_from_codes (defined but never called; same bug, removed rather than fixed). Tests: - New TestSameStateParse parametrized over 4 distinct-state cases per spec: 016005 -> US-ID-FIPS16005 (Bannock area), 001005 -> US-AL-FIPS01005 (Autauga area), 056005 -> US-WY-FIPS56005 (Carbon area), 049005 -> US-UT-FIPS49005 (Cache UT). - Area-subset (P>=1) and unknown-state-FIPS coverage. - Malformed-input parametrize: empty, too short (2 forms), too long (7 digits), non-digit char, all-alpha, None -- each silently skipped. - Existing SAMPLE_FEATURE_* fixtures updated from constructed-to-match-bug values (160001/410051/060037/530033) to proper 0SSCCC format (016001/041051/006037/053033); existing TestBuildRegions assertions updated to expect 5-digit ANSI form. Followup ticket (NOT v0.10.7 scope, recorded in PR body): (a) Null-geometry alerts with valid Idaho UGC zones are silently dropped by _geometry_intersects_region (line 297-298): needs UGC-fallback or geometry-or-UGC check. NWS issues many Special Weather Statements without GeoJSON polygons but with UGC IDZ* zones that should pass. (b) Configured monitoring bbox north=44.5 only covers the southern third of Idaho; Idaho extends to 49.0N, so Coeur d'Alene / Lewiston / etc. are out of scope. Verify whether the narrow bbox was an intentional dev limit or accidental. Co-authored-by: Claude Opus 4.7 (1M context) --- src/central/adapters/nws.py | 45 +++++++------- tests/test_nws_normalization.py | 103 ++++++++++++++++++++++---------- 2 files changed, 93 insertions(+), 55 deletions(-) diff --git a/src/central/adapters/nws.py b/src/central/adapters/nws.py index 8fa11c5..169a4e1 100644 --- a/src/central/adapters/nws.py +++ b/src/central/adapters/nws.py @@ -147,35 +147,34 @@ def _compute_bbox( return (min_lon, min_lat, max_lon, max_lat) -def _extract_states_from_codes( - same_codes: list[str], ugc_codes: list[str] -) -> set[str]: - """Extract state abbreviations from SAME and UGC codes.""" - states: set[str] = set() - - for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] - if fips_state in FIPS_TO_STATE: - states.add(FIPS_TO_STATE[fips_state]) - - for code in ugc_codes: - if len(code) >= 2 and code[:2].isalpha(): - states.add(code[:2].upper()) - - return states - - def _build_regions(same_codes: list[str], ugc_codes: list[str]) -> list[str]: - """Build sorted list of region strings from geocodes.""" + """Build sorted list of region strings from geocodes. + + SAME format (FCC standard) is ``PSSCCC``: ``P`` is an area-type indicator + (``0`` = whole region, ``1``-``9`` = a designated portion), ``SS`` is the + 2-digit state FIPS, ``CCC`` is the 3-digit county FIPS. Pre-v0.10.7 read + ``code[:2]`` (``PS``) as the state -- so e.g. ``016005`` (Bannock County, + Idaho) parsed as state ``01`` = Alabama. Fix: slice ``code[1:3]`` for the + state and emit the 5-digit ANSI county FIPS (``code[1:]`` = ``SSCCC``) + rather than the full 6-digit SAME, so subscribers see the interoperable + standard form. The ``P`` digit is preserved verbatim in + ``data.geocode.SAME`` for any power user that needs it. + + UGC codes carry the 2-letter state alpha (e.g. ``IDZ052``) so the UGC + branch was already correct -- only the SAME branch needed the fix. + + Malformed inputs (non-6-digit, non-digit chars, ``None``) are silently + skipped: no crash, no entry. + """ regions: set[str] = set() for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] + if isinstance(code, str) and len(code) == 6 and code.isdigit(): + fips_state = code[1:3] if fips_state in FIPS_TO_STATE: state = FIPS_TO_STATE[fips_state] - regions.add(f"US-{state}-FIPS{code}") + county_ansi = code[1:] # 5-digit ANSI county FIPS (SSCCC) + regions.add(f"US-{state}-FIPS{county_ansi}") for code in ugc_codes: if len(code) >= 3 and code[:2].isalpha(): diff --git a/tests/test_nws_normalization.py b/tests/test_nws_normalization.py index 701b5be..5e03a46 100644 --- a/tests/test_nws_normalization.py +++ b/tests/test_nws_normalization.py @@ -11,7 +11,6 @@ from central.adapters.nws import ( NWSAdapter, _snake_case, _parse_datetime, - _extract_states_from_codes, _build_regions, _compute_centroid, _compute_bbox, @@ -21,7 +20,9 @@ from central.config_models import AdapterConfig # Sample NWS GeoJSON features for testing -# SAME codes: 6 digits, first 2 are state FIPS (ID=16, OR=41, CA=06, WA=53) +# SAME codes are 6 digits in PSSCCC form (P=area-type indicator, SS=state +# FIPS, CCC=county FIPS). ID=16, OR=41, CA=06, WA=53 — so the SAME values +# below use the standard 0SSCCC form (no area-subset, full region). SAMPLE_FEATURE_ID = { "id": "urn:oid:2.49.0.1.840.0.a1b2c3d4e5f6", "type": "Feature", @@ -42,7 +43,7 @@ SAMPLE_FEATURE_ID = { "expires": "2026-05-15T14:00:00-06:00", "severity": "Severe", "geocode": { - "SAME": ["160001"], # Idaho state FIPS 16 + "SAME": ["016001"], # Idaho (state FIPS 16) county 001 (Ada) "UGC": ["IDC001", "IDZ033"], }, }, @@ -59,7 +60,7 @@ SAMPLE_FEATURE_OR = { "expires": "2026-05-16T08:00:00Z", "severity": "Moderate", "geocode": { - "SAME": ["410051"], # Oregon state FIPS 41 + "SAME": ["041051"], # Oregon (state FIPS 41) county 051 (Multnomah) "UGC": ["ORC051"], }, }, @@ -79,7 +80,7 @@ SAMPLE_FEATURE_CA = { "expires": "2026-05-16T18:00:00-07:00", "severity": "Minor", "geocode": { - "SAME": ["060037"], # California state FIPS 06 + "SAME": ["006037"], # California (state FIPS 06) county 037 (Los Angeles) "UGC": ["CAZ568"], }, }, @@ -96,7 +97,7 @@ SAMPLE_FEATURE_UNKNOWN_SEVERITY = { "expires": None, "severity": "Unknown", "geocode": { - "SAME": ["530033"], # Washington state FIPS 53 + "SAME": ["053033"], # Washington (state FIPS 53) county 033 (King) "UGC": ["WAC033"], }, }, @@ -137,35 +138,14 @@ class TestParseDatetime: assert _parse_datetime("not a date") is None -class TestExtractStates: - """Tests for state extraction from geocodes.""" - - def test_same_codes(self) -> None: - # Idaho FIPS is 16 - states = _extract_states_from_codes(["160001", "160003"], []) - assert states == {"ID"} - - def test_ugc_codes(self) -> None: - states = _extract_states_from_codes([], ["IDC001", "ORC051"]) - assert states == {"ID", "OR"} - - def test_combined(self) -> None: - # Idaho FIPS is 16 - states = _extract_states_from_codes(["160001"], ["WAC033"]) - assert states == {"ID", "WA"} - - def test_empty(self) -> None: - states = _extract_states_from_codes([], []) - assert states == set() - - class TestBuildRegions: """Tests for region string building.""" def test_same_to_fips_region(self) -> None: - # Idaho FIPS is 16 - regions = _build_regions(["160001"], []) - assert "US-ID-FIPS160001" in regions + # SAME 016001 = P=0, SS=16 (Idaho), CCC=001 (Ada County); + # emitted region uses the 5-digit ANSI county FIPS (drops P). + regions = _build_regions(["016001"], []) + assert "US-ID-FIPS16001" in regions def test_ugc_county(self) -> None: regions = _build_regions([], ["IDC001"]) @@ -176,10 +156,69 @@ class TestBuildRegions: assert "US-ID-Z033" in regions def test_sorted_alphabetically(self) -> None: - regions = _build_regions(["160001"], ["IDC001", "IDZ033"]) + regions = _build_regions(["016001"], ["IDC001", "IDZ033"]) assert regions == sorted(regions) +class TestSameStateParse: + """v0.10.7 regression guard: SAME PSSCCC parsing. + + The pre-v0.10.7 ``_build_regions`` read ``code[:2]`` (``PS``) as the state + FIPS, so for ``P=0`` (the common case) every SAME code parsed as + Alabama (``01``) regardless of the real state. Fix slices ``code[1:3]`` + for the state and emits the 5-digit ANSI county FIPS (``code[1:]``). + """ + + @pytest.mark.parametrize("same_code, expected_region", [ + ("016005", "US-ID-FIPS16005"), # Bannock County, Idaho + ("001005", "US-AL-FIPS01005"), # Autauga area, Alabama + ("056005", "US-WY-FIPS56005"), # Carbon area, Wyoming + ("049005", "US-UT-FIPS49005"), # Cache area, Utah + ]) + def test_state_derived_from_positions_1_2( + self, same_code: str, expected_region: str + ) -> None: + """Real SAME codes from four distinct states parse to the correct state.""" + regions = _build_regions([same_code], []) + assert expected_region in regions + assert len(regions) == 1 + + def test_area_subset_indicator_is_dropped_from_emitted_region(self) -> None: + """SAME with P>=1 (designated portion) parses the same state but the + emitted 5-digit county FIPS drops the P digit -- area-subset info + lives upstream in ``data.geocode.SAME`` for power users.""" + # 116005: P=1 (subset of region), SS=16 (Idaho), CCC=005 (Bannock) + regions = _build_regions(["116005"], []) + assert "US-ID-FIPS16005" in regions + + @pytest.mark.parametrize("malformed", [ + "", # empty + "01", # too short + "0160", # too short + "0160050", # too long (7 digits) + "016X05", # non-digit char + "abcdef", # all alpha + None, # missing entry + ]) + def test_malformed_same_is_silently_skipped(self, malformed) -> None: + """Garbage in SAME never crashes and never produces a region.""" + regions = _build_regions([malformed], []) + assert regions == [] + + def test_unknown_state_fips_is_silently_skipped(self) -> None: + """SAME with valid format but unrecognized state FIPS produces nothing.""" + # SS=99 is not a real state FIPS + regions = _build_regions(["099001"], []) + assert regions == [] + + def test_mixed_good_and_malformed(self) -> None: + """Valid entries still emit when malformed ones are present in the list.""" + regions = _build_regions(["016001", "", "049005", "abc"], []) + assert "US-ID-FIPS16001" in regions + assert "US-UT-FIPS49005" in regions + assert len(regions) == 2 + + class TestStateFilter: """Tests for region-based filtering."""