fix(2-B): normalize WFIGS field formats

WFIGS returns ISO 3166-2 state codes (US-MT) and 2-letter incident
type codes (WF, RX). Normalize at parse boundary:

- normalize_state: strips US- prefix (US-MT -> MT)
- normalize_incident_type: maps codes to names (WF -> wildfire)

Fixes:
- category was fire.incident.wf, now fire.incident.wildfire
- region was US-US-MT-GLACIER, now US-MT-GLACIER

Both raw and normalized values stored in event.data.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Matt Johnson 2026-05-19 03:04:27 +00:00
commit dfad7ef45d
4 changed files with 202 additions and 61 deletions

View file

@ -17,6 +17,35 @@ WFIGS_PERIMETERS_URL = (
# Fall-off sweep window: 14 days (matches WFIGS's longest fall-off: large fires)
FALLOFF_WINDOW_DAYS = 14
# Incident type code mappings (WFIGS uses 2-letter codes).
# Keys are the upper-case codes; values are the readable names returned by
# normalize_incident_type(), which upper-cases its input before the lookup.
# A code missing from this table is not rejected there: it falls through
# as the lower-cased raw value.
INCIDENT_TYPE_MAP = {
"WF": "wildfire",
"RX": "prescribed_fire",
"CX": "complex",
"FA": "false_alarm",
}
def normalize_state(state: str | None) -> str | None:
"""Strip 'US-' prefix from POOState (ISO 3166-2 -> 2-letter)."""
if not state:
return None
if state.startswith("US-") and len(state) == 5:
return state[3:]
if len(state) == 2:
return state
return state # unknown shape, pass through
def normalize_incident_type(code: str | None) -> str:
"""Map IncidentTypeCategory code to a readable name."""
if not code:
return "unknown"
upper = code.upper()
if upper in INCIDENT_TYPE_MAP:
return INCIDENT_TYPE_MAP[upper]
return code.lower()
def severity_from_acres(acres: float | None) -> int:
"""Map DailyAcres to severity level 0-4."""
@ -42,6 +71,7 @@ def build_regions(state: str | None, county: str | None) -> tuple[list[str], str
"""
Build geo.regions list and primary_region from POOState and POOCounty.
Expects normalized 2-letter state codes (e.g., "MT" not "US-MT").
Returns (regions, primary_region).
"""
if not state:
@ -62,6 +92,7 @@ def subject_suffix(state: str | None, county: str | None) -> str:
"""
Build subject suffix from state and county.
Expects normalized 2-letter state codes.
Returns lowercase state.county (county with spaces replaced by underscores).
Falls back to "unknown" if state is not available.
"""

View file

@ -25,6 +25,8 @@ from central.adapters.wfigs_common import (
extract_centroid,
get_observed_guids,
init_observed_table,
normalize_incident_type,
normalize_state,
parse_wfigs_timestamp,
point_in_bbox,
severity_from_acres,
@ -185,6 +187,7 @@ class WFIGSIncidentsAdapter(SourceAdapter):
return f"central.fire.incident.removed.{state}"
# Regular incidents: central.fire.incident.<state>.<county>
# POOState is already normalized (2-letter code)
state = event.data.get("POOState")
county = event.data.get("POOCounty")
suffix = subject_suffix(state, county)
@ -273,17 +276,22 @@ class WFIGSIncidentsAdapter(SourceAdapter):
):
continue
# Track this GUID as observed (for fall-off detection)
state = props.get("POOState")
# Normalize at parse boundary
state_raw = props.get("POOState")
state = normalize_state(state_raw)
county = props.get("POOCounty")
incident_type_raw = props.get("IncidentTypeCategory")
incident_type = normalize_incident_type(incident_type_raw)
# Track this GUID as observed (for fall-off detection)
# Store normalized state for consistency
current_guids[irwin_id] = (state, county)
# Parse fields
incident_type = props.get("IncidentTypeCategory", "unknown").lower()
discovery_time = parse_wfigs_timestamp(props.get("FireDiscoveryDateTime"))
daily_acres = props.get("DailyAcres")
# Build regions
# Build regions (expects normalized 2-letter state code)
regions, primary_region = build_regions(state, county)
# Build geo
@ -297,7 +305,7 @@ class WFIGSIncidentsAdapter(SourceAdapter):
else:
geo = Geo(regions=regions, primary_region=primary_region)
# Build event
# Build event with normalized values in data
event = Event(
id=irwin_id,
adapter=self.name,
@ -308,12 +316,14 @@ class WFIGSIncidentsAdapter(SourceAdapter):
data={
"IrwinID": irwin_id,
"IncidentName": props.get("IncidentName"),
"IncidentTypeCategory": props.get("IncidentTypeCategory"),
"IncidentTypeCategory": incident_type,
"IncidentTypeCategory_raw": incident_type_raw,
"DailyAcres": daily_acres,
"PercentContained": props.get("PercentContained"),
"FireDiscoveryDateTime": props.get("FireDiscoveryDateTime"),
"ModifiedOnDateTime": props.get("ModifiedOnDateTime"),
"POOState": state,
"POOState_raw": state_raw,
"POOCounty": county,
"raw": props,
},

View file

@ -25,6 +25,8 @@ from central.adapters.wfigs_common import (
extract_centroid,
get_observed_guids,
init_observed_table,
normalize_incident_type,
normalize_state,
parse_wfigs_timestamp,
polygon_intersects_bbox,
severity_from_acres,
@ -185,6 +187,7 @@ class WFIGSPerimetersAdapter(SourceAdapter):
return f"central.fire.perimeter.removed.{state}"
# Regular perimeters: central.fire.perimeter.<state>.<county>
# POOState is already normalized (2-letter code)
state = event.data.get("POOState")
county = event.data.get("POOCounty")
suffix = subject_suffix(state, county)
@ -271,18 +274,23 @@ class WFIGSPerimetersAdapter(SourceAdapter):
):
continue
# Track this GUID as observed (for fall-off detection)
state = props.get("attr_POOState")
# Normalize at parse boundary
state_raw = props.get("attr_POOState")
state = normalize_state(state_raw)
county = props.get("attr_POOCounty")
incident_type_raw = props.get("attr_IncidentTypeCategory")
incident_type = normalize_incident_type(incident_type_raw)
# Track this GUID as observed (for fall-off detection)
# Store normalized state for consistency
current_guids[irwin_id] = (state, county)
# Parse fields using prefixed names
incident_type = props.get("attr_IncidentTypeCategory", "unknown").lower()
discovery_time = parse_wfigs_timestamp(props.get("attr_FireDiscoveryDateTime"))
# Use poly_GISAcres or attr_IncidentSize for acreage
daily_acres = props.get("attr_IncidentSize") or props.get("poly_GISAcres")
# Build regions
# Build regions (expects normalized 2-letter state code)
regions, primary_region = build_regions(state, county)
# Extract centroid for geo
@ -320,13 +328,15 @@ class WFIGSPerimetersAdapter(SourceAdapter):
data={
"IrwinID": irwin_id,
"IncidentName": props.get("attr_IncidentName") or props.get("poly_IncidentName"),
"IncidentTypeCategory": props.get("attr_IncidentTypeCategory"),
"IncidentTypeCategory": incident_type,
"IncidentTypeCategory_raw": incident_type_raw,
"DailyAcres": props.get("attr_IncidentSize"),
"GISAcres": props.get("poly_GISAcres"),
"PercentContained": props.get("attr_PercentContained"),
"FireDiscoveryDateTime": props.get("attr_FireDiscoveryDateTime"),
"ModifiedOnDateTime": props.get("attr_ModifiedOnDateTime_dt"),
"POOState": state,
"POOState_raw": state_raw,
"POOCounty": county,
"geometry": geometry, # Full GeoJSON polygon
"raw": props,

View file

@ -11,38 +11,39 @@ from central.config_models import AdapterConfig, RegionConfig
from central.models import Event, Geo
# Sample GeoJSON response with incidents
# Sample GeoJSON response with incidents using real WFIGS format
# Note: POOState comes as ISO 3166-2 ("US-MT"), IncidentTypeCategory as codes ("WF")
SAMPLE_INCIDENTS_RESPONSE = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [-116.5, 43.5]},
"geometry": {"type": "Point", "coordinates": [-113.5, 48.5]},
"properties": {
"IrwinID": "GUID-001-BOISE",
"IncidentName": "Test Fire 1",
"IncidentTypeCategory": "Wildfire",
"IrwinID": "GUID-001-GLACIER",
"IncidentName": "Glacier Fire",
"IncidentTypeCategory": "WF", # Real format: 2-letter code
"DailyAcres": 150,
"PercentContained": 25,
"FireDiscoveryDateTime": 1716000000000,
"ModifiedOnDateTime": 1716100000000,
"POOState": "ID",
"POOCounty": "Ada",
"POOState": "US-MT", # Real format: ISO 3166-2
"POOCounty": "Glacier",
},
},
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [-117.0, 44.0]},
"geometry": {"type": "Point", "coordinates": [-116.5, 43.5]},
"properties": {
"IrwinID": "GUID-002-CANYON",
"IncidentName": "Test Fire 2",
"IncidentTypeCategory": "PrescribedFire",
"IrwinID": "GUID-002-OWYHEE",
"IncidentName": "Owyhee Rx",
"IncidentTypeCategory": "RX", # Prescribed fire
"DailyAcres": 5,
"PercentContained": 100,
"FireDiscoveryDateTime": 1716200000000,
"ModifiedOnDateTime": 1716300000000,
"POOState": "ID",
"POOCounty": "Canyon",
"POOState": "US-ID",
"POOCounty": "Owyhee",
},
},
{
@ -51,12 +52,12 @@ SAMPLE_INCIDENTS_RESPONSE = {
"properties": {
"IrwinID": "GUID-003-FLORIDA",
"IncidentName": "Florida Fire",
"IncidentTypeCategory": "Wildfire",
"IncidentTypeCategory": "WF",
"DailyAcres": 50,
"PercentContained": 0,
"FireDiscoveryDateTime": 1716400000000,
"ModifiedOnDateTime": 1716500000000,
"POOState": "FL",
"POOState": "US-FL",
"POOCounty": "Miami-Dade",
},
},
@ -72,24 +73,24 @@ SAMPLE_PERIMETERS_RESPONSE = {
"geometry": {
"type": "Polygon",
"coordinates": [[
[-116.6, 43.4],
[-116.4, 43.4],
[-116.4, 43.6],
[-116.6, 43.6],
[-116.6, 43.4],
[-113.6, 48.4],
[-113.4, 48.4],
[-113.4, 48.6],
[-113.6, 48.6],
[-113.6, 48.4],
]],
},
"properties": {
"attr_IrwinID": "GUID-001-BOISE",
"attr_IncidentName": "Test Fire 1",
"attr_IncidentTypeCategory": "Wildfire",
"attr_IrwinID": "GUID-001-GLACIER",
"attr_IncidentName": "Glacier Fire",
"attr_IncidentTypeCategory": "WF", # Real format: 2-letter code
"attr_IncidentSize": 150,
"poly_GISAcres": 148.5,
"attr_PercentContained": 25,
"attr_FireDiscoveryDateTime": 1716000000000,
"attr_ModifiedOnDateTime_dt": 1716100000000,
"attr_POOState": "ID",
"attr_POOCounty": "Ada",
"attr_POOState": "US-MT", # Real format: ISO 3166-2
"attr_POOCounty": "Glacier",
},
},
],
@ -137,15 +138,16 @@ class TestWFIGSCommon:
def test_build_regions_full(self):
from central.adapters.wfigs_common import build_regions
regions, primary = build_regions("ID", "Ada")
assert regions == ["US-ID-ADA"]
assert primary == "US-ID-ADA"
# Expects normalized 2-letter state code
regions, primary = build_regions("MT", "Glacier")
assert regions == ["US-MT-GLACIER"]
assert primary == "US-MT-GLACIER"
def test_build_regions_state_only(self):
from central.adapters.wfigs_common import build_regions
regions, primary = build_regions("ID", None)
assert regions == ["US-ID"]
assert primary == "US-ID"
regions, primary = build_regions("MT", None)
assert regions == ["US-MT"]
assert primary == "US-MT"
def test_build_regions_none(self):
from central.adapters.wfigs_common import build_regions
@ -155,7 +157,8 @@ class TestWFIGSCommon:
def test_subject_suffix(self):
from central.adapters.wfigs_common import subject_suffix
assert subject_suffix("ID", "Ada") == "id.ada"
# Expects normalized 2-letter state code
assert subject_suffix("MT", "Glacier") == "mt.glacier"
assert subject_suffix("ID", "Ada County") == "id.ada_county"
assert subject_suffix("ID", None) == "id"
assert subject_suffix(None, None) == "unknown"
@ -165,6 +168,66 @@ class TestWFIGSCommon:
assert point_in_bbox(-116.5, 43.5, -124, 31, -102, 49) is True
assert point_in_bbox(-80.0, 26.0, -124, 31, -102, 49) is False
# Normalization tests
def test_normalize_state_iso_3166(self):
    """ISO 3166-2 codes lose their US- prefix."""
    from central.adapters.wfigs_common import normalize_state

    expected_by_raw = {"US-MT": "MT", "US-ID": "ID", "US-CA": "CA"}
    for raw, expected in expected_by_raw.items():
        assert normalize_state(raw) == expected
def test_normalize_state_already_2letter(self):
    """Bare 2-letter codes come back unchanged."""
    from central.adapters.wfigs_common import normalize_state

    for abbreviation in ("MT", "ID"):
        assert normalize_state(abbreviation) == abbreviation
def test_normalize_state_none_empty(self):
    """Missing values (None or the empty string) normalize to None."""
    from central.adapters.wfigs_common import normalize_state

    for missing in (None, ""):
        assert normalize_state(missing) is None
def test_normalize_state_unknown_format(self):
    """Values in an unrecognized shape are passed through untouched."""
    from central.adapters.wfigs_common import normalize_state

    for odd_value in ("Montana", "US-MONTANA"):
        assert normalize_state(odd_value) == odd_value
def test_normalize_incident_type_wf(self):
    """The WF code maps to wildfire regardless of letter case."""
    from central.adapters.wfigs_common import normalize_incident_type

    for spelling in ("WF", "wf"):
        assert normalize_incident_type(spelling) == "wildfire"
def test_normalize_incident_type_rx(self):
    """The RX code maps to prescribed_fire regardless of letter case."""
    from central.adapters.wfigs_common import normalize_incident_type

    for spelling in ("RX", "rx"):
        assert normalize_incident_type(spelling) == "prescribed_fire"
def test_normalize_incident_type_cx(self):
    """The CX code maps to complex."""
    from central.adapters.wfigs_common import normalize_incident_type

    readable = normalize_incident_type("CX")
    assert readable == "complex"
def test_normalize_incident_type_fa(self):
    """The FA code maps to false_alarm."""
    from central.adapters.wfigs_common import normalize_incident_type

    readable = normalize_incident_type("FA")
    assert readable == "false_alarm"
def test_normalize_incident_type_unknown_code(self):
    """Codes without a mapping are returned lower-cased."""
    from central.adapters.wfigs_common import normalize_incident_type

    expected_by_raw = {
        "UNKNOWN_CODE": "unknown_code",
        "Wildfire": "wildfire",
    }
    for raw, expected in expected_by_raw.items():
        assert normalize_incident_type(raw) == expected
def test_normalize_incident_type_none(self):
    """Missing codes (None or the empty string) yield "unknown"."""
    from central.adapters.wfigs_common import normalize_incident_type

    for missing in (None, ""):
        assert normalize_incident_type(missing) == "unknown"
class TestWFIGSIncidentsAdapter:
"""Tests for WFIGS Incidents adapter."""
@ -211,13 +274,26 @@ class TestWFIGSIncidentsAdapter:
# Should have 2 events (Florida filtered out by bbox)
assert len(events) == 2
# First event: Glacier Fire
event = events[0]
assert event.id == "GUID-001-BOISE"
assert event.id == "GUID-001-GLACIER"
assert event.adapter == "wfigs_incidents"
assert event.category == "fire.incident.wildfire"
# Category uses normalized incident type
assert event.category == "fire.incident.wildfire" # NOT fire.incident.wf
assert event.severity == 3 # 150 acres = severity 3 (100-999 range)
assert event.geo.primary_region == "US-ID-ADA"
assert event.data["IrwinID"] == "GUID-001-BOISE"
# Region uses normalized state (no double US-)
assert event.geo.primary_region == "US-MT-GLACIER" # NOT US-US-MT-GLACIER
# Data contains both normalized and raw values
assert event.data["POOState"] == "MT" # normalized
assert event.data["POOState_raw"] == "US-MT" # raw
assert event.data["IncidentTypeCategory"] == "wildfire" # normalized
assert event.data["IncidentTypeCategory_raw"] == "WF" # raw
# Second event: Owyhee Rx
event2 = events[1]
assert event2.category == "fire.incident.prescribed_fire" # NOT fire.incident.rx
assert event2.data["POOState"] == "ID"
assert event2.data["POOState_raw"] == "US-ID"
@pytest.mark.asyncio
async def test_is_published_dedup(
@ -280,27 +356,30 @@ class TestWFIGSIncidentsAdapter:
# The removal is yielded for GUID-002
removal_events = [e for e in events2 if e.category == "fire.incident.removed"]
assert len(removal_events) == 1
assert removal_events[0].data["irwin_id"] == "GUID-002-CANYON"
assert removal_events[0].data["irwin_id"] == "GUID-002-OWYHEE"
def test_subject_for_incidents(
def test_subject_for_incidents_normalized(
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
):
"""subject_for uses normalized state codes."""
from central.adapters.wfigs_incidents import WFIGSIncidentsAdapter
adapter = WFIGSIncidentsAdapter(mock_config, mock_config_store, cursor_db_path)
# Event data contains normalized state (MT not US-MT)
event = Event(
id="test-id",
adapter="wfigs_incidents",
category="fire.incident.wildfire",
time=datetime.now(timezone.utc),
severity=2,
geo=Geo(primary_region="US-ID-ADA"),
data={"POOState": "ID", "POOCounty": "Ada"},
geo=Geo(primary_region="US-MT-GLACIER"),
data={"POOState": "MT", "POOCounty": "Glacier"},
)
subject = adapter.subject_for(event)
assert subject == "central.fire.incident.id.ada"
# Subject uses normalized state: mt.glacier not us-mt.glacier
assert subject == "central.fire.incident.mt.glacier"
def test_subject_for_removal(
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
@ -316,11 +395,11 @@ class TestWFIGSIncidentsAdapter:
time=datetime.now(timezone.utc),
severity=0,
geo=Geo(),
data={"irwin_id": "test-id", "state": "ID"},
data={"irwin_id": "test-id", "state": "MT"},
)
subject = adapter.subject_for(event)
assert subject == "central.fire.incident.removed.id"
assert subject == "central.fire.incident.removed.mt"
@pytest.mark.asyncio
async def test_bbox_post_filter(
@ -416,29 +495,40 @@ class TestWFIGSPerimetersAdapter:
assert len(events) == 1
event = events[0]
assert event.id == "GUID-001-BOISE"
assert event.id == "GUID-001-GLACIER"
assert event.adapter == "wfigs_perimeters"
assert event.category == "fire.perimeter.wildfire"
assert event.geo.primary_region == "US-ID-ADA"
# Category uses normalized incident type
assert event.category == "fire.perimeter.wildfire" # NOT fire.perimeter.wf
# Region uses normalized state (no double US-)
assert event.geo.primary_region == "US-MT-GLACIER" # NOT US-US-MT-GLACIER
# Data contains both normalized and raw values
assert event.data["POOState"] == "MT" # normalized
assert event.data["POOState_raw"] == "US-MT" # raw
assert event.data["IncidentTypeCategory"] == "wildfire" # normalized
assert event.data["IncidentTypeCategory_raw"] == "WF" # raw
# Geometry is included
assert "geometry" in event.data
assert event.data["geometry"]["type"] == "Polygon"
def test_subject_for_perimeters(
def test_subject_for_perimeters_normalized(
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
):
"""subject_for uses normalized state codes."""
from central.adapters.wfigs_perimeters import WFIGSPerimetersAdapter
adapter = WFIGSPerimetersAdapter(mock_config, mock_config_store, cursor_db_path)
# Event data contains normalized state (MT not US-MT)
event = Event(
id="test-id",
adapter="wfigs_perimeters",
category="fire.perimeter.wildfire",
time=datetime.now(timezone.utc),
severity=2,
geo=Geo(primary_region="US-ID-ADA"),
data={"POOState": "ID", "POOCounty": "Ada", "geometry": {}},
geo=Geo(primary_region="US-MT-GLACIER"),
data={"POOState": "MT", "POOCounty": "Glacier", "geometry": {}},
)
subject = adapter.subject_for(event)
assert subject == "central.fire.perimeter.id.ada"
# Subject uses normalized state: mt.glacier not us-mt.glacier
assert subject == "central.fire.perimeter.mt.glacier"