mirror of
https://github.com/zvx-echo6/central.git
synced 2026-05-21 18:14:44 +02:00
Merge feature/2-c-inciweb: NIFC InciWeb wildfire narrative adapter
This commit is contained in:
commit
4c1fdb8649
3 changed files with 1095 additions and 0 deletions
19
sql/migrations/017_add_inciweb_adapter.sql
Normal file
19
sql/migrations/017_add_inciweb_adapter.sql
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
-- Migration: 017_add_inciweb_adapter
|
||||
-- Add InciWeb adapter to config.adapters
|
||||
-- Idempotent: uses ON CONFLICT DO NOTHING
|
||||
|
||||
INSERT INTO config.adapters (name, enabled, cadence_s, settings)
|
||||
VALUES (
|
||||
'inciweb',
|
||||
false, -- Ships disabled; operator enables via GUI
|
||||
600,
|
||||
jsonb_build_object(
|
||||
'region', jsonb_build_object(
|
||||
'north', 49.0,
|
||||
'south', 31.0,
|
||||
'east', -102.0,
|
||||
'west', -124.0
|
||||
)
|
||||
)
|
||||
)
|
||||
ON CONFLICT (name) DO NOTHING;
|
||||
477
src/central/adapters/inciweb.py
Normal file
477
src/central/adapters/inciweb.py
Normal file
|
|
@ -0,0 +1,477 @@
|
|||
"""InciWeb adapter for wildfire narrative updates."""
|
||||
|
||||
import html
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from collections.abc import AsyncIterator
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
import aiohttp
|
||||
from pydantic import BaseModel
|
||||
from tenacity import (
|
||||
retry,
|
||||
retry_if_exception_type,
|
||||
stop_after_attempt,
|
||||
wait_exponential_jitter,
|
||||
)
|
||||
|
||||
from central.adapter import SourceAdapter
|
||||
from central.config_models import AdapterConfig, RegionConfig
|
||||
from central.config_store import ConfigStore
|
||||
from central.models import Event, Geo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# InciWeb RSS feed URL
|
||||
INCIWEB_RSS_URL = "https://inciweb.wildfire.gov/incidents/rss.xml"
|
||||
|
||||
# State name to 2-letter code mapping
|
||||
STATE_NAME_TO_CODE = {
|
||||
"alabama": "AL", "alaska": "AK", "arizona": "AZ", "arkansas": "AR",
|
||||
"california": "CA", "colorado": "CO", "connecticut": "CT", "delaware": "DE",
|
||||
"florida": "FL", "georgia": "GA", "hawaii": "HI", "idaho": "ID",
|
||||
"illinois": "IL", "indiana": "IN", "iowa": "IA", "kansas": "KS",
|
||||
"kentucky": "KY", "louisiana": "LA", "maine": "ME", "maryland": "MD",
|
||||
"massachusetts": "MA", "michigan": "MI", "minnesota": "MN", "mississippi": "MS",
|
||||
"missouri": "MO", "montana": "MT", "nebraska": "NE", "nevada": "NV",
|
||||
"new hampshire": "NH", "new jersey": "NJ", "new mexico": "NM", "new york": "NY",
|
||||
"north carolina": "NC", "north dakota": "ND", "ohio": "OH", "oklahoma": "OK",
|
||||
"oregon": "OR", "pennsylvania": "PA", "rhode island": "RI", "south carolina": "SC",
|
||||
"south dakota": "SD", "tennessee": "TN", "texas": "TX", "utah": "UT",
|
||||
"vermont": "VT", "virginia": "VA", "washington": "WA", "west virginia": "WV",
|
||||
"wisconsin": "WI", "wyoming": "WY", "district of columbia": "DC",
|
||||
"puerto rico": "PR", "guam": "GU", "virgin islands": "VI",
|
||||
"american samoa": "AS", "northern mariana islands": "MP",
|
||||
}
|
||||
|
||||
|
||||
def parse_coordinates_from_description(description: str) -> tuple[float, float] | None:
|
||||
"""
|
||||
Parse latitude/longitude from InciWeb description text.
|
||||
|
||||
Format: "Latitude: 47° 3 17 Longitude: 91° 38 6"
|
||||
InciWeb uses unsigned values for US coordinates (west longitude implied).
|
||||
Returns (lon, lat) tuple or None if not found.
|
||||
"""
|
||||
# Pattern for degree/minute/second format
|
||||
lat_pattern = r"Latitude:\s*(-?\d+)°\s*(\d+)\s*(\d+(?:\.\d+)?)"
|
||||
lon_pattern = r"Longitude:\s*(-?\d+)°\s*(\d+)\s*(\d+(?:\.\d+)?)"
|
||||
|
||||
lat_match = re.search(lat_pattern, description)
|
||||
lon_match = re.search(lon_pattern, description)
|
||||
|
||||
if not lat_match or not lon_match:
|
||||
return None
|
||||
|
||||
try:
|
||||
lat_deg = int(lat_match.group(1))
|
||||
lat_min = int(lat_match.group(2))
|
||||
lat_sec = float(lat_match.group(3))
|
||||
|
||||
lon_deg = int(lon_match.group(1))
|
||||
lon_min = int(lon_match.group(2))
|
||||
lon_sec = float(lon_match.group(3))
|
||||
|
||||
# Convert to decimal degrees
|
||||
# Latitude: positive in northern hemisphere
|
||||
if lat_deg >= 0:
|
||||
lat = lat_deg + lat_min / 60 + lat_sec / 3600
|
||||
else:
|
||||
lat = lat_deg - lat_min / 60 - lat_sec / 3600
|
||||
|
||||
# Longitude: InciWeb gives unsigned values for US west longitudes
|
||||
# Make negative for western hemisphere (US coordinates)
|
||||
lon = lon_deg + lon_min / 60 + lon_sec / 3600
|
||||
if lon > 0:
|
||||
lon = -lon # US longitudes are west (negative)
|
||||
|
||||
return (lon, lat)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def parse_state_from_description(description: str) -> str | None:
|
||||
"""
|
||||
Parse state name from InciWeb description text.
|
||||
|
||||
Format: "State: Minnesota" or "State: New Mexico"
|
||||
Returns 2-letter state code or None if not found.
|
||||
|
||||
Design note: State is parsed from the description rather than the title
|
||||
because InciWeb titles use unit code prefixes (e.g., "MNMNS Stewart Trail",
|
||||
"CACNP Santa Rosa Island Fire") which are not reliable state indicators.
|
||||
The description has a structured "State: <name>" field that reliably
|
||||
identifies the state for all incidents.
|
||||
"""
|
||||
pattern = r"State:\s*([A-Za-z\s]+?)(?:\n|---|$)"
|
||||
match = re.search(pattern, description)
|
||||
|
||||
if not match:
|
||||
return None
|
||||
|
||||
state_name = match.group(1).strip().lower()
|
||||
return STATE_NAME_TO_CODE.get(state_name)
|
||||
|
||||
|
||||
def strip_html(html_text: str) -> str:
|
||||
"""
|
||||
Strip HTML tags and decode entities to plain text.
|
||||
"""
|
||||
# Decode HTML entities (handles & < > etc.)
|
||||
text = html.unescape(html_text)
|
||||
|
||||
# Handle specifically (not a standard Python html entity)
|
||||
text = text.replace(" ", " ")
|
||||
text = text.replace("\xa0", " ") # Non-breaking space character
|
||||
|
||||
# Remove HTML tags
|
||||
text = re.sub(r"<[^>]+>", "", text)
|
||||
|
||||
# Normalize whitespace
|
||||
text = re.sub(r"\s+", " ", text)
|
||||
|
||||
return text.strip()
|
||||
|
||||
|
||||
def point_in_bbox(
|
||||
lon: float,
|
||||
lat: float,
|
||||
west: float,
|
||||
south: float,
|
||||
east: float,
|
||||
north: float,
|
||||
) -> bool:
|
||||
"""Check if a point is within a bounding box."""
|
||||
return west <= lon <= east and south <= lat <= north
|
||||
|
||||
|
||||
class InciWebSettings(BaseModel):
|
||||
"""Settings schema for InciWeb adapter."""
|
||||
|
||||
region: RegionConfig | None = None
|
||||
|
||||
|
||||
class InciWebAdapter(SourceAdapter):
|
||||
"""NIFC InciWeb wildfire narrative adapter."""
|
||||
|
||||
name = "inciweb"
|
||||
display_name = "NIFC InciWeb — Wildfire Narrative"
|
||||
description = (
|
||||
"Narrative wildfire updates from InciWeb. Editorial; lower precision "
|
||||
"than WFIGS. Use as supplementary context."
|
||||
)
|
||||
settings_schema = InciWebSettings
|
||||
requires_api_key = None
|
||||
api_key_field = None
|
||||
wizard_order = None # Ships disabled
|
||||
default_cadence_s = 600
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: AdapterConfig,
|
||||
config_store: ConfigStore,
|
||||
cursor_db_path: Path,
|
||||
) -> None:
|
||||
self._config_store = config_store
|
||||
self._cursor_db_path = cursor_db_path
|
||||
self._session: aiohttp.ClientSession | None = None
|
||||
self._db: sqlite3.Connection | None = None
|
||||
|
||||
# Conditional fetch state
|
||||
self._last_modified: str | None = None
|
||||
self._etag: str | None = None
|
||||
|
||||
# Parse region from settings
|
||||
region_dict = config.settings.get("region")
|
||||
if region_dict:
|
||||
self.region: RegionConfig | None = RegionConfig(**region_dict)
|
||||
else:
|
||||
self.region = None
|
||||
|
||||
async def startup(self) -> None:
|
||||
"""Initialize HTTP session and SQLite connection."""
|
||||
self._session = aiohttp.ClientSession(
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
)
|
||||
self._db = sqlite3.connect(self._cursor_db_path)
|
||||
|
||||
# Create table for dedup tracking
|
||||
self._db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS published_ids (
|
||||
adapter TEXT NOT NULL,
|
||||
event_id TEXT NOT NULL,
|
||||
first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (adapter, event_id)
|
||||
)
|
||||
""")
|
||||
self._db.execute("""
|
||||
CREATE INDEX IF NOT EXISTS published_ids_last_seen
|
||||
ON published_ids (last_seen)
|
||||
""")
|
||||
self._db.commit()
|
||||
|
||||
logger.info(
|
||||
"InciWeb adapter started",
|
||||
extra={"region": self.region.model_dump() if self.region else None},
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
"""Close HTTP session and SQLite connection."""
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
self._session = None
|
||||
if self._db:
|
||||
self._db.close()
|
||||
self._db = None
|
||||
logger.info("InciWeb adapter shut down")
|
||||
|
||||
async def apply_config(self, new_config: AdapterConfig) -> None:
|
||||
"""Apply new configuration from hot-reload."""
|
||||
region_dict = new_config.settings.get("region")
|
||||
if region_dict:
|
||||
self.region = RegionConfig(**region_dict)
|
||||
else:
|
||||
self.region = None
|
||||
logger.info(
|
||||
"InciWeb config updated",
|
||||
extra={"region": self.region.model_dump() if self.region else None},
|
||||
)
|
||||
|
||||
def is_published(self, event_id: str) -> bool:
|
||||
"""Check if an event has already been published."""
|
||||
if not self._db:
|
||||
return False
|
||||
cur = self._db.execute(
|
||||
"SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?",
|
||||
(self.name, event_id),
|
||||
)
|
||||
return cur.fetchone() is not None
|
||||
|
||||
def mark_published(self, event_id: str) -> None:
|
||||
"""Mark an event as published."""
|
||||
if not self._db:
|
||||
return
|
||||
self._db.execute(
|
||||
"""
|
||||
INSERT INTO published_ids (adapter, event_id, first_seen, last_seen)
|
||||
VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
||||
ON CONFLICT (adapter, event_id) DO UPDATE SET
|
||||
last_seen = CURRENT_TIMESTAMP
|
||||
""",
|
||||
(self.name, event_id),
|
||||
)
|
||||
self._db.commit()
|
||||
|
||||
def bump_last_seen(self, event_id: str) -> None:
|
||||
"""Bump the last_seen timestamp for an event."""
|
||||
if not self._db:
|
||||
return
|
||||
self._db.execute(
|
||||
"UPDATE published_ids SET last_seen = CURRENT_TIMESTAMP WHERE adapter = ? AND event_id = ?",
|
||||
(self.name, event_id),
|
||||
)
|
||||
self._db.commit()
|
||||
|
||||
def sweep_old_ids(self) -> int:
|
||||
"""Remove published_ids older than 14 days. Returns count deleted."""
|
||||
if not self._db:
|
||||
return 0
|
||||
cur = self._db.execute(
|
||||
"DELETE FROM published_ids WHERE adapter = ? AND last_seen < datetime('now', '-14 days')",
|
||||
(self.name,),
|
||||
)
|
||||
self._db.commit()
|
||||
count = cur.rowcount
|
||||
if count > 0:
|
||||
logger.info("InciWeb swept old dedup entries", extra={"count": count})
|
||||
return count
|
||||
|
||||
def subject_for(self, event: Event) -> str:
|
||||
"""Compute NATS subject for an event."""
|
||||
state = event.geo.primary_region
|
||||
if state and state.startswith("US-") and len(state) == 5:
|
||||
state_code = state[3:].lower()
|
||||
return f"central.fire.narrative.inciweb.{state_code}"
|
||||
return "central.fire.narrative.inciweb.unknown"
|
||||
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
wait=wait_exponential_jitter(initial=1, max=30),
|
||||
retry=retry_if_exception_type((aiohttp.ClientError, TimeoutError)),
|
||||
)
|
||||
async def _fetch_rss(self) -> list[dict[str, Any]]:
|
||||
"""Fetch and parse RSS feed from InciWeb."""
|
||||
if not self._session:
|
||||
raise RuntimeError("Session not initialized")
|
||||
|
||||
# Build request headers with conditional fetch support
|
||||
headers = {"User-Agent": "Central/0.4"}
|
||||
if self._last_modified:
|
||||
headers["If-Modified-Since"] = self._last_modified
|
||||
if self._etag:
|
||||
headers["If-None-Match"] = self._etag
|
||||
|
||||
async with self._session.get(INCIWEB_RSS_URL, headers=headers) as resp:
|
||||
# Handle 304 Not Modified
|
||||
if resp.status == 304:
|
||||
logger.info("InciWeb not modified")
|
||||
return []
|
||||
|
||||
resp.raise_for_status()
|
||||
|
||||
# Capture conditional fetch headers for next request
|
||||
self._last_modified = resp.headers.get("Last-Modified")
|
||||
self._etag = resp.headers.get("ETag")
|
||||
|
||||
content = await resp.text()
|
||||
|
||||
# Parse RSS XML
|
||||
items = []
|
||||
try:
|
||||
root = ET.fromstring(content)
|
||||
channel = root.find("channel")
|
||||
if channel is None:
|
||||
return []
|
||||
|
||||
for item_elem in channel.findall("item"):
|
||||
item: dict[str, Any] = {}
|
||||
|
||||
title = item_elem.find("title")
|
||||
item["title"] = title.text if title is not None and title.text else ""
|
||||
|
||||
link = item_elem.find("link")
|
||||
item["link"] = link.text if link is not None and link.text else ""
|
||||
|
||||
description = item_elem.find("description")
|
||||
item["description"] = description.text if description is not None and description.text else ""
|
||||
|
||||
pub_date = item_elem.find("pubDate")
|
||||
item["pubDate"] = pub_date.text if pub_date is not None and pub_date.text else ""
|
||||
|
||||
guid = item_elem.find("guid")
|
||||
item["guid"] = guid.text if guid is not None and guid.text else ""
|
||||
|
||||
# Check for dc:creator
|
||||
creator = item_elem.find("{http://purl.org/dc/elements/1.1/}creator")
|
||||
item["creator"] = creator.text if creator is not None and creator.text else ""
|
||||
|
||||
items.append(item)
|
||||
|
||||
except ET.ParseError as e:
|
||||
logger.error("InciWeb RSS parse error", extra={"error": str(e)})
|
||||
raise
|
||||
|
||||
logger.info(
|
||||
"InciWeb fetch completed",
|
||||
extra={"item_count": len(items)},
|
||||
)
|
||||
return items
|
||||
|
||||
async def poll(self) -> AsyncIterator[Event]:
|
||||
"""Poll InciWeb for narrative updates."""
|
||||
if not self._db:
|
||||
raise RuntimeError("Database not initialized")
|
||||
|
||||
# Fetch RSS feed
|
||||
try:
|
||||
items = await self._fetch_rss()
|
||||
except Exception as e:
|
||||
logger.error("InciWeb fetch failed", extra={"error": str(e)})
|
||||
raise
|
||||
|
||||
events_yielded = 0
|
||||
|
||||
for item in items:
|
||||
guid = item.get("guid", "")
|
||||
if not guid:
|
||||
continue
|
||||
|
||||
# Dedup: skip if already published
|
||||
if self.is_published(guid):
|
||||
self.bump_last_seen(guid)
|
||||
continue
|
||||
|
||||
description_html = item.get("description", "")
|
||||
|
||||
# Parse coordinates from description
|
||||
centroid = parse_coordinates_from_description(description_html)
|
||||
|
||||
# Post-filter: skip if point outside region bbox
|
||||
if self.region and centroid:
|
||||
lon, lat = centroid
|
||||
if not point_in_bbox(
|
||||
lon, lat,
|
||||
self.region.west, self.region.south,
|
||||
self.region.east, self.region.north,
|
||||
):
|
||||
continue
|
||||
|
||||
# Parse state from description
|
||||
state_code = parse_state_from_description(description_html)
|
||||
|
||||
# Build regions
|
||||
if state_code:
|
||||
regions = [f"US-{state_code}"]
|
||||
primary_region = f"US-{state_code}"
|
||||
else:
|
||||
regions = []
|
||||
primary_region = None
|
||||
|
||||
# Parse pubDate (RFC 822 format)
|
||||
pub_date_str = item.get("pubDate", "")
|
||||
try:
|
||||
event_time = parsedate_to_datetime(pub_date_str)
|
||||
# Ensure UTC
|
||||
if event_time.tzinfo is None:
|
||||
event_time = event_time.replace(tzinfo=timezone.utc)
|
||||
else:
|
||||
event_time = event_time.astimezone(timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
event_time = datetime.now(timezone.utc)
|
||||
|
||||
# Build geo
|
||||
geo = Geo(
|
||||
centroid=centroid,
|
||||
bbox=(centroid[0], centroid[1], centroid[0], centroid[1]) if centroid else None,
|
||||
regions=regions,
|
||||
primary_region=primary_region,
|
||||
)
|
||||
|
||||
# Strip HTML from description
|
||||
description_plain = strip_html(description_html)
|
||||
|
||||
# Build event
|
||||
event = Event(
|
||||
id=guid,
|
||||
adapter=self.name,
|
||||
category="fire.narrative.inciweb",
|
||||
time=event_time,
|
||||
severity=0, # Narrative; not authoritative
|
||||
geo=geo,
|
||||
data={
|
||||
"title": item.get("title", ""),
|
||||
"description": description_plain,
|
||||
"description_html": description_html,
|
||||
"url": item.get("link", ""),
|
||||
"guid": guid,
|
||||
"raw": item,
|
||||
},
|
||||
)
|
||||
|
||||
yield event
|
||||
self.mark_published(guid)
|
||||
events_yielded += 1
|
||||
|
||||
# Periodic cleanup of old entries
|
||||
self.sweep_old_ids()
|
||||
|
||||
logger.info(
|
||||
"InciWeb poll completed",
|
||||
extra={"events_yielded": events_yielded},
|
||||
)
|
||||
599
tests/test_inciweb.py
Normal file
599
tests/test_inciweb.py
Normal file
|
|
@ -0,0 +1,599 @@
|
|||
"""Tests for InciWeb adapter."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from central.config_models import AdapterConfig
|
||||
from central.models import Event, Geo
|
||||
|
||||
|
||||
# Real RSS snippet from InciWeb (frozen fixture)
|
||||
SAMPLE_RSS_CONTENT = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0" xml:base="http://inciweb.wildfire.gov/">
|
||||
<channel>
|
||||
<title>InciWeb</title>
|
||||
<link>http://inciweb.wildfire.gov/</link>
|
||||
<description>Inciweb Fire Incidents</description>
|
||||
<language>en</language>
|
||||
<item>
|
||||
<title>MNMNS Stewart Trail</title>
|
||||
<link>http://inciweb.wildfire.gov/incident-information/mnmns-stewart-trail</link>
|
||||
<description>Last updated: 2026-05-18
|
||||
|
||||
---
|
||||
|
||||
The type of incident is Wildfire and involves the following unit(s) Minnesota Department of Natural Resources.
|
||||
|
||||
---
|
||||
|
||||
State: Minnesota
|
||||
|
||||
---
|
||||
|
||||
Coordinates:
|
||||
|
||||
Latitude: 47° 3 17 Longitude: 91° 38 6
|
||||
|
||||
---
|
||||
|
||||
NOTE: All fire perimeters and points are approximations.
|
||||
|
||||
---
|
||||
|
||||
Incident Overview: The Stewart Trail Fire was detected during the afternoon hours on Friday, May 15, 2026.&nbsp;A temporary flight restriction (TFR) is in place.</description>
|
||||
<pubDate>Fri, 15 May 2026 08:48:11 EDT</pubDate>
|
||||
<dc:creator>llangeberg</dc:creator>
|
||||
<guid isPermaLink="false">327828</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>CACNP Santa Rosa Island Fire</title>
|
||||
<link>http://inciweb.wildfire.gov/incident-information/cacnp-santa-rosa-island-fire</link>
|
||||
<description>Last updated: 2026-05-18
|
||||
|
||||
---
|
||||
|
||||
The type of incident is Wildfire and involves the following unit(s) Channel Islands National Park.
|
||||
|
||||
---
|
||||
|
||||
State: California
|
||||
|
||||
---
|
||||
|
||||
Coordinates:
|
||||
|
||||
Latitude: 33° 55 2 Longitude: 120° 5 10
|
||||
|
||||
---
|
||||
|
||||
NOTE: All fire perimeters and points are approximations.
|
||||
|
||||
---
|
||||
|
||||
Incident Overview: On Friday, May 15, 2026, an aircraft flying over Santa Rosa Island in Channel Islands National Park reported a wildfire.<br><p>This is a <strong>full-suppression</strong> human-caused wildfire and is under investigation.</p>&nbsp;</description>
|
||||
<pubDate>Sat, 16 May 2026 12:09:07 EDT</pubDate>
|
||||
<dc:creator>mtheune</dc:creator>
|
||||
<guid isPermaLink="false">327838</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Some Fire Without Coordinates</title>
|
||||
<link>http://inciweb.wildfire.gov/incident-information/no-coords-fire</link>
|
||||
<description>Last updated: 2026-05-18
|
||||
|
||||
---
|
||||
|
||||
The type of incident is Wildfire.
|
||||
|
||||
---
|
||||
|
||||
State: Unknown State
|
||||
|
||||
---
|
||||
|
||||
Incident Overview: This is a test incident without coordinates.</description>
|
||||
<pubDate>Mon, 18 May 2026 09:00:00 EDT</pubDate>
|
||||
<dc:creator>test</dc:creator>
|
||||
<guid isPermaLink="false">999999</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Florida Fire Outside Bbox</title>
|
||||
<link>http://inciweb.wildfire.gov/incident-information/florida-fire</link>
|
||||
<description>Last updated: 2026-05-18
|
||||
|
||||
---
|
||||
|
||||
State: Florida
|
||||
|
||||
---
|
||||
|
||||
Coordinates:
|
||||
|
||||
Latitude: 26° 0 0 Longitude: 80° 0 0
|
||||
|
||||
---
|
||||
|
||||
Incident Overview: This fire is in Florida, outside the CONUS west bbox.</description>
|
||||
<pubDate>Mon, 18 May 2026 10:00:00 EDT</pubDate>
|
||||
<dc:creator>test</dc:creator>
|
||||
<guid isPermaLink="false">888888</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>"""
|
||||
|
||||
|
||||
class TestInciWebHelpers:
|
||||
"""Tests for InciWeb helper functions."""
|
||||
|
||||
def test_parse_coordinates_from_description(self):
|
||||
"""Parse coordinates from description text."""
|
||||
from central.adapters.inciweb import parse_coordinates_from_description
|
||||
|
||||
description = """Coordinates:
|
||||
|
||||
Latitude: 47° 3 17 Longitude: 91° 38 6"""
|
||||
|
||||
result = parse_coordinates_from_description(description)
|
||||
assert result is not None
|
||||
lon, lat = result
|
||||
# 47° 3' 17" = 47.054722...
|
||||
assert 47.0 < lat < 47.1
|
||||
# 91° 38' 6" = -91.635 (west longitude)
|
||||
assert -92.0 < lon < -91.0
|
||||
|
||||
def test_parse_coordinates_no_match(self):
|
||||
"""No coordinates in description returns None."""
|
||||
from central.adapters.inciweb import parse_coordinates_from_description
|
||||
|
||||
result = parse_coordinates_from_description("No coordinates here")
|
||||
assert result is None
|
||||
|
||||
def test_parse_state_from_description(self):
|
||||
"""Parse state name and return 2-letter code."""
|
||||
from central.adapters.inciweb import parse_state_from_description
|
||||
|
||||
description = """---
|
||||
|
||||
State: Minnesota
|
||||
|
||||
---"""
|
||||
assert parse_state_from_description(description) == "MN"
|
||||
|
||||
def test_parse_state_from_description_new_mexico(self):
|
||||
"""Parse multi-word state name."""
|
||||
from central.adapters.inciweb import parse_state_from_description
|
||||
|
||||
description = """State: New Mexico
|
||||
|
||||
---"""
|
||||
assert parse_state_from_description(description) == "NM"
|
||||
|
||||
def test_parse_state_from_description_no_match(self):
|
||||
"""Unknown state name returns None."""
|
||||
from central.adapters.inciweb import parse_state_from_description
|
||||
|
||||
description = """State: Unknown State
|
||||
|
||||
---"""
|
||||
assert parse_state_from_description(description) is None
|
||||
|
||||
def test_strip_html(self):
|
||||
"""HTML tags are stripped, entities decoded."""
|
||||
from central.adapters.inciweb import strip_html
|
||||
|
||||
html = "This is &nbsp;a <strong>test</strong> with <br>line breaks."
|
||||
result = strip_html(html)
|
||||
assert "<" not in result
|
||||
assert ">" not in result
|
||||
assert " " not in result
|
||||
assert "&" not in result
|
||||
assert "test" in result
|
||||
|
||||
|
||||
class TestInciWebAdapter:
|
||||
"""Tests for InciWeb adapter."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_config(self) -> AdapterConfig:
|
||||
return AdapterConfig(
|
||||
name="inciweb",
|
||||
enabled=True,
|
||||
cadence_s=600,
|
||||
settings={
|
||||
"region": {"north": 49.0, "south": 31.0, "east": -102.0, "west": -124.0}
|
||||
},
|
||||
updated_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_config_no_region(self) -> AdapterConfig:
|
||||
return AdapterConfig(
|
||||
name="inciweb",
|
||||
enabled=True,
|
||||
cadence_s=600,
|
||||
settings={},
|
||||
updated_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_config_store(self) -> MagicMock:
|
||||
return MagicMock()
|
||||
|
||||
@pytest.fixture
|
||||
def cursor_db_path(self, tmp_path: Path) -> Path:
|
||||
return tmp_path / "cursors.db"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalization_with_georss_point(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""Items with coordinates are correctly normalized."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
mock_response = AsyncMock()
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.text = AsyncMock(return_value=SAMPLE_RSS_CONTENT)
|
||||
|
||||
with patch.object(adapter._session, "get", return_value=AsyncMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock())):
|
||||
events = [e async for e in adapter.poll()]
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
# Bbox is west=-124, east=-102 (CONUS west)
|
||||
# Minnesota at -91 longitude is OUTSIDE bbox (east of -102)
|
||||
# California at -120 longitude is INSIDE bbox
|
||||
# Florida at -80 longitude is OUTSIDE bbox
|
||||
# Unknown state without coords passes through
|
||||
assert len(events) == 2
|
||||
|
||||
# Check California event
|
||||
ca_event = next(e for e in events if e.data["guid"] == "327838")
|
||||
assert ca_event.id == "327838"
|
||||
assert ca_event.adapter == "inciweb"
|
||||
assert ca_event.category == "fire.narrative.inciweb"
|
||||
assert ca_event.severity == 0
|
||||
assert ca_event.geo.primary_region == "US-CA"
|
||||
assert ca_event.geo.centroid is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalization_without_georss_point(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""Items without coordinates have centroid=None."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
mock_response = AsyncMock()
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.text = AsyncMock(return_value=SAMPLE_RSS_CONTENT)
|
||||
|
||||
with patch.object(adapter._session, "get", return_value=AsyncMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock())):
|
||||
events = [e async for e in adapter.poll()]
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
# All 4 items pass (no region filter)
|
||||
assert len(events) == 4
|
||||
|
||||
# Check item without coords
|
||||
no_coords_event = next(e for e in events if e.data["guid"] == "999999")
|
||||
assert no_coords_event.geo.centroid is None
|
||||
assert no_coords_event.geo.regions == []
|
||||
assert no_coords_event.geo.primary_region is None
|
||||
|
||||
def test_state_parse_from_title(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""State parsing from description produces correct region."""
|
||||
from central.adapters.inciweb import parse_state_from_description
|
||||
|
||||
# Test California
|
||||
assert parse_state_from_description("State: California\n") == "CA"
|
||||
# Test Minnesota
|
||||
assert parse_state_from_description("State: Minnesota\n---") == "MN"
|
||||
# Test multi-word
|
||||
assert parse_state_from_description("State: New York\n") == "NY"
|
||||
# Test unknown
|
||||
assert parse_state_from_description("State: Narnia\n") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_html_stripping(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""HTML is stripped from description, raw preserved in description_html."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
mock_response = AsyncMock()
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.text = AsyncMock(return_value=SAMPLE_RSS_CONTENT)
|
||||
|
||||
with patch.object(adapter._session, "get", return_value=AsyncMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock())):
|
||||
events = [e async for e in adapter.poll()]
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
# California item has HTML tags in description
|
||||
ca_event = next(e for e in events if e.data["guid"] == "327838")
|
||||
|
||||
# Plain text should not have HTML tags
|
||||
assert "<br>" not in ca_event.data["description"]
|
||||
assert "<p>" not in ca_event.data["description"]
|
||||
assert "<strong>" not in ca_event.data["description"]
|
||||
assert " " not in ca_event.data["description"]
|
||||
|
||||
# Raw HTML should be preserved
|
||||
assert "<br>" in ca_event.data["description_html"] or "<br>" in ca_event.data["description_html"]
|
||||
|
||||
def test_subject_for_with_state(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""subject_for returns correct subject with state."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config, mock_config_store, cursor_db_path)
|
||||
|
||||
event = Event(
|
||||
id="test-id",
|
||||
adapter="inciweb",
|
||||
category="fire.narrative.inciweb",
|
||||
time=datetime.now(timezone.utc),
|
||||
severity=0,
|
||||
geo=Geo(primary_region="US-CA"),
|
||||
data={"title": "Test Fire", "description": "Test"},
|
||||
)
|
||||
|
||||
subject = adapter.subject_for(event)
|
||||
assert subject == "central.fire.narrative.inciweb.ca"
|
||||
|
||||
def test_subject_for_without_state(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""subject_for returns unknown when no state."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config, mock_config_store, cursor_db_path)
|
||||
|
||||
event = Event(
|
||||
id="test-id",
|
||||
adapter="inciweb",
|
||||
category="fire.narrative.inciweb",
|
||||
time=datetime.now(timezone.utc),
|
||||
severity=0,
|
||||
geo=Geo(),
|
||||
data={"title": "Test Fire", "description": "Test"},
|
||||
)
|
||||
|
||||
subject = adapter.subject_for(event)
|
||||
assert subject == "central.fire.narrative.inciweb.unknown"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dedup_same_guid(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""is_published/mark_published provides dedup functionality."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
# Initially not published
|
||||
assert adapter.is_published("327828") is False
|
||||
|
||||
# Mark as published
|
||||
adapter.mark_published("327828")
|
||||
|
||||
# Now it should be published
|
||||
assert adapter.is_published("327828") is True
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bbox_filters_point_outside(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""Items with coords outside bbox are filtered; items without coords pass."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
mock_response = AsyncMock()
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.text = AsyncMock(return_value=SAMPLE_RSS_CONTENT)
|
||||
|
||||
with patch.object(adapter._session, "get", return_value=AsyncMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock())):
|
||||
events = [e async for e in adapter.poll()]
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
# Florida (-80 longitude) should be filtered out
|
||||
guids = {e.data["guid"] for e in events}
|
||||
assert "888888" not in guids # Florida, outside bbox
|
||||
|
||||
# Item without coords should pass through
|
||||
assert "999999" in guids
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_apply_config_region_change(
|
||||
self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""apply_config updates region."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config, mock_config_store, cursor_db_path)
|
||||
|
||||
assert adapter.region is not None
|
||||
assert adapter.region.north == 49.0
|
||||
|
||||
new_config = AdapterConfig(
|
||||
name="inciweb",
|
||||
enabled=True,
|
||||
cadence_s=600,
|
||||
settings={
|
||||
"region": {"north": 50.0, "south": 35.0, "east": -100.0, "west": -120.0}
|
||||
},
|
||||
updated_at=datetime.now(timezone.utc),
|
||||
)
|
||||
await adapter.apply_config(new_config)
|
||||
|
||||
assert adapter.region.north == 50.0
|
||||
assert adapter.region.south == 35.0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dedup_in_poll_loop(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""Dedup integration: second poll with same items yields zero events."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
# Single-item RSS for clarity
|
||||
single_item_rss = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<title>InciWeb</title>
|
||||
<item>
|
||||
<title>Test Fire</title>
|
||||
<link>http://inciweb.wildfire.gov/test</link>
|
||||
<description>State: California</description>
|
||||
<pubDate>Mon, 18 May 2026 09:00:00 EDT</pubDate>
|
||||
<guid isPermaLink="false">DEDUP-TEST-001</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>"""
|
||||
|
||||
def make_mock_response():
|
||||
mock_response = AsyncMock()
|
||||
mock_response.status = 200
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.text = AsyncMock(return_value=single_item_rss)
|
||||
mock_response.headers = {"Last-Modified": None, "ETag": None}
|
||||
return mock_response
|
||||
|
||||
# First poll: should yield 1 event
|
||||
with patch.object(
|
||||
adapter._session, "get",
|
||||
return_value=AsyncMock(
|
||||
__aenter__=AsyncMock(return_value=make_mock_response()),
|
||||
__aexit__=AsyncMock()
|
||||
)
|
||||
):
|
||||
events_first = [e async for e in adapter.poll()]
|
||||
|
||||
assert len(events_first) == 1
|
||||
assert events_first[0].data["guid"] == "DEDUP-TEST-001"
|
||||
|
||||
# Verify mark_published was called
|
||||
assert adapter.is_published("DEDUP-TEST-001") is True
|
||||
|
||||
# Second poll: same item should be skipped (dedup)
|
||||
with patch.object(
|
||||
adapter._session, "get",
|
||||
return_value=AsyncMock(
|
||||
__aenter__=AsyncMock(return_value=make_mock_response()),
|
||||
__aexit__=AsyncMock()
|
||||
)
|
||||
):
|
||||
events_second = [e async for e in adapter.poll()]
|
||||
|
||||
assert len(events_second) == 0 # Dedup prevents re-yield
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conditional_304_yields_zero(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""HTTP 304 Not Modified returns empty list and yields zero events."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
# Mock 304 response
|
||||
mock_response = AsyncMock()
|
||||
mock_response.status = 304
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
|
||||
with patch.object(
|
||||
adapter._session, "get",
|
||||
return_value=AsyncMock(
|
||||
__aenter__=AsyncMock(return_value=mock_response),
|
||||
__aexit__=AsyncMock()
|
||||
)
|
||||
):
|
||||
events = [e async for e in adapter.poll()]
|
||||
|
||||
assert len(events) == 0
|
||||
|
||||
await adapter.shutdown()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conditional_headers_sent_after_first_poll(
|
||||
self, mock_config_no_region: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path
|
||||
):
|
||||
"""Conditional fetch headers sent on second poll after first captures them."""
|
||||
from central.adapters.inciweb import InciWebAdapter
|
||||
|
||||
adapter = InciWebAdapter(mock_config_no_region, mock_config_store, cursor_db_path)
|
||||
await adapter.startup()
|
||||
|
||||
# First response with Last-Modified and ETag
|
||||
first_response = AsyncMock()
|
||||
first_response.status = 200
|
||||
first_response.raise_for_status = MagicMock()
|
||||
first_response.text = AsyncMock(return_value="""<?xml version="1.0"?>
|
||||
<rss version="2.0"><channel><title>Test</title></channel></rss>""")
|
||||
first_response.headers = {
|
||||
"Last-Modified": "Tue, 19 May 2026 03:00:00 GMT",
|
||||
"ETag": "\"abc123\"",
|
||||
}
|
||||
|
||||
# Track headers sent on second request
|
||||
captured_headers = {}
|
||||
|
||||
def capture_get(*args, **kwargs):
|
||||
captured_headers.update(kwargs.get("headers", {}))
|
||||
second_response = AsyncMock()
|
||||
second_response.status = 304
|
||||
second_response.raise_for_status = MagicMock()
|
||||
return AsyncMock(
|
||||
__aenter__=AsyncMock(return_value=second_response),
|
||||
__aexit__=AsyncMock()
|
||||
)
|
||||
|
||||
# First poll
|
||||
with patch.object(
|
||||
adapter._session, "get",
|
||||
return_value=AsyncMock(
|
||||
__aenter__=AsyncMock(return_value=first_response),
|
||||
__aexit__=AsyncMock()
|
||||
)
|
||||
):
|
||||
[e async for e in adapter.poll()]
|
||||
|
||||
# Verify adapter captured the headers
|
||||
assert adapter._last_modified == "Tue, 19 May 2026 03:00:00 GMT"
|
||||
assert adapter._etag == "\"abc123\""
|
||||
|
||||
# Second poll with header capture
|
||||
with patch.object(adapter._session, "get", side_effect=capture_get):
|
||||
[e async for e in adapter.poll()]
|
||||
|
||||
# Verify conditional headers were sent
|
||||
assert captured_headers.get("If-Modified-Since") == "Tue, 19 May 2026 03:00:00 GMT"
|
||||
assert captured_headers.get("If-None-Match") == "\"abc123\""
|
||||
|
||||
await adapter.shutdown()
|
||||
Loading…
Add table
Add a link
Reference in a new issue