From b87696bf6738bb0a475e170957f973ea990fc9ab Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Thu, 4 Jun 2026 06:00:10 +0000 Subject: [PATCH] fix(weather): v0.5.7-weather -- NWS HTML strip + ALERT_CATEGORIES audit (NATS pattern already valid) First family of the v0.5.7 NATS-and-categories campaign (Matt review of Central v0.10.0 meshai_integration_guide.md). Weather lands first because the NWS NATS pattern is already legal; the other five families need invalid mid-subject > rewrites that will ship per-family. FIX 1 -- NWS NATS pattern validated. _subjects_for("nws", "us.id") -> ["central.wx.alert.us.id.>"]. The wildcard token > sits at the tail only (token index -1), so the subject is a legal NATS multi-level wildcard. No code change. Live introspection confirmed in-container. FIX 2 -- NWS HTML strip in mesh composer. Per Central guide Surprise 3, data["description"] and data["instruction"] arrive as raw HTML (<p>, <br>, <b>, &nbsp;, &mdash;, ...). Until now the composer fed event.title / event.summary straight to LoRa, so any future title/summary populated from those fields would have leaked literal markup onto the wire. Added strip_html_tags(text) -> str in meshai/notifications/renderers/composer.py. Block-level tags (br, p, div, li, tr, h1-h6) become a single space so adjacent paragraphs do not fuse; all other tags are removed; HTML entities are decoded via html.unescape; whitespace is collapsed. Applied in _primary_identifier (title and summary paths) and _region_segment BEFORE byte-budget truncation, so the 150 B cap counts real glyphs, not markup. Universal (not NWS-gated) since strip is a no-op on plain text -- protects against future adapters that surface raw HTML too. FIX 3 -- ALERT_CATEGORIES weather audit. Cross-referenced ALERT_CATEGORIES{toggle="weather"} against meshai/env/nws.py:_derive_category() emission set: nws.py emits: weather_warning, weather_watch, weather_advisory, weather_statement registry weather: weather_warning, weather_watch, weather_advisory, weather_statement Parity. No additions, no removals. The v0.5.2 stream_* migration to the seismic family (USGS hydro under the GUI Geohazards tab) is already reflected; weather is clean at 4 entries. Added a comment block above the weather section pointing at test_alert_categories_weather_complete which now enforces this parity going forward -- if a new branch is added to _derive_category(), the test fails and forces a matching registry entry. Tests ----- PYTHONPATH=. pytest -q: 345 passed (was 328; +17 new in tests/test_weather_v057.py). - strip_html_tags: simple tags, br/paragraph -> space, entity decode (&amp; &nbsp; &mdash;), nested/attrs, plain-text no-op, empty input, whitespace collapse. - compose_mesh_message integration: HTML in title scrubbed; HTML in summary fallback scrubbed; 150 B budget still holds. - Weather parity: reflection-based scan of NWSAlertsAdapter._derive_category() vs registry; both must match. 
- Required-fields check on the four weather entries. Safe-mode preserved (master off, all family toggles off, all adapters native, central disabled). No live toggle flipped. Not tagging yet -- v0.5.7 tag waits until all families ship. Co-Authored-By: Claude Opus 4.7 (1M context) --- meshai/notifications/categories.py | 5 + meshai/notifications/renderers/composer.py | 44 +++++- tests/test_weather_v057.py | 172 +++++++++++++++++++++ 3 files changed, 217 insertions(+), 4 deletions(-) create mode 100644 tests/test_weather_v057.py diff --git a/meshai/notifications/categories.py b/meshai/notifications/categories.py index 43b428a..7f55b0a 100644 --- a/meshai/notifications/categories.py +++ b/meshai/notifications/categories.py @@ -192,6 +192,11 @@ ALERT_CATEGORIES = { }, # Environmental - Weather + # v0.5.7-weather audit: nws.py._derive_category() emits exactly these four + # category IDs (suffix dispatch on the NWS event_type: warning/watch/ + # advisory/{anything else -> statement}). The set is in lockstep — + # test_alert_categories_weather_complete enforces parity if nws.py changes. + # If a new branch is added there, add the matching entry here too. "weather_warning": { "name": "Severe Weather Warning", "description": "NWS Warning affecting your mesh area — highest urgency weather alert", diff --git a/meshai/notifications/renderers/composer.py b/meshai/notifications/renderers/composer.py index e384e22..099b074 100644 --- a/meshai/notifications/renderers/composer.py +++ b/meshai/notifications/renderers/composer.py @@ -20,6 +20,8 @@ exceed the budget, the primary identifier is shrunk by codepoints and suffixed with `…` so the byte budget always holds. """ +import re +from html import unescape from typing import Optional from meshai.notifications.events import Event @@ -129,6 +131,34 @@ def _byte_len(s: str) -> int: return len(s.encode("utf-8")) +# v0.5.7-weather: NWS data.description / data.instruction arrive as raw HTML +# (Central guide §"Surprise 3"). 
Adapters that reuse those fields for title / +# summary / region currently leak literal <p>/<br>/<li> tags to LoRa. Strip +# tags + decode entities BEFORE byte-budget truncation so the 150 B cap counts +# real glyphs, not markup. Applied universally — safe (no-op) on plain text. +_HTML_TAG_RE = re.compile(r"<[^>]+>") +# <br> and block-closers become spaces so adjacent paragraphs don't fuse. +_HTML_BREAK_RE = re.compile(r"</?(?:br|p|div|li|tr|h[1-6])\b[^>]*>", re.IGNORECASE) + + +def strip_html_tags(text: str) -> str: + """Remove HTML tags and decode entities, collapsing whitespace. + + Block-level tags (<br>, <p>, etc.) become a single space so sentences + from adjacent paragraphs don't fuse. All other tags are removed outright. + HTML entities (&amp;, &nbsp;, &mdash;, …) are decoded via html.unescape. + Result is whitespace-collapsed and stripped. + """ + if not text: + return "" + s = _HTML_BREAK_RE.sub(" ", text) + s = _HTML_TAG_RE.sub("", s) + s = unescape(s) + # Collapse runs of whitespace (incl. newlines from the original markup). + s = re.sub(r"\s+", " ", s).strip() + return s + + def _category_emoji(event: Event) -> str: e = _CATEGORY_EMOJI.get(event.category) if e: @@ -156,11 +186,14 @@ def _category_label(event: Event) -> str: def _primary_identifier(event: Event) -> str: - """Title > summary > registry friendly name > scrubbed category.""" - t = (event.title or "").strip() + """Title > summary > registry friendly name > scrubbed category. + + HTML is stripped first so the byte budget counts real glyphs. + """ + t = strip_html_tags((event.title or "").strip()) if t: return t - s = (event.summary or "").strip() + s = strip_html_tags((event.summary or "").strip()) if s: return s try: @@ -179,7 +212,10 @@ def _primary_identifier(event: Event) -> str: def _region_segment(event: Event) -> Optional[str]: region = event.region or (event.regions[0] if event.regions else None) - return str(region) if region else None + if region is None: + return None + cleaned = strip_html_tags(str(region)) + return cleaned or None def _safe(callable_): diff --git a/tests/test_weather_v057.py b/tests/test_weather_v057.py new file mode 100644 index 0000000..06557fe --- /dev/null +++ b/tests/test_weather_v057.py @@ -0,0 +1,172 @@ +"""v0.5.7-weather: NWS HTML strip + ALERT_CATEGORIES weather audit. + +Covers three things shipped in v0.5.7-weather: + +1. strip_html_tags() — NWS data.description / data.instruction arrive as raw + HTML (per Central guide §Surprise 3). Verify tags are stripped, entities + decoded, paragraph breaks become spaces, plain text is a no-op. +2. 
compose_mesh_message() integration — an Event whose title contains HTML + produces a clean LoRa string (no literal <p>/<br>). +3. Weather category parity — ALERT_CATEGORIES{toggle=weather} is exactly the + set that nws.py._derive_category() can emit. Fail loudly if either side + drifts so the weather family stays "every event meshai sees is selectable". +""" + +import inspect + +import pytest + +from meshai.notifications.categories import ALERT_CATEGORIES +from meshai.notifications.events import make_event +from meshai.notifications.renderers.composer import ( + compose_mesh_message, + strip_html_tags, +) + + +# ---------- strip_html_tags() ---------------------------------------------- + + +def test_strip_html_tags_removes_simple_tags(): + assert strip_html_tags("<p>Severe</p>") == "Severe" + + +def test_strip_html_tags_br_becomes_space(): + # <br> separates two sentences in NWS bodies; must not fuse. + assert strip_html_tags("hello<br>world") == "hello world" + + +def test_strip_html_tags_paragraph_break_becomes_space(): + assert strip_html_tags("<p>hello</p><p>world</p>") == "hello world" + + +def test_strip_html_tags_decodes_entities(): + assert strip_html_tags("Wind gusts 25 &amp; 35 mph") == "Wind gusts 25 & 35 mph" + # &nbsp; decodes to U+00A0 which the whitespace collapse normalizes to a + # regular space — tight ASCII whitespace is what we want on LoRa. + assert strip_html_tags("Twin Falls&nbsp;County") == "Twin Falls County" + assert strip_html_tags("12 &mdash; 35 mph") == "12 — 35 mph" + + +def test_strip_html_tags_nested_and_attrs(): + raw = '<p class="alert"><b>Tornado</b> WARNING</p>' + assert strip_html_tags(raw) == "Tornado WARNING" + + +def test_strip_html_tags_plain_text_noop(): + assert strip_html_tags("Red Flag Warning until 04:00Z") == "Red Flag Warning until 04:00Z" + + +def test_strip_html_tags_empty_inputs(): + assert strip_html_tags("") == "" + assert strip_html_tags(None) == "" # type: ignore[arg-type] + + +def test_strip_html_tags_collapses_whitespace(): + raw = "<p>line 1</p>\n<p>line\t2</p>" + assert strip_html_tags(raw) == "line 1 line 2" + + +# ---------- compose_mesh_message integration ------------------------------- + + +def test_compose_mesh_message_strips_html_in_title(): + event = make_event( + source="nws", + category="weather_warning", + severity="priority", + title="<p>Severe Thunderstorm Warning</p>", + summary="", + region="Twin Falls", + ) + line = compose_mesh_message(event) + # No literal markup escapes onto the wire. + assert "<" not in line + assert "<p>" not in line + assert "Severe Thunderstorm Warning" in line + + +def test_compose_mesh_message_strips_html_with_entities_and_br(): + event = make_event( + source="nws", + category="weather_advisory", + severity="routine", + title="Wind Advisory&nbsp;&mdash;<br>SW gusts 50 mph", + summary="", + region="Magic Valley", + ) + line = compose_mesh_message(event) + assert "<br>" not in line + assert "&nbsp;" not in line + assert "&mdash;" not in line + # Byte budget still holds. + assert len(line.encode("utf-8")) <= 150 + + +def test_compose_mesh_message_html_fallthrough_to_summary(): + # title empty -> summary path also strips HTML. + event = make_event( + source="nws", + category="weather_statement", + severity="routine", + title="", + summary="<p>Special Weather Statement</p>", + ) + line = compose_mesh_message(event) + assert "<" not in line + assert "Special Weather Statement" in line + + +# ---------- ALERT_CATEGORIES weather audit --------------------------------- + + +def _nws_emitted_categories() -> set[str]: + """Walk nws.py source for every literal returned by _derive_category(). + + Reflection-style audit: read the method body's source and collect the + quoted return values. Keeps the test honest if someone adds a 5th branch + without thinking about ALERT_CATEGORIES. + """ + from meshai.env.nws import NWSAlertsAdapter + src = inspect.getsource(NWSAlertsAdapter._derive_category) + import re + return set(re.findall(r'return\s+"([a-z_]+)"', src)) + + +def test_nws_emits_exactly_four_weather_categories(): + emitted = _nws_emitted_categories() + assert emitted == { + "weather_warning", + "weather_watch", + "weather_advisory", + "weather_statement", + }, f"nws.py emission set drifted: {emitted}" + + +def test_alert_categories_weather_complete(): + """Every weather category nws.py can emit must exist in ALERT_CATEGORIES + with toggle='weather'. Anything tagged toggle='weather' that nws.py + cannot emit is an orphan (no UI selectable event would ever surface it). 
+ """ + registry_weather = { + cid for cid, info in ALERT_CATEGORIES.items() + if info.get("toggle") == "weather" + } + emitted = _nws_emitted_categories() + missing = emitted - registry_weather + orphans = registry_weather - emitted + assert not missing, f"nws.py emits categories missing from ALERT_CATEGORIES: {missing}" + assert not orphans, f"ALERT_CATEGORIES has orphan weather entries: {orphans}" + + +@pytest.mark.parametrize( + "cat", + ["weather_warning", "weather_watch", "weather_advisory", "weather_statement"], +) +def test_weather_categories_have_required_fields(cat): + info = ALERT_CATEGORIES[cat] + assert info["toggle"] == "weather" + assert info["name"] + assert info["description"] + assert info["default_severity"] in {"routine", "priority", "immediate"} + assert info["example_message"]