diff --git a/meshai/notifications/categories.py b/meshai/notifications/categories.py index 43b428a..7f55b0a 100644 --- a/meshai/notifications/categories.py +++ b/meshai/notifications/categories.py @@ -192,6 +192,11 @@ ALERT_CATEGORIES = { }, # Environmental - Weather + # v0.5.7-weather audit: nws.py._derive_category() emits exactly these four + # category IDs (suffix dispatch on the NWS event_type: warning/watch/ + # advisory/{anything else -> statement}). The set is in lockstep — + # test_alert_categories_weather_complete enforces parity if nws.py changes. + # If a new branch is added there, add the matching entry here too. "weather_warning": { "name": "Severe Weather Warning", "description": "NWS Warning affecting your mesh area — highest urgency weather alert", diff --git a/meshai/notifications/renderers/composer.py b/meshai/notifications/renderers/composer.py index e384e22..099b074 100644 --- a/meshai/notifications/renderers/composer.py +++ b/meshai/notifications/renderers/composer.py @@ -20,6 +20,8 @@ exceed the budget, the primary identifier is shrunk by codepoints and suffixed with `…` so the byte budget always holds. """ +import re +from html import unescape from typing import Optional from meshai.notifications.events import Event @@ -129,6 +131,34 @@ def _byte_len(s: str) -> int: return len(s.encode("utf-8")) +# v0.5.7-weather: NWS data.description / data.instruction arrive as raw HTML +# (Central guide §"Surprise 3"). Adapters that reuse those fields for title / +# summary / region currently leak literal
/
/
, etc.) become a single space so sentences + from adjacent paragraphs don't fuse. All other tags are removed outright. + HTML entities (&, , —, …) are decoded via html.unescape. + Result is whitespace-collapsed and stripped. + """ + if not text: + return "" + s = _HTML_BREAK_RE.sub(" ", text) + s = _HTML_TAG_RE.sub("", s) + s = unescape(s) + # Collapse runs of whitespace (incl. newlines from the original markup). + s = re.sub(r"\s+", " ", s).strip() + return s + + def _category_emoji(event: Event) -> str: e = _CATEGORY_EMOJI.get(event.category) if e: @@ -156,11 +186,14 @@ def _category_label(event: Event) -> str: def _primary_identifier(event: Event) -> str: - """Title > summary > registry friendly name > scrubbed category.""" - t = (event.title or "").strip() + """Title > summary > registry friendly name > scrubbed category. + + HTML is stripped first so the byte budget counts real glyphs. + """ + t = strip_html_tags((event.title or "").strip()) if t: return t - s = (event.summary or "").strip() + s = strip_html_tags((event.summary or "").strip()) if s: return s try: @@ -179,7 +212,10 @@ def _primary_identifier(event: Event) -> str: def _region_segment(event: Event) -> Optional[str]: region = event.region or (event.regions[0] if event.regions else None) - return str(region) if region else None + if region is None: + return None + cleaned = strip_html_tags(str(region)) + return cleaned or None def _safe(callable_): diff --git a/tests/test_weather_v057.py b/tests/test_weather_v057.py new file mode 100644 index 0000000..06557fe --- /dev/null +++ b/tests/test_weather_v057.py @@ -0,0 +1,172 @@ +"""v0.5.7-weather: NWS HTML strip + ALERT_CATEGORIES weather audit. + +Covers three things shipped in v0.5.7-weather: + +1. strip_html_tags() — NWS data.description / data.instruction arrive as raw + HTML (per Central guide §Surprise 3). Verify tags are stripped, entities + decoded, paragraph breaks become spaces, plain text is a no-op. +2. 
compose_mesh_message() integration — an Event whose title contains HTML + produces a clean LoRa string (no literal <p>/<br>).
+3. Weather category parity — ALERT_CATEGORIES{toggle=weather} is exactly the
+ set that nws.py._derive_category() can emit. Fail loudly if either side
+ drifts so the weather family stays "every event meshai sees is selectable".
+"""
+
+import inspect
+
+import pytest
+
+from meshai.notifications.categories import ALERT_CATEGORIES
+from meshai.notifications.events import make_event
+from meshai.notifications.renderers.composer import (
+ compose_mesh_message,
+ strip_html_tags,
+)
+
+
+# ---------- strip_html_tags() ----------------------------------------------
+
+
+def test_strip_html_tags_removes_simple_tags():
+ assert strip_html_tags("<p>Severe</p>") == "Severe" + + +def test_strip_html_tags_br_becomes_space(): + #hello
world
") == "hello world" + + +def test_strip_html_tags_decodes_entities(): + assert strip_html_tags("Wind gusts 25 &amp; 35 mph") == "Wind gusts 25 & 35 mph" + # &nbsp; decodes to U+00A0 which the whitespace collapse normalizes to a + # regular space — tight ASCII whitespace is what we want on LoRa. + assert strip_html_tags("Twin&nbsp;Falls County") == "Twin Falls County" + assert strip_html_tags("12 &mdash; 35 mph") == "12 — 35 mph" + + +def test_strip_html_tags_nested_and_attrs(): + raw = 'Tornado WARNING
line 1
\nline\t2
" + assert strip_html_tags(raw) == "line 1 line 2" + + +# ---------- compose_mesh_message integration ------------------------------- + + +def test_compose_mesh_message_strips_html_in_title(): + event = make_event( + source="nws", + category="weather_warning", + severity="priority", + title="Severe Thunderstorm Warning
", + summary="", + region="Twin Falls", + ) + line = compose_mesh_message(event) + # No literal markup escapes onto the wire. + assert "<" not in line + assert "" not in line + assert "Severe Thunderstorm Warning" in line + + +def test_compose_mesh_message_strips_html_with_entities_and_br(): + event = make_event( + source="nws", + category="weather_advisory", + severity="routine", + title="Wind Advisory —Special Weather Statement
", + ) + line = compose_mesh_message(event) + assert "<" not in line + assert "Special Weather Statement" in line + + +# ---------- ALERT_CATEGORIES weather audit --------------------------------- + + +def _nws_emitted_categories() -> set[str]: + """Walk nws.py source for every literal returned by _derive_category(). + + Reflection-style audit: read the method body's source and collect the + quoted return values. Keeps the test honest if someone adds a 5th branch + without thinking about ALERT_CATEGORIES. + """ + from meshai.env.nws import NWSAlertsAdapter + src = inspect.getsource(NWSAlertsAdapter._derive_category) + import re + return set(re.findall(r'return\s+"([a-z_]+)"', src)) + + +def test_nws_emits_exactly_four_weather_categories(): + emitted = _nws_emitted_categories() + assert emitted == { + "weather_warning", + "weather_watch", + "weather_advisory", + "weather_statement", + }, f"nws.py emission set drifted: {emitted}" + + +def test_alert_categories_weather_complete(): + """Every weather category nws.py can emit must exist in ALERT_CATEGORIES + with toggle='weather'. Anything tagged toggle='weather' that nws.py + cannot emit is an orphan (no UI selectable event would ever surface it). 
+ """ + registry_weather = { + cid for cid, info in ALERT_CATEGORIES.items() + if info.get("toggle") == "weather" + } + emitted = _nws_emitted_categories() + missing = emitted - registry_weather + orphans = registry_weather - emitted + assert not missing, f"nws.py emits categories missing from ALERT_CATEGORIES: {missing}" + assert not orphans, f"ALERT_CATEGORIES has orphan weather entries: {orphans}" + + +@pytest.mark.parametrize( + "cat", + ["weather_warning", "weather_watch", "weather_advisory", "weather_statement"], +) +def test_weather_categories_have_required_fields(cat): + info = ALERT_CATEGORIES[cat] + assert info["toggle"] == "weather" + assert info["name"] + assert info["description"] + assert info["default_severity"] in {"routine", "priority", "immediate"} + assert info["example_message"]