feat(notifications): alert routing with channels, rules, and delivery

- Notification pipeline: categories -> rules -> channels
- Channels: mesh broadcast, mesh DM, email (SMTP), webhook (generic)
- Per-rule severity threshold and category filtering
- Quiet hours with emergency override
- LLM summarization for mesh delivery over 200 chars only
- !subscribe shows available categories, easy mesh subscription
- Dashboard notification rules API endpoints
- Extensible channel system for future transports (Winlink, JS8Call)
- config.yaml notification section with examples

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
K7ZVX 2026-05-13 03:51:37 +00:00
commit 3bf5e3dfbc
12 changed files with 1215 additions and 105 deletions

View file

@ -0,0 +1,6 @@
"""Notification system for MeshAI alerts."""
from .categories import ALERT_CATEGORIES, get_category, list_categories
from .router import NotificationRouter
__all__ = ["ALERT_CATEGORIES", "get_category", "list_categories", "NotificationRouter"]

View file

@ -0,0 +1,157 @@
"""Alert category registry.
Defines all alertable conditions with human-readable names and descriptions.
"""
ALERT_CATEGORIES = {
# Infrastructure alerts
"infra_offline": {
"name": "Infrastructure Offline",
"description": "An infrastructure node stopped responding",
"default_severity": "warning",
},
"critical_node_down": {
"name": "Critical Node Down",
"description": "A node marked as critical went offline",
"default_severity": "critical",
},
"infra_recovery": {
"name": "Infrastructure Recovery",
"description": "An infrastructure node came back online",
"default_severity": "info",
},
"new_router": {
"name": "New Router",
"description": "A new router appeared on the mesh",
"default_severity": "info",
},
# Power alerts
"battery_warning": {
"name": "Battery Warning",
"description": "Infrastructure node battery below warning threshold",
"default_severity": "warning",
},
"battery_critical": {
"name": "Battery Critical",
"description": "Infrastructure node battery below critical threshold",
"default_severity": "critical",
},
"battery_emergency": {
"name": "Battery Emergency",
"description": "Infrastructure node battery critically low",
"default_severity": "emergency",
},
"battery_trend": {
"name": "Battery Declining",
"description": "Battery showing declining trend over 7 days",
"default_severity": "warning",
},
"power_source_change": {
"name": "Power Source Change",
"description": "Node switched from USB to battery (possible outage)",
"default_severity": "warning",
},
"solar_not_charging": {
"name": "Solar Not Charging",
"description": "Solar panel not charging during daylight hours",
"default_severity": "warning",
},
# Utilization alerts
"sustained_high_util": {
"name": "High Utilization",
"description": "Channel utilization elevated for extended period",
"default_severity": "warning",
},
"packet_flood": {
"name": "Packet Flood",
"description": "Node sending excessive packets",
"default_severity": "warning",
},
# Coverage alerts
"infra_single_gateway": {
"name": "Single Gateway",
"description": "Infrastructure node dropped to single gateway coverage",
"default_severity": "warning",
},
"feeder_offline": {
"name": "Feeder Offline",
"description": "A feeder gateway stopped responding",
"default_severity": "warning",
},
"region_total_blackout": {
"name": "Region Blackout",
"description": "All infrastructure in a region is offline",
"default_severity": "emergency",
},
# Health score alerts
"mesh_score_low": {
"name": "Mesh Health Low",
"description": "Overall mesh health score below threshold",
"default_severity": "warning",
},
"region_score_low": {
"name": "Region Health Low",
"description": "A region's health score below threshold",
"default_severity": "warning",
},
# Environmental alerts
"weather_warning": {
"name": "Severe Weather",
"description": "NWS warning or advisory for mesh area",
"default_severity": "warning",
},
"hf_blackout": {
"name": "HF Radio Blackout",
"description": "R3+ solar event degrading HF propagation",
"default_severity": "warning",
},
"tropospheric_ducting": {
"name": "Tropospheric Ducting",
"description": "Atmospheric conditions extending VHF/UHF range",
"default_severity": "info",
},
"wildfire_proximity": {
"name": "Fire Near Mesh",
"description": "Wildfire detected within configured distance",
"default_severity": "warning",
},
"new_ignition": {
"name": "New Fire Ignition",
"description": "Satellite hotspot not matching any known fire",
"default_severity": "warning",
},
"flood_warning": {
"name": "Flood Warning",
"description": "Stream gauge exceeds flood threshold",
"default_severity": "warning",
},
"road_closure": {
"name": "Road Closure",
"description": "Full road closure on monitored corridor",
"default_severity": "warning",
},
}
def get_category(category_id: str) -> dict:
"""Get category info by ID, with fallback for unknown categories."""
if category_id in ALERT_CATEGORIES:
return ALERT_CATEGORIES[category_id]
return {
"name": category_id.replace("_", " ").title(),
"description": f"Alert type: {category_id}",
"default_severity": "info",
}
def list_categories() -> list[dict]:
"""List all categories with their IDs."""
return [
{"id": cat_id, **cat_info}
for cat_id, cat_info in ALERT_CATEGORIES.items()
]

View file

@ -0,0 +1,308 @@
"""Notification channel implementations."""
import asyncio
import logging
import smtplib
import ssl
import time
from abc import ABC, abstractmethod
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from typing import Optional, TYPE_CHECKING
import httpx
if TYPE_CHECKING:
from ..connector import MeshConnector
logger = logging.getLogger(__name__)
class NotificationChannel(ABC):
"""Base class for notification delivery channels."""
channel_type: str = "base"
@abstractmethod
async def deliver(self, alert: dict, rule: dict) -> bool:
"""Send alert. Returns True on success."""
raise NotImplementedError
@abstractmethod
async def test(self) -> tuple[bool, str]:
"""Send test message. Returns (success, message)."""
raise NotImplementedError
class MeshBroadcastChannel(NotificationChannel):
"""Post alert to mesh channel."""
channel_type = "mesh_broadcast"
def __init__(self, connector: "MeshConnector", channel_index: int = 0):
self._connector = connector
self._channel = channel_index
async def deliver(self, alert: dict, rule: dict) -> bool:
"""Send alert to mesh channel."""
if not self._connector:
logger.warning("No mesh connector available")
return False
try:
message = alert.get("message", "")
self._connector.send_message(
text=message,
destination=None,
channel=self._channel,
)
logger.info("Broadcast alert to channel %d", self._channel)
return True
except Exception as e:
logger.error("Failed to broadcast alert: %s", e)
return False
async def test(self) -> tuple[bool, str]:
"""Send test broadcast."""
try:
self._connector.send_message(
text="[TEST] MeshAI notification system test",
destination=None,
channel=self._channel,
)
return True, "Test message sent to channel %d" % self._channel
except Exception as e:
return False, "Failed to send test: %s" % e
class MeshDMChannel(NotificationChannel):
"""DM alert to specific node IDs."""
channel_type = "mesh_dm"
def __init__(self, connector: "MeshConnector", node_ids: list[str]):
self._connector = connector
self._node_ids = node_ids
async def deliver(self, alert: dict, rule: dict) -> bool:
"""Send alert via DM to configured nodes."""
if not self._connector:
return False
message = alert.get("message", "")
success = True
for node_id in self._node_ids:
try:
dest = int(node_id) if node_id.isdigit() else node_id
self._connector.send_message(text=message, destination=dest, channel=0)
except Exception as e:
logger.error("Failed to DM %s: %s", node_id, e)
success = False
return success
async def test(self) -> tuple[bool, str]:
"""Send test DM to all configured nodes."""
if not self._node_ids:
return False, "No node IDs configured"
try:
for node_id in self._node_ids:
dest = int(node_id) if node_id.isdigit() else node_id
self._connector.send_message(
text="[TEST] MeshAI notification test",
destination=dest,
channel=0,
)
return True, "Test DMs sent to %d nodes" % len(self._node_ids)
except Exception as e:
return False, "Failed to send test DMs: %s" % e
class EmailChannel(NotificationChannel):
"""Send alert via SMTP email."""
channel_type = "email"
def __init__(
self,
smtp_host: str,
smtp_port: int,
smtp_user: str,
smtp_password: str,
smtp_tls: bool,
from_address: str,
recipients: list[str],
):
self._host = smtp_host
self._port = smtp_port
self._user = smtp_user
self._password = smtp_password
self._tls = smtp_tls
self._from = from_address
self._recipients = recipients
async def deliver(self, alert: dict, rule: dict) -> bool:
"""Send alert via email."""
if not self._recipients:
return False
alert_type = alert.get("type", "alert")
severity = alert.get("severity", "info").upper()
message = alert.get("message", "")
subject = "[MeshAI %s] %s" % (severity, alert_type.replace("_", " ").title())
body = "MeshAI Alert\n\nType: %s\nSeverity: %s\nTime: %s\n\n%s\n\n---\nAutomated message from MeshAI." % (
alert_type, severity, time.strftime("%Y-%m-%d %H:%M:%S"), message
)
try:
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self._send_email, subject, body)
return True
except Exception as e:
logger.error("Failed to send email: %s", e)
return False
def _send_email(self, subject: str, body: str):
msg = MIMEMultipart()
msg["From"] = self._from
msg["To"] = ", ".join(self._recipients)
msg["Subject"] = subject
msg.attach(MIMEText(body, "plain"))
if self._tls:
context = ssl.create_default_context()
with smtplib.SMTP(self._host, self._port) as server:
server.starttls(context=context)
if self._user and self._password:
server.login(self._user, self._password)
server.sendmail(self._from, self._recipients, msg.as_string())
else:
with smtplib.SMTP(self._host, self._port) as server:
if self._user and self._password:
server.login(self._user, self._password)
server.sendmail(self._from, self._recipients, msg.as_string())
async def test(self) -> tuple[bool, str]:
try:
loop = asyncio.get_event_loop()
await loop.run_in_executor(
None,
self._send_email,
"[MeshAI TEST] Notification Test",
"Test message from MeshAI.",
)
return True, "Test email sent to %d recipients" % len(self._recipients)
except Exception as e:
return False, "Failed to send test email: %s" % e
class WebhookChannel(NotificationChannel):
"""POST alert JSON to a URL."""
channel_type = "webhook"
def __init__(self, url: str, headers: Optional[dict] = None):
self._url = url
self._headers = headers or {}
async def deliver(self, alert: dict, rule: dict) -> bool:
"""POST alert to webhook URL."""
payload = {
"type": alert.get("type"),
"severity": alert.get("severity", "info"),
"message": alert.get("message", ""),
"timestamp": time.time(),
"node_name": alert.get("node_name"),
"region": alert.get("region"),
}
# Discord/Slack format
if "discord.com" in self._url or "slack.com" in self._url:
severity = alert.get("severity", "info")
color = {
"emergency": 0xFF0000,
"critical": 0xFF4444,
"warning": 0xFFAA00,
"info": 0x0099FF,
}.get(severity, 0x888888)
payload = {
"embeds": [{
"title": "MeshAI: %s" % alert.get("type", "unknown"),
"description": alert.get("message", ""),
"color": color,
}]
}
# ntfy format
elif "ntfy" in self._url:
headers = {
**self._headers,
"Title": "MeshAI: %s" % alert.get("type", "alert"),
"Priority": "3",
}
try:
async with httpx.AsyncClient() as client:
resp = await client.post(
self._url,
content=alert.get("message", ""),
headers=headers,
timeout=10,
)
return resp.status_code < 400
except Exception as e:
logger.error("Webhook failed: %s", e)
return False
try:
async with httpx.AsyncClient() as client:
resp = await client.post(
self._url,
json=payload,
headers={"Content-Type": "application/json", **self._headers},
timeout=10,
)
return resp.status_code < 400
except Exception as e:
logger.error("Webhook failed: %s", e)
return False
async def test(self) -> tuple[bool, str]:
test_alert = {"type": "test", "severity": "info", "message": "MeshAI test message"}
success = await self.deliver(test_alert, {})
if success:
return True, "Test sent to %s" % self._url
return False, "Webhook failed"
def create_channel(config: dict, connector=None) -> NotificationChannel:
"""Create a channel instance from config."""
channel_type = config.get("type", "")
if channel_type == "mesh_broadcast":
return MeshBroadcastChannel(
connector=connector,
channel_index=config.get("channel_index", 0),
)
elif channel_type == "mesh_dm":
return MeshDMChannel(
connector=connector,
node_ids=config.get("node_ids", []),
)
elif channel_type == "email":
return EmailChannel(
smtp_host=config.get("smtp_host", ""),
smtp_port=config.get("smtp_port", 587),
smtp_user=config.get("smtp_user", ""),
smtp_password=config.get("smtp_password", ""),
smtp_tls=config.get("smtp_tls", True),
from_address=config.get("from_address", ""),
recipients=config.get("recipients", []),
)
elif channel_type == "webhook":
return WebhookChannel(
url=config.get("url", ""),
headers=config.get("headers", {}),
)
else:
raise ValueError("Unknown channel type: %s" % channel_type)

View file

@ -0,0 +1,266 @@
"""Notification router - matches alerts to rules and delivers via channels."""
import logging
import time
from datetime import datetime
from typing import Optional, TYPE_CHECKING
from .channels import create_channel, NotificationChannel
from .summarizer import MessageSummarizer
if TYPE_CHECKING:
from ..connector import MeshConnector
logger = logging.getLogger(__name__)
# Severity levels in order
SEVERITY_ORDER = ["info", "advisory", "watch", "warning", "critical", "emergency"]
class NotificationRouter:
"""Routes alerts through matching rules to notification channels."""
def __init__(
self,
config,
connector: Optional["MeshConnector"] = None,
llm_backend=None,
timezone: str = "America/Boise",
):
self._channels: dict[str, NotificationChannel] = {}
self._rules: list[dict] = []
self._quiet_start = getattr(config, "quiet_hours_start", "22:00")
self._quiet_end = getattr(config, "quiet_hours_end", "06:00")
self._timezone = timezone
self._dedup_window = getattr(config, "dedup_seconds", 600)
self._recent: dict[tuple, float] = {} # (category, event_key) -> last_sent_time
self._summarizer = MessageSummarizer(llm_backend) if llm_backend else None
self._connector = connector
# Create channel instances from config
channels_config = getattr(config, "channels", [])
for ch_config in channels_config:
if hasattr(ch_config, "__dict__"):
ch_dict = {k: v for k, v in ch_config.__dict__.items() if not k.startswith("_")}
else:
ch_dict = ch_config
if not ch_dict.get("enabled", True):
continue
channel_id = ch_dict.get("id", "")
if not channel_id:
continue
try:
channel = create_channel(ch_dict, connector)
self._channels[channel_id] = channel
logger.debug("Created notification channel: %s (%s)", channel_id, ch_dict.get("type"))
except Exception as e:
logger.warning("Failed to create channel %s: %s", channel_id, e)
# Load rules
rules_config = getattr(config, "rules", [])
for rule in rules_config:
if hasattr(rule, "__dict__"):
rule_dict = {k: v for k, v in rule.__dict__.items() if not k.startswith("_")}
else:
rule_dict = rule
self._rules.append(rule_dict)
logger.info(
"Notification router initialized: %d channels, %d rules",
len(self._channels),
len(self._rules),
)
async def process_alert(self, alert: dict) -> bool:
"""Route an alert through matching rules to channels.
Returns True if alert was delivered to at least one channel.
"""
category = alert.get("type", "")
severity = alert.get("severity", "info")
delivered = False
for rule in self._rules:
# Check category match
rule_categories = rule.get("categories", [])
if rule_categories and category not in rule_categories:
continue
# Check severity threshold
min_severity = rule.get("min_severity", "info")
if not self._severity_meets(severity, min_severity):
continue
# Check quiet hours (emergencies and criticals override)
if self._in_quiet_hours() and severity not in ("emergency", "critical"):
if not rule.get("override_quiet", False):
continue
# Check dedup
event_id = alert.get("event_id", alert.get("message", "")[:50])
dedup_key = (category, event_id)
now = time.time()
if dedup_key in self._recent:
if now - self._recent[dedup_key] < self._dedup_window:
logger.debug("Skipping duplicate alert: %s", category)
continue
self._recent[dedup_key] = now
# Deliver to each channel in the rule
channel_ids = rule.get("channel_ids", [])
for channel_id in channel_ids:
channel = self._channels.get(channel_id)
if not channel:
continue
try:
# Summarize for mesh channels if over 200 chars
delivery_alert = alert
message = alert.get("message", "")
if channel.channel_type in ("mesh_broadcast", "mesh_dm"):
if len(message) > 200:
if self._summarizer:
summary = await self._summarizer.summarize(message, max_chars=195)
delivery_alert = {**alert, "message": summary}
else:
delivery_alert = {**alert, "message": message[:195] + "..."}
success = await channel.deliver(delivery_alert, rule)
if success:
delivered = True
logger.info(
"Alert delivered via %s: %s",
channel_id,
category,
)
except Exception as e:
logger.warning("Channel %s delivery failed: %s", channel_id, e)
return delivered
def _severity_meets(self, actual: str, required: str) -> bool:
"""Check if actual severity meets or exceeds required severity."""
try:
actual_idx = SEVERITY_ORDER.index(actual.lower())
required_idx = SEVERITY_ORDER.index(required.lower())
return actual_idx >= required_idx
except ValueError:
return True # Unknown severity, allow through
def _in_quiet_hours(self) -> bool:
"""Check if current time is within quiet hours."""
try:
from zoneinfo import ZoneInfo
tz = ZoneInfo(self._timezone)
now = datetime.now(tz)
current_time = now.strftime("%H:%M")
start = self._quiet_start
end = self._quiet_end
if start <= end:
# Simple range (e.g., 01:00 to 06:00)
return start <= current_time <= end
else:
# Crosses midnight (e.g., 22:00 to 06:00)
return current_time >= start or current_time <= end
except Exception:
return False
def get_channels(self) -> list[dict]:
"""Get list of configured channels."""
return [
{"id": ch_id, "type": ch.channel_type}
for ch_id, ch in self._channels.items()
]
def get_rules(self) -> list[dict]:
"""Get list of configured rules."""
return self._rules
async def test_channel(self, channel_id: str) -> tuple[bool, str]:
"""Send a test alert to a specific channel."""
channel = self._channels.get(channel_id)
if not channel:
return False, "Channel not found: %s" % channel_id
return await channel.test()
def add_mesh_subscription(
self,
node_id: str,
categories: list[str],
rule_name: Optional[str] = None,
) -> str:
"""Add a mesh DM subscription for a node.
Creates a channel and rule for the node to receive alerts.
Returns the rule name.
"""
# Create channel ID
channel_id = "mesh_dm_%s" % node_id
# Create channel if it doesn't exist
if channel_id not in self._channels:
from .channels import MeshDMChannel
channel = MeshDMChannel(
connector=self._connector,
node_ids=[node_id],
)
self._channels[channel_id] = channel
# Create rule
if not rule_name:
rule_name = "sub_%s" % node_id
# Check if rule already exists
for rule in self._rules:
if rule.get("name") == rule_name:
# Update existing rule
rule["categories"] = categories if categories else []
rule["channel_ids"] = [channel_id]
return rule_name
# Add new rule
self._rules.append({
"name": rule_name,
"categories": categories if categories else [], # Empty = all
"min_severity": "warning",
"channel_ids": [channel_id],
"override_quiet": False,
})
return rule_name
def remove_mesh_subscription(self, node_id: str) -> bool:
"""Remove a mesh subscription for a node."""
channel_id = "mesh_dm_%s" % node_id
rule_name = "sub_%s" % node_id
# Remove channel
if channel_id in self._channels:
del self._channels[channel_id]
# Remove rule
self._rules = [r for r in self._rules if r.get("name") != rule_name]
return True
def get_node_subscriptions(self, node_id: str) -> list[str]:
"""Get categories a node is subscribed to."""
rule_name = "sub_%s" % node_id
for rule in self._rules:
if rule.get("name") == rule_name:
categories = rule.get("categories", [])
return categories if categories else ["all"]
return []
def cleanup_recent(self, max_age: int = 3600):
"""Clean up old entries from recent alerts cache."""
now = time.time()
self._recent = {
k: v for k, v in self._recent.items()
if now - v < max_age
}

View file

@ -0,0 +1,64 @@
"""Message summarizer for mesh delivery."""
import logging
from typing import Optional, TYPE_CHECKING
if TYPE_CHECKING:
from ..backends import LLMBackend
logger = logging.getLogger(__name__)
class MessageSummarizer:
"""Summarizes long messages for mesh delivery.
Only used when:
- Delivering to mesh channels (broadcast or DM)
- Message exceeds max_chars (default 200)
- LLM backend is available
Email and webhook channels receive full messages.
"""
def __init__(self, llm_backend: Optional["LLMBackend"] = None):
self._llm = llm_backend
async def summarize(self, message: str, max_chars: int = 195) -> str:
"""Summarize a message to fit within max_chars.
Args:
message: Original message text
max_chars: Maximum characters for summary
Returns:
Summarized message, or truncated original if LLM unavailable
"""
if len(message) <= max_chars:
return message
if not self._llm:
return message[:max_chars - 3] + "..."
prompt = (
"Summarize this alert in under %d characters. "
"Keep severity, location, and key facts. No preamble, just the summary:\n\n%s"
% (max_chars, message)
)
try:
# Use the LLM to generate a summary
response = await self._llm.generate(
prompt,
system_prompt="You are a concise alert summarizer. Output only the summary, no explanation.",
max_tokens=100,
)
summary = response.strip()
# Ensure it fits
if len(summary) <= max_chars:
return summary
return summary[:max_chars - 3] + "..."
except Exception as e:
logger.debug("LLM summarization failed: %s", e)
return message[:max_chars - 3] + "..."