fix(health): use real telemetry, fix hardcoded thresholds

- Utilization pillar reads the firmware-reported channel_utilization (max
  across infra nodes) instead of estimating it from packet counts × 200 ms
- is_online uses configured threshold, not hardcoded 24 hours
- Updated defaults: offline 2h (was 24h), battery warning 30% (was 20%)
- Utilization thresholds: 20/25/35/45% matching real Meshtastic behavior
- Behavior pillar threshold aligned with notification config (7,200 packets/day)
- has_solar marked as dead code pending Solar Quality Engine

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
K7ZVX 2026-05-13 23:06:31 +00:00
commit c6b4a64163
4 changed files with 1576 additions and 1567 deletions

View file

@ -265,6 +265,10 @@ class AlertEngine:
)) ))
state.fire(now) state.fire(now)
# NOTE: has_solar is never populated in current version.
# Solar Quality Engine (v0.3) will replace this with real solar
# monitoring based on location, weather, and inversion data.
# For now this check effectively never fires.
if self._rules.solar_not_charging and getattr(node, "has_solar", False) and 0 < bat <= 100: if self._rules.solar_not_charging and getattr(node, "has_solar", False) and 0 < bat <= 100:
try: try:
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo

View file

@ -282,9 +282,9 @@ class MeshIntelligenceConfig:
enabled: bool = False enabled: bool = False
regions: list[RegionAnchor] = field(default_factory=list) # Fixed region anchors regions: list[RegionAnchor] = field(default_factory=list) # Fixed region anchors
locality_radius_miles: float = 8.0 # Radius for locality clustering within regions locality_radius_miles: float = 8.0 # Radius for locality clustering within regions
offline_threshold_hours: int = 24 # Hours before node considered offline offline_threshold_hours: int = 2 # Hours before node considered offline
packet_threshold: int = 500 # Non-text packets per 24h to flag packet_threshold: int = 500 # Non-text packets per 24h to flag
battery_warning_percent: int = 20 # Battery level for warnings battery_warning_percent: int = 30 # Battery level for warnings
# Alert settings # Alert settings
critical_nodes: list[str] = field(default_factory=list) # Short names of critical nodes (e.g., ["MHR", "HPR"]) critical_nodes: list[str] = field(default_factory=list) # Short names of critical nodes (e.g., ["MHR", "HPR"])

View file

@ -745,9 +745,11 @@ class MeshDataStore:
node.last_heard = ts or 0.0 node.last_heard = ts or 0.0
# Is online (computed from last_heard) # NOTE: is_online is set by MeshHealthEngine.compute() using the
now = time.time() # configured offline_threshold_hours. Don't set it here with a
node.is_online = (now - node.last_heard) < 86400 if node.last_heard else False # hardcoded value - let the health engine determine online status.
# The health engine runs on every refresh cycle and will set is_online
# based on: (now - last_heard) < (offline_threshold_hours * 3600)
# Hops, SNR, RSSI (MM) # Hops, SNR, RSSI (MM)
node.hops_away = raw.get("hopsAway") node.hops_away = raw.get("hopsAway")

View file

@ -26,9 +26,12 @@ INFRASTRUCTURE_ROLES = {"ROUTER", "ROUTER_LATE", "ROUTER_CLIENT"}
# Default thresholds # Default thresholds
DEFAULT_LOCALITY_RADIUS_MILES = 8.0 DEFAULT_LOCALITY_RADIUS_MILES = 8.0
DEFAULT_OFFLINE_THRESHOLD_HOURS = 24 DEFAULT_OFFLINE_THRESHOLD_HOURS = 2 # Hours before node considered offline
DEFAULT_PACKET_THRESHOLD = 500 # Non-text packets per 24h DEFAULT_PACKET_THRESHOLD = 7200 # Non-text packets per 24h (5/min avg)
DEFAULT_BATTERY_WARNING_PERCENT = 20 # NOTE: This is aligned with notification config's packet_flood threshold.
# 5 packets/min avg × 60 min × 24 hr = 7,200 packets/day.
# A node averaging 5+ non-text packets/min is misbehaving.
DEFAULT_BATTERY_WARNING_PERCENT = 30 # Battery level to warn (30% gives time to respond)
# Utilization thresholds (percentage) - based on real Meshtastic behavior # Utilization thresholds (percentage) - based on real Meshtastic behavior
# Firmware starts throttling GPS at 25%, severe degradation above 35% # Firmware starts throttling GPS at 25%, severe degradation above 35%