mirror of
https://github.com/zvx-echo6/meshai.git
synced 2026-05-21 23:24:44 +02:00
fix(health): use real telemetry, fix hardcoded thresholds
- Utilization pillar reads firmware channel_utilization (max of infra nodes) instead of estimating from packet counts × 200ms
- is_online uses configured threshold, not hardcoded 24 hours
- Updated defaults: offline 2h (was 24h), battery warning 30% (was 20%)
- Utilization thresholds: 20/25/35/45% matching real Meshtastic behavior
- Behavior pillar threshold aligned with notification config (7200/day)
- has_solar marked as dead code pending Solar Quality Engine

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
57a19aeec6
commit
c6b4a64163
4 changed files with 1576 additions and 1567 deletions
|
|
@@ -265,6 +265,10 @@ class AlertEngine:
|
||||||
))
|
))
|
||||||
state.fire(now)
|
state.fire(now)
|
||||||
|
|
||||||
|
# NOTE: has_solar is never populated in current version.
|
||||||
|
# Solar Quality Engine (v0.3) will replace this with real solar
|
||||||
|
# monitoring based on location, weather, and inversion data.
|
||||||
|
# For now this check effectively never fires.
|
||||||
if self._rules.solar_not_charging and getattr(node, "has_solar", False) and 0 < bat <= 100:
|
if self._rules.solar_not_charging and getattr(node, "has_solar", False) and 0 < bat <= 100:
|
||||||
try:
|
try:
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
|
|
|
||||||
|
|
@@ -282,9 +282,9 @@ class MeshIntelligenceConfig:
|
||||||
enabled: bool = False
|
enabled: bool = False
|
||||||
regions: list[RegionAnchor] = field(default_factory=list) # Fixed region anchors
|
regions: list[RegionAnchor] = field(default_factory=list) # Fixed region anchors
|
||||||
locality_radius_miles: float = 8.0 # Radius for locality clustering within regions
|
locality_radius_miles: float = 8.0 # Radius for locality clustering within regions
|
||||||
offline_threshold_hours: int = 24 # Hours before node considered offline
|
offline_threshold_hours: int = 2 # Hours before node considered offline
|
||||||
packet_threshold: int = 500 # Non-text packets per 24h to flag
|
packet_threshold: int = 500 # Non-text packets per 24h to flag
|
||||||
battery_warning_percent: int = 20 # Battery level for warnings
|
battery_warning_percent: int = 30 # Battery level for warnings
|
||||||
|
|
||||||
# Alert settings
|
# Alert settings
|
||||||
critical_nodes: list[str] = field(default_factory=list) # Short names of critical nodes (e.g., ["MHR", "HPR"])
|
critical_nodes: list[str] = field(default_factory=list) # Short names of critical nodes (e.g., ["MHR", "HPR"])
|
||||||
|
|
|
||||||
|
|
@@ -745,9 +745,11 @@ class MeshDataStore:
|
||||||
|
|
||||||
node.last_heard = ts or 0.0
|
node.last_heard = ts or 0.0
|
||||||
|
|
||||||
# Is online (computed from last_heard)
|
# NOTE: is_online is set by MeshHealthEngine.compute() using the
|
||||||
now = time.time()
|
# configured offline_threshold_hours. Don't set it here with a
|
||||||
node.is_online = (now - node.last_heard) < 86400 if node.last_heard else False
|
# hardcoded value - let the health engine determine online status.
|
||||||
|
# The health engine runs on every refresh cycle and will set is_online
|
||||||
|
# based on: (now - last_heard) < (offline_threshold_hours * 3600)
|
||||||
|
|
||||||
# Hops, SNR, RSSI (MM)
|
# Hops, SNR, RSSI (MM)
|
||||||
node.hops_away = raw.get("hopsAway")
|
node.hops_away = raw.get("hopsAway")
|
||||||
|
|
|
||||||
|
|
@@ -26,9 +26,12 @@ INFRASTRUCTURE_ROLES = {"ROUTER", "ROUTER_LATE", "ROUTER_CLIENT"}
|
||||||
|
|
||||||
# Default thresholds
|
# Default thresholds
|
||||||
DEFAULT_LOCALITY_RADIUS_MILES = 8.0
|
DEFAULT_LOCALITY_RADIUS_MILES = 8.0
|
||||||
DEFAULT_OFFLINE_THRESHOLD_HOURS = 24
|
DEFAULT_OFFLINE_THRESHOLD_HOURS = 2 # Hours before node considered offline
|
||||||
DEFAULT_PACKET_THRESHOLD = 500 # Non-text packets per 24h
|
DEFAULT_PACKET_THRESHOLD = 7200 # Non-text packets per 24h (5/min avg)
|
||||||
DEFAULT_BATTERY_WARNING_PERCENT = 20
|
# NOTE: This is aligned with notification config's packet_flood threshold.
|
||||||
|
# 5 packets/min avg × 60 min × 24 hr = 7,200 packets/day.
|
||||||
|
# A node averaging 5+ non-text packets/min is misbehaving.
|
||||||
|
DEFAULT_BATTERY_WARNING_PERCENT = 30 # Battery level to warn (30% gives time to respond)
|
||||||
|
|
||||||
# Utilization thresholds (percentage) - based on real Meshtastic behavior
|
# Utilization thresholds (percentage) - based on real Meshtastic behavior
|
||||||
# Firmware starts throttling GPS at 25%, severe degradation above 35%
|
# Firmware starts throttling GPS at 25%, severe degradation above 35%
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue