fix(health): use real channel utilization from node telemetry

- Utilization pillar now reads firmware-reported channel_utilization
  instead of estimating from packet counts with hardcoded 200ms/pkt
- Uses highest infra node value (busiest node = bottleneck)
- Falls back to packet count estimate only when telemetry unavailable
- Updated thresholds: 20/25/35/45% matching real Meshtastic behavior
- Per-region utilization from region nodes, not mesh-wide
- API response includes util_method, util_max_percent, util_node_count

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
K7ZVX 2026-05-13 22:49:41 +00:00
commit 57a19aeec6
3 changed files with 1267 additions and 1164 deletions

View file

@ -746,27 +746,32 @@ export default function Reference() {
<SubHeader>Utilization (25%)</SubHeader> <SubHeader>Utilization (25%)</SubHeader>
<p> <p>
Estimates how much of the radio channel's airtime is being used. MeshAI can't measure airtime directly, so it estimates based on packet counts over the last 24 hours. MeshAI reads the channel utilization that each router reports in its telemetry — this is the firmware's own measurement of how busy the radio channel is. MeshAI uses the <strong>highest</strong> value from any infrastructure node because the busiest router is the bottleneck for the whole mesh.
</p>
<p className="p-3 bg-slate-800 rounded font-mono text-sm">
packets_per_hour = non_text_packets ÷ 24<br/>
airtime_estimate = (packets_per_hour × 200ms) ÷ 3,600,000ms × 100%
</p> </p>
<p> <p>
The 200ms is an approximation for the MediumFast radio preset — each LoRa packet takes roughly 200ms of airtime. Text messages don't count toward utilization (chatting is the point of a mesh). <strong>How it works:</strong>
</p>
<ol className="list-decimal list-inside space-y-1 ml-4">
<li>Collect <Mono>channel_utilization</Mono> from all infrastructure nodes that report it</li>
<li>If no infra nodes have telemetry, try all nodes</li>
<li>Use the <strong>maximum</strong> value for scoring (busiest node = bottleneck)</li>
<li>If no nodes report utilization (older firmware), fall back to packet count estimate</li>
</ol>
<p className="mt-4">
<strong>Fallback method</strong> (when telemetry unavailable): estimates from packet counts using 200ms/packet airtime. This is less accurate — it assumes MediumFast preset and sums packets across all nodes.
</p> </p>
<RefTable <RefTable
headers={['Estimated Airtime', 'Score', 'What It Means']} headers={['Channel Utilization', 'Score', 'What It Means']}
rows={[ rows={[
['Under 20%', '100', 'Channel is clear — this is the goal'], ['Under 20%', '100', 'Channel is clear — this is the goal'],
['20-25%', '75-100', 'Slight degradation, occasional collisions'], ['20-25%', '75-100', 'Slight degradation, occasional collisions'],
['25-35%', '50-75', 'Severe degradation — firmware throttling active'], ['25-35%', '50-75', 'Severe degradation — firmware throttling active'],
['35-45%', '25-50', 'Mesh struggling badly — reliability dropping'], ['35-45%', '25-50', 'Mesh struggling badly — reliability dropping'],
['Over 45%', '0-25', 'Mesh is effectively dead'], ['Over 45%', '0-25', 'Mesh is effectively unusable'],
]} ]}
/> />
<p> <p>
<strong>Special case:</strong> If MeshAI doesn't have packet data (no sources reporting packet counts), this pillar scores 100. You're not penalized for missing data. <strong>Special case:</strong> If no utilization data is available (no telemetry and no packet data), this pillar scores 100. You're not penalized for missing data.
</p> </p>
<SubHeader>Coverage (20%)</SubHeader> <SubHeader>Coverage (20%)</SubHeader>

View file

@ -1,403 +1,409 @@
"""Mesh health and node API routes.""" """Mesh health and node API routes."""
from datetime import datetime from datetime import datetime
from typing import Optional from typing import Optional
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
router = APIRouter(tags=["mesh"]) router = APIRouter(tags=["mesh"])
def _serialize_health_score(score) -> dict: def _serialize_health_score(score) -> dict:
"""Serialize a HealthScore object.""" """Serialize a HealthScore object."""
return { return {
"composite": round(score.composite, 1), "composite": round(score.composite, 1),
"tier": score.tier, "tier": score.tier,
"infrastructure": round(score.infrastructure, 1), "infrastructure": round(score.infrastructure, 1),
"utilization": round(score.utilization, 1), "utilization": round(score.utilization, 1),
"behavior": round(score.behavior, 1), "behavior": round(score.behavior, 1),
"power": round(score.power, 1), "power": round(score.power, 1),
"infra_online": score.infra_online, "infra_online": score.infra_online,
"infra_total": score.infra_total, "infra_total": score.infra_total,
"util_percent": round(score.util_percent, 1), "util_percent": round(score.util_percent, 1),
"flagged_nodes": score.flagged_nodes, "util_max_percent": round(getattr(score, 'util_max_percent', score.util_percent), 1),
"battery_warnings": score.battery_warnings, "util_method": getattr(score, 'util_method', 'unknown'),
"solar_index": round(score.solar_index, 1), "util_node_count": getattr(score, 'util_node_count', 0),
} "flagged_nodes": score.flagged_nodes,
"battery_warnings": score.battery_warnings,
"solar_index": round(score.solar_index, 1),
def _serialize_region(region) -> dict: }
"""Serialize a RegionHealth object."""
return {
"name": region.name, def _serialize_region(region) -> dict:
"center_lat": region.center_lat, """Serialize a RegionHealth object."""
"center_lon": region.center_lon, return {
"node_count": len(region.node_ids), "name": region.name,
"locality_count": len(region.localities), "center_lat": region.center_lat,
"score": _serialize_health_score(region.score), "center_lon": region.center_lon,
"node_ids": region.node_ids, "node_count": len(region.node_ids),
} "locality_count": len(region.localities),
"score": _serialize_health_score(region.score),
"node_ids": region.node_ids,
def _format_timestamp(ts: Optional[float]) -> Optional[str]: }
"""Format a Unix timestamp as ISO string."""
if not ts or ts <= 0:
return None def _format_timestamp(ts: Optional[float]) -> Optional[str]:
try: """Format a Unix timestamp as ISO string."""
return datetime.fromtimestamp(ts).isoformat() if not ts or ts <= 0:
except (ValueError, OSError): return None
return None try:
return datetime.fromtimestamp(ts).isoformat()
except (ValueError, OSError):
@router.get("/health") return None
async def get_health(request: Request):
"""Get mesh health data."""
health_engine = request.app.state.health_engine @router.get("/health")
async def get_health(request: Request):
if not health_engine or not health_engine.mesh_health: """Get mesh health data."""
return { health_engine = request.app.state.health_engine
"score": 0,
"tier": "Unknown", if not health_engine or not health_engine.mesh_health:
"message": "Health engine not ready", return {
} "score": 0,
"tier": "Unknown",
health = health_engine.mesh_health "message": "Health engine not ready",
score = health.score }
return { health = health_engine.mesh_health
"score": round(score.composite, 1), score = health.score
"tier": score.tier,
"pillars": { return {
"infrastructure": round(score.infrastructure, 1), "score": round(score.composite, 1),
"utilization": round(score.utilization, 1), "tier": score.tier,
"behavior": round(score.behavior, 1), "pillars": {
"power": round(score.power, 1), "infrastructure": round(score.infrastructure, 1),
}, "utilization": round(score.utilization, 1),
"infra_online": score.infra_online, "behavior": round(score.behavior, 1),
"infra_total": score.infra_total, "power": round(score.power, 1),
"util_percent": round(score.util_percent, 1), },
"flagged_nodes": score.flagged_nodes, "infra_online": score.infra_online,
"battery_warnings": score.battery_warnings, "infra_total": score.infra_total,
"total_nodes": health.total_nodes, "util_percent": round(score.util_percent, 1),
"total_regions": health.total_regions, "util_max_percent": round(getattr(score, 'util_max_percent', score.util_percent), 1),
"unlocated_count": len(health.unlocated_nodes), "util_method": getattr(score, 'util_method', 'unknown'),
"last_computed": _format_timestamp(health.last_computed), "util_node_count": getattr(score, 'util_node_count', 0),
"recommendations": [], # TODO: Add recommendations "flagged_nodes": score.flagged_nodes,
} "battery_warnings": score.battery_warnings,
"total_nodes": health.total_nodes,
"total_regions": health.total_regions,
@router.get("/nodes") "unlocated_count": len(health.unlocated_nodes),
async def get_nodes(request: Request): "last_computed": _format_timestamp(health.last_computed),
"""Get all nodes.""" "recommendations": [], # TODO: Add recommendations
data_store = request.app.state.data_store }
health_engine = request.app.state.health_engine
if not data_store: @router.get("/nodes")
return [] async def get_nodes(request: Request):
"""Get all nodes."""
try: data_store = request.app.state.data_store
raw_nodes = data_store.get_all_nodes() health_engine = request.app.state.health_engine
except Exception:
return [] if not data_store:
return []
nodes = []
for node in raw_nodes: try:
# Extract node_num from various formats raw_nodes = data_store.get_all_nodes()
node_num = node.get("nodeNum") or node.get("num") or node.get("node_num") except Exception:
if node_num is None: return []
node_id = node.get("node_id") or node.get("id")
if node_id and isinstance(node_id, str): nodes = []
try: for node in raw_nodes:
node_num = int(node_id.lstrip("!"), 16) # Extract node_num from various formats
except ValueError: node_num = node.get("nodeNum") or node.get("num") or node.get("node_num")
continue if node_num is None:
node_id = node.get("node_id") or node.get("id")
if node_num is None: if node_id and isinstance(node_id, str):
continue try:
node_num = int(node_id.lstrip("!"), 16)
# Get health data if available except ValueError:
health_data = {} continue
if health_engine and health_engine.mesh_health:
node_health = health_engine.mesh_health.nodes.get(str(node_num)) if node_num is None:
if node_health: continue
health_data = {
"region": node_health.region, # Get health data if available
"locality": node_health.locality, health_data = {}
"is_infrastructure": node_health.is_infrastructure, if health_engine and health_engine.mesh_health:
"is_online": node_health.is_online, node_health = health_engine.mesh_health.nodes.get(str(node_num))
"packet_count_24h": node_health.packet_count_24h, if node_health:
} health_data = {
"region": node_health.region,
# Build node dict "locality": node_health.locality,
node_dict = { "is_infrastructure": node_health.is_infrastructure,
"node_num": node_num, "is_online": node_health.is_online,
"node_id_hex": f"!{node_num:08x}", "packet_count_24h": node_health.packet_count_24h,
"short_name": node.get("shortName") or node.get("short_name") or "", }
"long_name": node.get("longName") or node.get("long_name") or "",
"role": node.get("role") or "", # Build node dict
"latitude": node.get("latitude"), node_dict = {
"longitude": node.get("longitude"), "node_num": node_num,
"last_heard": _format_timestamp(node.get("last_heard")), "node_id_hex": f"!{node_num:08x}",
"battery_level": node.get("battery_level") or node.get("batteryLevel"), "short_name": node.get("shortName") or node.get("short_name") or "",
"voltage": node.get("voltage"), "long_name": node.get("longName") or node.get("long_name") or "",
"snr": node.get("snr"), "role": node.get("role") or "",
"firmware": node.get("firmware_version") or node.get("firmwareVersion") or "", "latitude": node.get("latitude"),
"hardware": node.get("hw_model") or node.get("hwModel") or "", "longitude": node.get("longitude"),
"uptime": node.get("uptime_seconds") or node.get("uptimeSeconds"), "last_heard": _format_timestamp(node.get("last_heard")),
"sources": node.get("_sources", []), "battery_level": node.get("battery_level") or node.get("batteryLevel"),
**health_data, "voltage": node.get("voltage"),
} "snr": node.get("snr"),
nodes.append(node_dict) "firmware": node.get("firmware_version") or node.get("firmwareVersion") or "",
"hardware": node.get("hw_model") or node.get("hwModel") or "",
return nodes "uptime": node.get("uptime_seconds") or node.get("uptimeSeconds"),
"sources": node.get("_sources", []),
**health_data,
@router.get("/nodes/{node_num}") }
async def get_node_detail(node_num: int, request: Request): nodes.append(node_dict)
"""Get detailed info for a specific node."""
data_store = request.app.state.data_store return nodes
health_engine = request.app.state.health_engine
if not data_store: @router.get("/nodes/{node_num}")
raise HTTPException(status_code=404, detail="Data store not available") async def get_node_detail(node_num: int, request: Request):
"""Get detailed info for a specific node."""
# Find the node data_store = request.app.state.data_store
try: health_engine = request.app.state.health_engine
raw_nodes = data_store.get_all_nodes()
except Exception: if not data_store:
raise HTTPException(status_code=500, detail="Failed to fetch nodes") raise HTTPException(status_code=404, detail="Data store not available")
target_node = None # Find the node
for node in raw_nodes: try:
n_num = node.get("nodeNum") or node.get("num") or node.get("node_num") raw_nodes = data_store.get_all_nodes()
if n_num is None: except Exception:
node_id = node.get("node_id") or node.get("id") raise HTTPException(status_code=500, detail="Failed to fetch nodes")
if node_id and isinstance(node_id, str):
try: target_node = None
n_num = int(node_id.lstrip("!"), 16) for node in raw_nodes:
except ValueError: n_num = node.get("nodeNum") or node.get("num") or node.get("node_num")
continue if n_num is None:
node_id = node.get("node_id") or node.get("id")
if n_num == node_num: if node_id and isinstance(node_id, str):
target_node = node try:
break n_num = int(node_id.lstrip("!"), 16)
except ValueError:
if not target_node: continue
raise HTTPException(status_code=404, detail=f"Node {node_num} not found")
if n_num == node_num:
# Get health data target_node = node
health_data = {} break
if health_engine and health_engine.mesh_health:
node_health = health_engine.mesh_health.nodes.get(str(node_num)) if not target_node:
if node_health: raise HTTPException(status_code=404, detail=f"Node {node_num} not found")
health_data = {
"region": node_health.region, # Get health data
"locality": node_health.locality, health_data = {}
"is_infrastructure": node_health.is_infrastructure, if health_engine and health_engine.mesh_health:
"is_online": node_health.is_online, node_health = health_engine.mesh_health.nodes.get(str(node_num))
"packet_count_24h": node_health.packet_count_24h, if node_health:
"text_packet_count_24h": node_health.text_packet_count_24h, health_data = {
"non_text_packets": node_health.non_text_packets, "region": node_health.region,
"has_solar": node_health.has_solar, "locality": node_health.locality,
} "is_infrastructure": node_health.is_infrastructure,
"is_online": node_health.is_online,
# Get neighbors from edges "packet_count_24h": node_health.packet_count_24h,
neighbors = [] "text_packet_count_24h": node_health.text_packet_count_24h,
try: "non_text_packets": node_health.non_text_packets,
edges = data_store.get_all_edges() "has_solar": node_health.has_solar,
for edge in edges: }
from_num = edge.get("from_node") or edge.get("from")
to_num = edge.get("to_node") or edge.get("to") # Get neighbors from edges
neighbors = []
if from_num == node_num: try:
neighbors.append({ edges = data_store.get_all_edges()
"node_num": to_num, for edge in edges:
"snr": edge.get("snr"), from_num = edge.get("from_node") or edge.get("from")
}) to_num = edge.get("to_node") or edge.get("to")
elif to_num == node_num:
neighbors.append({ if from_num == node_num:
"node_num": from_num, neighbors.append({
"snr": edge.get("snr"), "node_num": to_num,
}) "snr": edge.get("snr"),
except Exception: })
pass elif to_num == node_num:
neighbors.append({
return { "node_num": from_num,
"node_num": node_num, "snr": edge.get("snr"),
"node_id_hex": f"!{node_num:08x}", })
"short_name": target_node.get("shortName") or target_node.get("short_name") or "", except Exception:
"long_name": target_node.get("longName") or target_node.get("long_name") or "", pass
"role": target_node.get("role") or "",
"latitude": target_node.get("latitude"), return {
"longitude": target_node.get("longitude"), "node_num": node_num,
"last_heard": _format_timestamp(target_node.get("last_heard")), "node_id_hex": f"!{node_num:08x}",
"battery_level": target_node.get("battery_level") or target_node.get("batteryLevel"), "short_name": target_node.get("shortName") or target_node.get("short_name") or "",
"voltage": target_node.get("voltage"), "long_name": target_node.get("longName") or target_node.get("long_name") or "",
"snr": target_node.get("snr"), "role": target_node.get("role") or "",
"firmware": target_node.get("firmware_version") or target_node.get("firmwareVersion") or "", "latitude": target_node.get("latitude"),
"hardware": target_node.get("hw_model") or target_node.get("hwModel") or "", "longitude": target_node.get("longitude"),
"uptime": target_node.get("uptime_seconds") or target_node.get("uptimeSeconds"), "last_heard": _format_timestamp(target_node.get("last_heard")),
"sources": target_node.get("_sources", []), "battery_level": target_node.get("battery_level") or target_node.get("batteryLevel"),
"neighbors": neighbors, "voltage": target_node.get("voltage"),
**health_data, "snr": target_node.get("snr"),
} "firmware": target_node.get("firmware_version") or target_node.get("firmwareVersion") or "",
"hardware": target_node.get("hw_model") or target_node.get("hwModel") or "",
"uptime": target_node.get("uptime_seconds") or target_node.get("uptimeSeconds"),
@router.get("/regions") "sources": target_node.get("_sources", []),
async def get_regions(request: Request): "neighbors": neighbors,
"""Get region summaries.""" **health_data,
health_engine = request.app.state.health_engine }
if not health_engine or not health_engine.mesh_health:
return [] @router.get("/regions")
async def get_regions(request: Request):
regions = [] """Get region summaries."""
for region in health_engine.mesh_health.regions: health_engine = request.app.state.health_engine
# Count online infrastructure
infra_online = 0 if not health_engine or not health_engine.mesh_health:
infra_total = 0 return []
online_count = 0
regions = []
for nid in region.node_ids: for region in health_engine.mesh_health.regions:
node = health_engine.mesh_health.nodes.get(nid) # Count online infrastructure
if node: infra_online = 0
if node.is_online: infra_total = 0
online_count += 1 online_count = 0
if node.is_infrastructure:
infra_total += 1 for nid in region.node_ids:
if node.is_online: node = health_engine.mesh_health.nodes.get(nid)
infra_online += 1 if node:
if node.is_online:
regions.append({ online_count += 1
"name": region.name, if node.is_infrastructure:
"local_name": region.name, # Could be overridden by region_labels infra_total += 1
"node_count": len(region.node_ids), if node.is_online:
"infra_count": infra_total, infra_online += 1
"infra_online": infra_online,
"online_count": online_count, regions.append({
"score": round(region.score.composite, 1), "name": region.name,
"tier": region.score.tier, "local_name": region.name, # Could be overridden by region_labels
"center_lat": region.center_lat, "node_count": len(region.node_ids),
"center_lon": region.center_lon, "infra_count": infra_total,
}) "infra_online": infra_online,
"online_count": online_count,
return regions "score": round(region.score.composite, 1),
"tier": region.score.tier,
"center_lat": region.center_lat,
@router.get("/sources") "center_lon": region.center_lon,
async def get_sources(request: Request): })
"""Get per-source health information."""
data_store = request.app.state.data_store return regions
if not data_store:
return [] @router.get("/sources")
async def get_sources(request: Request):
sources = [] """Get per-source health information."""
try: data_store = request.app.state.data_store
for name, source in data_store._sources.items():
source_info = { if not data_store:
"name": name, return []
"type": "meshview" if hasattr(source, "edges") else "meshmonitor",
"url": getattr(source, "url", ""), sources = []
"is_loaded": source.is_loaded, try:
"last_error": source.last_error, for name, source in data_store._sources.items():
"consecutive_errors": getattr(source, "consecutive_errors", 0), source_info = {
"response_time_ms": getattr(source, "last_response_time_ms", None), "name": name,
"tick_count": getattr(source, "tick_count", 0), "type": "meshview" if hasattr(source, "edges") else "meshmonitor",
"node_count": len(source.nodes) if hasattr(source, "nodes") else 0, "url": getattr(source, "url", ""),
} "is_loaded": source.is_loaded,
sources.append(source_info) "last_error": source.last_error,
except Exception: "consecutive_errors": getattr(source, "consecutive_errors", 0),
pass "response_time_ms": getattr(source, "last_response_time_ms", None),
"tick_count": getattr(source, "tick_count", 0),
return sources "node_count": len(source.nodes) if hasattr(source, "nodes") else 0,
}
sources.append(source_info)
@router.get("/edges") except Exception:
async def get_edges(request: Request): pass
"""Get neighbor/edge relationships."""
data_store = request.app.state.data_store return sources
if not data_store:
return [] @router.get("/edges")
async def get_edges(request: Request):
try: """Get neighbor/edge relationships."""
raw_edges = data_store.get_all_edges() data_store = request.app.state.data_store
except Exception:
return [] if not data_store:
return []
edges = []
for edge in raw_edges: try:
from_num = edge.get("from_node") or edge.get("from") raw_edges = data_store.get_all_edges()
to_num = edge.get("to_node") or edge.get("to") except Exception:
snr = edge.get("snr") return []
# Derive quality from SNR edges = []
if snr is None: for edge in raw_edges:
quality = "unknown" from_num = edge.get("from_node") or edge.get("from")
elif snr > 12: to_num = edge.get("to_node") or edge.get("to")
quality = "excellent" snr = edge.get("snr")
elif snr > 8:
quality = "good" # Derive quality from SNR
elif snr > 5: if snr is None:
quality = "fair" quality = "unknown"
elif snr > 3: elif snr > 12:
quality = "marginal" quality = "excellent"
else: elif snr > 8:
quality = "poor" quality = "good"
elif snr > 5:
edges.append({ quality = "fair"
"from_node": from_num, elif snr > 3:
"to_node": to_num, quality = "marginal"
"snr": snr, else:
"quality": quality, quality = "poor"
})
edges.append({
return edges "from_node": from_num,
"to_node": to_num,
"snr": snr,
"quality": quality,
@router.get("/channels") })
async def get_channels(request: Request):
"""Get radio channels from the connected Meshtastic interface.""" return edges
connector = getattr(request.app.state, "connector", None)
if not connector or not connector.connected:
return [] @router.get("/channels")
async def get_channels(request: Request):
try: """Get radio channels from the connected Meshtastic interface."""
interface = connector._interface connector = getattr(request.app.state, "connector", None)
if not interface or not hasattr(interface, "localNode"):
return [] if not connector or not connector.connected:
return []
local_node = interface.localNode
if not local_node or not hasattr(local_node, "channels"): try:
return [] interface = connector._interface
if not interface or not hasattr(interface, "localNode"):
channels = [] return []
for ch in local_node.channels:
if ch is None: local_node = interface.localNode
continue if not local_node or not hasattr(local_node, "channels"):
return []
# Get channel settings
settings = getattr(ch, "settings", None) channels = []
name = getattr(settings, "name", "") if settings else "" for ch in local_node.channels:
role_val = getattr(ch, "role", 0) if ch is None:
continue
# Map role enum to string
role_map = {0: "DISABLED", 1: "PRIMARY", 2: "SECONDARY"} # Get channel settings
role = role_map.get(role_val, "UNKNOWN") settings = getattr(ch, "settings", None)
name = getattr(settings, "name", "") if settings else ""
channels.append({ role_val = getattr(ch, "role", 0)
"index": ch.index,
"name": name or f"Channel {ch.index}", # Map role enum to string
"role": role, role_map = {0: "DISABLED", 1: "PRIMARY", 2: "SECONDARY"}
"enabled": role_val != 0, role = role_map.get(role_val, "UNKNOWN")
})
channels.append({
return channels "index": ch.index,
"name": name or f"Channel {ch.index}",
except Exception as e: "role": role,
import logging "enabled": role_val != 0,
logging.getLogger(__name__).warning(f"Failed to get channels: {e}") })
return []
return channels
except Exception as e:
import logging
logging.getLogger(__name__).warning(f"Failed to get channels: {e}")
return []

File diff suppressed because it is too large Load diff