From 09346012659877683b624b46e7d2f8f7652461b8 Mon Sep 17 00:00:00 2001 From: "Matt Johnson (via Claude)" Date: Sat, 6 Jun 2026 08:07:01 +0000 Subject: [PATCH] fix(v0.7-fire-tracker-4-final): widen env-scope keyword catch + anti-hallucination clause -- close all 7 LLM DM paths Follow-up to v0.7-fire-tracker-4-revised. 7-path verify identified 4 paths failing for 2 root causes: (A) _ENV_KEYWORDS_TO_SUBTYPE missing "traffic" + drop-audit phrases so build_traffic_detail and build_drop_audit never ran; (B) LLM hallucinated specific numbers when env blocks were empty (fabricated 144 earthquakes from a blank quake_events table). This commit widens the keyword catch (phrase-match for multi-word triggers to avoid false positives) and adds a positive-framed anti-hallucination instruction to the env-context system prompt. Re-verified all 7 paths against real Gemini in the prod container; verdicts in v0.7-firetracker-phase4.md. Class A -- routing miss fix: - _ENV_KEYWORDS_TO_SUBTYPE gains "traffic"/"commute"/"highway" mapped to the existing "traffic" subtype. - New _ENV_PHRASES_TO_SUBTYPE dict for multi-word triggers, matched as whole-phrase substrings (NOT single-word membership). Drop-audit phrases: "why didn't"/"why didnt"/"why am i not"/"why am i missing"/"what was filtered"/"drop audit"/"filtered out" all map to a new "drop_audit" subtype. Phrase-match keeps "why" alone from triggering a false positive on every "why is X" question. - _detect_env_subtype now checks phrases first, then falls back to single-word tokenized match. Class B -- positive-framed anti-hallucination clause: - New module-level ENV_GROUNDING_CLAUSE constant. Appended to the system prompt whenever env scope is detected (after env_block + drop_block injection). - Per Matt's mitigation guidance: positive ("answer from the blocks") not negative ("do not hallucinate"). Wording: "ENVIRONMENTAL CONTEXT GROUNDING: Answer only from the environmental context blocks above. 
If a block is empty or missing for an adapter the user asked about (e.g. no NWS alerts in the block), say something like 'No active right now' -- never invent specific numbers, place names, or counts. If you do not have a relevant block for the question, say so briefly." 7-path verification, post-fix (real Gemini, prod container): | # | query | method | verdict | |---|-------------------------------------------------|----------------------|---------| | 1 | "are there any fires near me?" | build_fires_detail | PASS | | 2 | "any weather alerts?" | build_alerts_detail | PASS | | 3 | "any earthquakes nearby?" | build_quakes_detail | PASS | | 4 | "how's traffic on I-84?" | build_traffic_detail | PASS | | 5 | "what's the snake river level?" | build_gauges_detail | PASS | | 6 | "what are the band conditions?" | build_swpc_detail | PASS | | 7 | "why didn't I hear about anything today?" | build_drop_audit | PASS | Hallucination evidence (pre vs post on the quakes path): pre-fix: "There have been 144 earthquakes of magnitude 1.5 or greater in the past 24 hours worldwide. Some of the most recent earthquakes reported include: A magnitude 2.1 earthquake in Pahala, Hawaii..." (fabricated) post-fix: "I haven't observed any information about earthquakes in the mesh data." (grounded) Routing-miss evidence (pre vs post on the traffic path): pre-fix: _detect_env_subtype("how's traffic on I-84?") -> None -> env scope NOT triggered, build_traffic_detail never called, LLM fabricated I-84 conditions in OR post-fix: _detect_env_subtype("how's traffic on I-84?") -> "traffic" -> env scope triggers, build_traffic_detail returns 185 chars of real Ada-county incident data, LLM grounds on it: "I haven't observed any active traffic incidents on I-84 within the last two hours. The current active incidents are on North 9th Street / South 9th Street and SH-21, both in Ada." Tests: - 7 phase4 tests pass (no new tests needed; verification is the LLM DM path itself). 
- Full suite: 56 passed in 3.80s across phase1+phase2+phase3+phase4 +or-arch+include-roundtrip. Co-Authored-By: Claude Opus 4.7 (1M context) --- meshai/router.py | 61 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/meshai/router.py b/meshai/router.py index 54a3bb6..a912e0b 100644 --- a/meshai/router.py +++ b/meshai/router.py @@ -91,18 +91,49 @@ _ENV_KEYWORDS_TO_SUBTYPE: dict[str, str] = { "road": "traffic", "roads": "traffic", "jam": "traffic", "crash": "traffic", "closure": "traffic", "511": "traffic", "incident": "traffic", "incidents": "traffic", + # v0.7-fire-4-final: "traffic"/"commute"/"highway" added so a + # query literally mentioning "traffic" hits the traffic subtype. + "traffic": "traffic", "commute": "traffic", "highway": "traffic", # generic "storm": "alerts", "weather": "alerts", } -def _detect_env_subtype(message_lower: str) -> Optional[str]: - """Return the env subtype matched by the first env keyword in the message. +# v0.7-fire-4-final: multi-word phrase triggers. Matched as whole- +# phrase substrings of the message (NOT single-word membership) so +# they carry multi-word semantics without a single word like "why" +# or "filtered" firing false-positives in unrelated queries. +# Drop-audit phrases unlock the env scope path so +# env_reporter.build_drop_audit lands in the system prompt. +_ENV_PHRASES_TO_SUBTYPE: dict[str, str] = { + "why didn't": "drop_audit", + "why didnt": "drop_audit", + "why am i not": "drop_audit", + "why am i missing": "drop_audit", + "what was filtered":"drop_audit", + "drop audit": "drop_audit", + "filtered out": "drop_audit", +} - `None` when no env keyword matches. Uses set intersection on tokenized - words so partial-word collisions (e.g. "firearm" / "fire") don\'t fire.""" + +def _detect_env_subtype(message_lower: str) -> Optional[str]: + """Return the env subtype matched by the first env keyword/phrase + in the message. `None` when no env keyword matches. 
+ + v0.7-fire-4-final: phrase map is checked FIRST so multi-word + triggers (e.g. "why didn't I hear ...") work without their + constituent single words (e.g. "why" alone) firing false + positives. Single-word map then uses set-intersection on + tokenized words so partial-word collisions ("firearm" / "fire") + don't fire. + """ if not message_lower: return None + # 1) Phrase substring match (multi-word semantics). + for phrase, subtype in _ENV_PHRASES_TO_SUBTYPE.items(): + if phrase in message_lower: + return subtype + # 2) Single-word tokenized match. words = set(re.findall(r"\b\w+\b", message_lower)) for kw, subtype in _ENV_KEYWORDS_TO_SUBTYPE.items(): if kw in words: @@ -777,6 +808,12 @@ class MessageRouter: drop_block = env_reporter.build_drop_audit(hours=1) if drop_block: system_prompt += "\n\n" + drop_block + # v0.7-fire-4-final: positive-framed grounding clause. + # Closes Class B hallucination (LLM inventing counts + # / place names when an env block is empty -- e.g. + # "144 earthquakes worldwide" against an empty + # quake_events 24h window). + system_prompt += "\n\n" + ENV_GROUNDING_CLAUSE except Exception: logger.exception("env_reporter injection failed") @@ -949,3 +986,19 @@ class MessageRouter: connector=self.connector, history=self.history, ) + + +# v0.7-fire-4-final: positive-framed grounding clause appended to +# the system prompt whenever env scope is detected. Frames the +# constraint as "answer from the blocks" rather than "do not +# hallucinate" (Matt's mitigation guidance) so the LLM doesn't +# default to a blanket apology disclaimer every other message. +ENV_GROUNDING_CLAUSE = ( + "ENVIRONMENTAL CONTEXT GROUNDING:\n" + "Answer only from the environmental context blocks above. If a " + "block is empty or missing for an adapter the user asked about " + "(e.g. no NWS alerts in the block), say something like \"No " + "active right now\" -- never invent specific numbers, " + "place names, or counts. 
If you do not have a relevant block for " + "the question, say so briefly." +)