mirror of
https://github.com/zvx-echo6/meshai.git
synced 2026-06-11 01:14:45 +02:00
fix(v0.7-fire-tracker-4-final): widen env-scope keyword catch + anti-hallucination clause -- close all 7 LLM DM paths
Follow-up to v0.7-fire-tracker-4-revised. The 7-path verification found
4 paths failing from 2 root causes: (A) _ENV_KEYWORDS_TO_SUBTYPE was
missing "traffic" and the drop-audit phrases, so build_traffic_detail and
build_drop_audit never ran; (B) the LLM hallucinated specific numbers when
env blocks were empty (it fabricated 144 earthquakes from a blank
quake_events table). This commit widens the keyword catch (phrase-match
for multi-word triggers to avoid false positives) and adds a positive-
framed anti-hallucination instruction to the env-context system prompt.
Re-verified all 7 paths against real Gemini in the prod container;
verdicts in v0.7-firetracker-phase4.md.
Class A -- routing miss fix:
- _ENV_KEYWORDS_TO_SUBTYPE gains "traffic"/"commute"/"highway"
mapped to the existing "traffic" subtype.
- New _ENV_PHRASES_TO_SUBTYPE dict for multi-word triggers, matched
as whole-phrase substrings (NOT single-word membership). Drop-audit
phrases: "why didn't"/"why didnt"/"why am i not"/"why am i missing"
/"what was filtered"/"drop audit"/"filtered out" all map to a new
"drop_audit" subtype. Phrase-match keeps "why" alone from
causing a false positive on every "why is X" question.
- _detect_env_subtype now checks phrases first, then falls back to
single-word tokenized match.
Class B -- positive-framed anti-hallucination clause:
- New module-level ENV_GROUNDING_CLAUSE constant. Appended to the
system prompt whenever env scope is detected (after env_block +
drop_block injection).
- Per Matt's mitigation guidance, the instruction is framed positively
("answer from the blocks") rather than negatively ("do not
hallucinate"). Wording:
"ENVIRONMENTAL CONTEXT GROUNDING:
Answer only from the environmental context blocks above. If a
block is empty or missing for an adapter the user asked about
(e.g. no NWS alerts in the block), say something like 'No active
<category> right now' -- never invent specific numbers, place
names, or counts. If you do not have a relevant block for the
question, say so briefly."
7-path verification, post-fix (real Gemini, prod container):
| # | query | method | verdict |
|---|-------------------------------------------------|----------------------|---------|
| 1 | "are there any fires near me?" | build_fires_detail | PASS |
| 2 | "any weather alerts?" | build_alerts_detail | PASS |
| 3 | "any earthquakes nearby?" | build_quakes_detail | PASS |
| 4 | "how's traffic on I-84?" | build_traffic_detail | PASS |
| 5 | "what's the snake river level?" | build_gauges_detail | PASS |
| 6 | "what are the band conditions?" | build_swpc_detail | PASS |
| 7 | "why didn't I hear about anything today?" | build_drop_audit | PASS |
Hallucination evidence (pre vs post on the quakes path):
pre-fix: "There have been 144 earthquakes of magnitude 1.5 or
greater in the past 24 hours worldwide. Some of the most
recent earthquakes reported include: A magnitude 2.1
earthquake in Pahala, Hawaii..." (fabricated)
post-fix: "I haven't observed any information about earthquakes
in the mesh data." (grounded)
Routing-miss evidence (pre vs post on the traffic path):
pre-fix: _detect_env_subtype("how's traffic on I-84?") -> None
-> env scope NOT triggered, build_traffic_detail never
called, LLM fabricated I-84 conditions in OR
post-fix: _detect_env_subtype("how's traffic on I-84?") -> "traffic"
-> env scope triggers, build_traffic_detail returns
185 chars of real Ada-county incident data, LLM
grounds on it: "I haven't observed any active
traffic incidents on I-84 within the last two hours.
The current active incidents are on North 9th Street
/ South 9th Street and SH-21, both in Ada."
Tests:
- 7 phase4 tests pass (no new tests needed; verification is the LLM
DM path itself).
- Full suite: 56 passed in 3.80s across phase1+phase2+phase3+phase4
+or-arch+include-roundtrip.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
89640f624d
commit
0934601265
1 changed file with 57 additions and 4 deletions
|
|
@ -91,18 +91,49 @@ _ENV_KEYWORDS_TO_SUBTYPE: dict[str, str] = {
|
||||||
"road": "traffic", "roads": "traffic", "jam": "traffic",
|
"road": "traffic", "roads": "traffic", "jam": "traffic",
|
||||||
"crash": "traffic", "closure": "traffic", "511": "traffic",
|
"crash": "traffic", "closure": "traffic", "511": "traffic",
|
||||||
"incident": "traffic", "incidents": "traffic",
|
"incident": "traffic", "incidents": "traffic",
|
||||||
|
# v0.7-fire-4-final: "traffic"/"commute"/"highway" added so a
|
||||||
|
# query literally mentioning "traffic" hits the traffic subtype.
|
||||||
|
"traffic": "traffic", "commute": "traffic", "highway": "traffic",
|
||||||
# generic
|
# generic
|
||||||
"storm": "alerts", "weather": "alerts",
|
"storm": "alerts", "weather": "alerts",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _detect_env_subtype(message_lower: str) -> Optional[str]:
|
# v0.7-fire-4-final: multi-word phrase triggers. Matched as whole-
|
||||||
"""Return the env subtype matched by the first env keyword in the message.
|
# phrase substrings of the message (NOT single-word membership) so
|
||||||
|
# they carry multi-word semantics without a single word like "why"
|
||||||
|
# or "filtered" firing false-positives in unrelated queries.
|
||||||
|
# Drop-audit phrases unlock the env scope path so
|
||||||
|
# env_reporter.build_drop_audit lands in the system prompt.
|
||||||
|
_ENV_PHRASES_TO_SUBTYPE: dict[str, str] = {
|
||||||
|
"why didn't": "drop_audit",
|
||||||
|
"why didnt": "drop_audit",
|
||||||
|
"why am i not": "drop_audit",
|
||||||
|
"why am i missing": "drop_audit",
|
||||||
|
"what was filtered":"drop_audit",
|
||||||
|
"drop audit": "drop_audit",
|
||||||
|
"filtered out": "drop_audit",
|
||||||
|
}
|
||||||
|
|
||||||
`None` when no env keyword matches. Uses set intersection on tokenized
|
|
||||||
words so partial-word collisions (e.g. "firearm" / "fire") don\'t fire."""
|
def _detect_env_subtype(message_lower: str) -> Optional[str]:
|
||||||
|
"""Return the env subtype matched by the first env keyword/phrase
|
||||||
|
in the message. `None` when no env keyword matches.
|
||||||
|
|
||||||
|
v0.7-fire-4-final: phrase map is checked FIRST so multi-word
|
||||||
|
triggers (e.g. "why didn't I hear ...") work without their
|
||||||
|
constituent single words (e.g. "why" alone) firing false
|
||||||
|
positives. Single-word map then uses set-intersection on
|
||||||
|
tokenized words so partial-word collisions ("firearm" / "fire")
|
||||||
|
don't fire.
|
||||||
|
"""
|
||||||
if not message_lower:
|
if not message_lower:
|
||||||
return None
|
return None
|
||||||
|
# 1) Phrase substring match (multi-word semantics).
|
||||||
|
for phrase, subtype in _ENV_PHRASES_TO_SUBTYPE.items():
|
||||||
|
if phrase in message_lower:
|
||||||
|
return subtype
|
||||||
|
# 2) Single-word tokenized match.
|
||||||
words = set(re.findall(r"\b\w+\b", message_lower))
|
words = set(re.findall(r"\b\w+\b", message_lower))
|
||||||
for kw, subtype in _ENV_KEYWORDS_TO_SUBTYPE.items():
|
for kw, subtype in _ENV_KEYWORDS_TO_SUBTYPE.items():
|
||||||
if kw in words:
|
if kw in words:
|
||||||
|
|
@ -777,6 +808,12 @@ class MessageRouter:
|
||||||
drop_block = env_reporter.build_drop_audit(hours=1)
|
drop_block = env_reporter.build_drop_audit(hours=1)
|
||||||
if drop_block:
|
if drop_block:
|
||||||
system_prompt += "\n\n" + drop_block
|
system_prompt += "\n\n" + drop_block
|
||||||
|
# v0.7-fire-4-final: positive-framed grounding clause.
|
||||||
|
# Closes Class B hallucination (LLM inventing counts
|
||||||
|
# / place names when an env block is empty -- e.g.
|
||||||
|
# "144 earthquakes worldwide" against an empty
|
||||||
|
# quake_events 24h window).
|
||||||
|
system_prompt += "\n\n" + ENV_GROUNDING_CLAUSE
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("env_reporter injection failed")
|
logger.exception("env_reporter injection failed")
|
||||||
|
|
||||||
|
|
@ -949,3 +986,19 @@ class MessageRouter:
|
||||||
connector=self.connector,
|
connector=self.connector,
|
||||||
history=self.history,
|
history=self.history,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# v0.7-fire-4-final: positive-framed grounding clause appended to
|
||||||
|
# the system prompt whenever env scope is detected. Frames the
|
||||||
|
# constraint as "answer from the blocks" rather than "do not
|
||||||
|
# hallucinate" (Matt's mitigation guidance) so the LLM doesn't
|
||||||
|
# default to a blanket apology disclaimer every other message.
|
||||||
|
ENV_GROUNDING_CLAUSE = (
|
||||||
|
"ENVIRONMENTAL CONTEXT GROUNDING:\n"
|
||||||
|
"Answer only from the environmental context blocks above. If a "
|
||||||
|
"block is empty or missing for an adapter the user asked about "
|
||||||
|
"(e.g. no NWS alerts in the block), say something like \"No "
|
||||||
|
"active <category> right now\" -- never invent specific numbers, "
|
||||||
|
"place names, or counts. If you do not have a relevant block for "
|
||||||
|
"the question, say so briefly."
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue