central/tests/test_consumer_doc.py

"""Consistency tests for docs/CONSUMER-INTEGRATION.md.

The doc is the consumer contract. These tests catch drift between the doc and
the live code:

  - Every StreamEntry in src/central/streams.py must appear in the doc's
    "Stream layout" table — and vice versa.
  - Every adapter discovered via central.adapter_discovery.discover_adapters()
    must have a per-adapter subsection (### <name>) in the doc — and vice versa.

The doc's subject patterns and per-adapter copy are NOT directly asserted (they
are operator-readable prose, not machine-parsable); the registry-level checks
guard against the most common drift (adding a stream / adapter and forgetting
to document it).
"""

import re
from pathlib import Path

from central.adapter_discovery import discover_adapters
from central.streams import STREAMS

# Location of the consumer contract, resolved from this file's own path:
# parents[1] is the directory one level above the one holding this module.
DOC_PATH = Path(__file__).resolve().parents[1] / "docs" / "CONSUMER-INTEGRATION.md"


def _doc_text() -> str:
    """Return the full text of the consumer-integration doc.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding. The per-adapter heading parser in this module matches a
    literal em dash, which a non-UTF-8 default encoding would decode to
    different characters and silently break the match.

    Raises:
        AssertionError: if ``DOC_PATH`` is not an existing file.
    """
    assert DOC_PATH.is_file(), f"missing: {DOC_PATH}"
    return DOC_PATH.read_text(encoding="utf-8")


def _stream_layout_rows(doc: str) -> list[tuple[str, str]]:
    """Parse the doc's "Stream layout" table -> list of (stream_name, subject_filter).

    Args:
        doc: Full markdown text of the consumer-integration doc.

    Returns:
        One ``(stream_name, subject_filter)`` tuple per table row, in document
        order. Rows whose first two cells are not backtick-quoted (the table
        header and the ``|---|`` separator) do not match and are skipped.

    Raises:
        AssertionError: if the doc has no ``## 3. Stream layout`` heading.
    """
    # The section body runs up to the next H2 heading, or to end-of-text when
    # this happens to be the last section in the doc.
    section_re = re.compile(
        r"^## 3\. Stream layout\s*\n(.*?)(?=^## |\Z)",
        re.DOTALL | re.MULTILINE,
    )
    m = section_re.search(doc)
    assert m, "doc missing '## 3. Stream layout' section"
    section = m.group(1)

    # Each row: | `CENTRAL_XX` | `central.xx.>` | ... |
    # Digits are accepted in both cells so a stream name or subject token that
    # contains one is parsed rather than silently dropped from the result.
    row_re = re.compile(
        r"^\|\s*`([A-Z0-9_]+)`\s*\|\s*`(central\.[a-z0-9_]+\.>)`\s*\|",
        re.MULTILINE,
    )
    # Two capture groups, so findall already yields a list of 2-tuples.
    return row_re.findall(section)


def _per_adapter_subsections(doc: str) -> list[str]:
    """Pull adapter names from the per-adapter section headings: '### <adapter> — ...'.

    Only counts subsections inside '## 6. Per-adapter reference'.

    Args:
        doc: Full markdown text of the consumer-integration doc.

    Returns:
        Adapter names in document order. Duplicates are preserved so callers
        can detect a heading that appears twice.

    Raises:
        AssertionError: if the doc has no ``## 6. Per-adapter reference`` heading.
    """
    # The section body runs up to the next H2 heading, or to end-of-text when
    # this happens to be the last section in the doc.
    section_re = re.compile(
        r"^## 6\. Per-adapter reference\s*\n(.*?)(?=^## |\Z)",
        re.DOTALL | re.MULTILINE,
    )
    m = section_re.search(doc)
    assert m, "doc missing '## 6. Per-adapter reference' section"
    section = m.group(1)

    # A heading counts only when the name is followed by " — " (em dash).
    heading_re = re.compile(r"^### ([a-z0-9_]+) — ", re.MULTILINE)
    return heading_re.findall(section)


def test_doc_exists():
    """The consumer-integration doc must be present at ``DOC_PATH``."""
    doc_present = DOC_PATH.is_file()
    assert doc_present, f"doc missing: {DOC_PATH}"


def test_stream_table_matches_registry():
    """The doc's stream table and the STREAMS registry must list the same streams.

    Names and subject filters are compared as two independent sets, so the
    failure message says which side (doc or code) holds the unmatched entry.
    """
    doc_names: set[str] = set()
    doc_filters: set[str] = set()
    for row_name, row_filter in _stream_layout_rows(_doc_text()):
        doc_names.add(row_name)
        doc_filters.add(row_filter)

    code_names: set[str] = set()
    code_filters: set[str] = set()
    for entry in STREAMS:
        code_names.add(entry.name)
        code_filters.add(entry.subject_filter)

    assert doc_names == code_names, (
        f"stream-name drift: doc-only={doc_names - code_names}, "
        f"code-only={code_names - doc_names}"
    )
    assert doc_filters == code_filters, (
        f"subject-filter drift: doc-only={doc_filters - code_filters}, "
        f"code-only={code_filters - doc_filters}"
    )


def test_stream_table_name_subject_pairs_consistent():
    """Doc rows and registry entries must agree as (name, subject_filter) pairs.

    Comparing whole pairs catches a subject filter attached to the wrong
    stream, which comparing names and filters as separate sets cannot see.
    """
    parsed_rows = _stream_layout_rows(_doc_text())
    doc_rows = set(parsed_rows)

    code_rows = set()
    for entry in STREAMS:
        code_rows.add((entry.name, entry.subject_filter))

    assert doc_rows == code_rows, (
        f"row drift: doc-only={doc_rows - code_rows}, code-only={code_rows - doc_rows}"
    )


def test_every_adapter_has_a_subsection():
    """Documented adapters and discovered adapters must be the same set of names."""
    headings = _per_adapter_subsections(_doc_text())
    doc_adapters = set(headings)

    registry = discover_adapters()
    code_adapters = set(registry.keys())

    assert doc_adapters == code_adapters, (
        f"adapter coverage drift: doc-only={doc_adapters - code_adapters}, "
        f"code-only={code_adapters - doc_adapters}"
    )


def test_subsections_appear_in_doc_order_matches_registry_size():
    """The number of '### <adapter>' headings inside §6 must equal the registry size.

    The headings are first checked for repeats (a set comparison alone would
    hide a duplicated heading), then counted against the discovered adapters.
    """
    doc_adapters = _per_adapter_subsections(_doc_text())
    heading_count = len(doc_adapters)
    distinct_count = len(set(doc_adapters))
    assert heading_count == distinct_count, (
        f"duplicate per-adapter sections: {[a for a in doc_adapters if doc_adapters.count(a) > 1]}"
    )

    registry_size = len(discover_adapters())
    assert heading_count == registry_size


def test_castle_rock_legacy_adapters_remain_removed():
    """v0.10.3 regression guard for the removed Castle Rock legacy adapter pair.

    ``state_511_atis`` and ``state_511_atis_cameras`` consumed the legacy
    ``/map/mapIcons/`` + ``/List/GetData/`` shape, which is end-of-life on
    Idaho 511 (the only source they served). The official ITD adapters
    (``itd_511`` + ``itd_511_cameras``, v0.10.0) supersede them, and the
    sister-site discovery found no other Castle Rock customer still exposing
    that shape. The test fails if either name reappears in the adapter
    registry or either module file reappears in the adapters directory.
    """
    registry = discover_adapters()
    registry_guards = (
        (
            "state_511_atis",
            "state_511_atis was removed in v0.10.3; use itd_511 (v0.10.0) instead",
        ),
        (
            "state_511_atis_cameras",
            "state_511_atis_cameras was removed in v0.10.3; use itd_511_cameras (v0.10.0) instead",
        ),
    )
    for legacy_name, reason in registry_guards:
        assert legacy_name not in registry, reason

    adapters_dir = Path(__file__).resolve().parents[1] / "src" / "central" / "adapters"
    file_guards = (
        (
            "state_511_atis.py",
            "state_511_atis.py was removed in v0.10.3; do not re-add",
        ),
        (
            "state_511_atis_cameras.py",
            "state_511_atis_cameras.py was removed in v0.10.3; do not re-add",
        ),
    )
    for module_file, reason in file_guards:
        assert not (adapters_dir / module_file).exists(), reason
docs(2-H): consumer integration spec — docs/CONSUMER-INTEGRATION.md (#38) Adds the consumer contract for Central's NATS event streams. Primary reader: a Claude Code instance building MeshAI's ingestion layer. The doc IS the spec -- no "see source for details". Opens with Matt's framing: "Central takes it all and gives it all. It's up to the pipe to do with it what it will." Central is a faithful firehose -- adapters preserve every upstream field with no enrichment / formatting / opinionated translation. The CloudEvents envelope adds routing + dedup support; everything else is upstream-shaped. Where the doc lists upstream lookup endpoints for ID-only fields, that is consumer-side convenience -- explicitly NOT a recommendation that Central enrich. Sections (11 total): 1. Quick start (5-line nats-py subscribe-and-print) 2. Connection details (URL / auth / JetStream context / stream discovery) 3. Stream layout (7 streams, derived from streams.py registry) 4. Subject namespace registry (Mermaid tree + full pattern table) 5. Wire format (5a CloudEvents envelope; 5b inner Event payload) -- explicit callout that geo.centroid is [lon, lat] GeoJSON, NOT [lat, lon] 6. Per-adapter reference (12 subsections, locked template) 7. Fall-off / removal semantics (explicit subjects vs absence-as-signal) 8. Consumer patterns (durable vs ephemeral, ack/nack/term, worked example) 9. Dedup implementation guide (single-token vs composite-key adapters) 10. Writing a new consumer checklist 11. Troubleshooting Doc length: 1878 lines (target was 600-1000 originally; revised to 1200-1800 once full-fidelity JSON examples + inciweb 3x narratives + wfigs_perimeters polygon were folded in). Completeness wins per the design principle. Every JSON example is verbatim from CT104. 
11 examples sourced from /tmp/nwis-build/evidence.txt (dumped via psql jsonb_pretty); the wfigs_perimeters example is a freshly pulled smallest-active-polygon record so the doc captures the live polygon shape without flooding the page with thousands of coordinate pairs. The doc is assembled by /tmp/nwis-build/build_doc.py which splices live JSON blocks into a markdown template. The build script is local-only (not committed) because the doc itself is the artifact; future updates regenerate by re-pulling live evidence and re-running the assembler. New test: tests/test_consumer_doc.py (5 tests). Parses the doc and asserts: - The "Stream layout" table matches central.streams.STREAMS exactly (stream names + subject filters). - The (name, subject_filter) pairs match the registry as pairs (catches swapped subject filters on existing streams). - Every adapter discovered via central.adapter_discovery.discover_adapters() has a per-adapter subsection -- and vice versa. - The subsection count equals the registry size (catches duplicates). Verification: - 463/463 full suite green (was 458; +5 new consumer_doc tests). - Doc structure: 1 H1, 12 H2, 33 H3, 12 per-adapter sections, 1 mermaid block, 12 JSON blocks (all parse). - All 12 adapters covered. - No regressions elsewhere. Acceptance bars (a)-(e) verbatim: (a) grep "subject_for_event\|_ADAPTER_REGISTRY" -> empty (b) all 12 adapters have per-adapter subsections (c) 5/5 consumer-doc tests pass (d) 463/463 full suite (e) doc length 1878 lines markdownlint was not available on CT104; substituted an inline Python sanity check confirming code-fence balance, JSON-block validity, and structural integrity (12 H2 / 33 H3 / 1 mermaid). Co-authored-by: zvx <zvx@central> 2026-05-19 14:33:51 -06:00			`"""Consistency tests for docs/CONSUMER-INTEGRATION.md.`

			`The doc is the consumer contract. These tests catch drift between the doc and`
			`the live code:`

			`- Every StreamEntry in src/central/streams.py must appear in the doc's`
			`"Stream layout" table — and vice versa.`
			`- Every adapter discovered via central.adapter_discovery.discover_adapters()`
			`must have a per-adapter subsection (### <name>) in the doc — and vice versa.`

			`The doc's subject patterns and per-adapter copy are NOT directly asserted (they`
			`are operator-readable prose, not machine-parsable); the registry-level checks`
			`guard against the most common drift (adding a stream / adapter and forgetting`
			`to document it).`
			`"""`

			`import re`
			`from pathlib import Path`

			`from central.adapter_discovery import discover_adapters`
			`from central.streams import STREAMS`

			`DOC_PATH = Path(__file__).resolve().parents[1] / "docs" / "CONSUMER-INTEGRATION.md"`


			`def _doc_text() -> str:`
			`assert DOC_PATH.is_file(), f"missing: {DOC_PATH}"`
			`return DOC_PATH.read_text()`


			`def _stream_layout_rows(doc: str) -> list[tuple[str, str]]:`
			`"""Parse the doc's "Stream layout" table -> list of (stream_name, subject_filter)."""`
			`section_re = re.compile(`
			`r"^## 3\. Stream layout\s*\n(.*?)(?=^## )",`
			`re.DOTALL | re.MULTILINE,`
			`)`
			`m = section_re.search(doc)`
			`assert m, "doc missing '## 3. Stream layout' section"`
			`section = m.group(1)`

			`rows: list[tuple[str, str]] = []`
			# Each row: | `CENTRAL_XX` | `central.xx.>` | ... |
			row_re = re.compile(r"^\|\s*`([A-Z_]+)`\s*\|\s*`(central\.[a-z_]+\.>)`\s*\|", re.MULTILINE)
			`for name, subj in row_re.findall(section):`
			`rows.append((name, subj))`
			`return rows`


			`def _per_adapter_subsections(doc: str) -> list[str]:`
			`"""Pull adapter names from the per-adapter section headings: '### <adapter> — ...'.`

			`Only counts subsections inside '## 6. Per-adapter reference'.`
			`"""`
			`section_re = re.compile(`
			`r"^## 6\. Per-adapter reference\s*\n(.*?)(?=^## )",`
			`re.DOTALL | re.MULTILINE,`
			`)`
			`m = section_re.search(doc)`
			`assert m, "doc missing '## 6. Per-adapter reference' section"`
			`section = m.group(1)`

feat(state_511_atis): Castle Rock 511 adapter — Idaho incidents/closures/road work (v0.9.2) Second CENTRAL_TRAFFIC adapter. Production code; central-supervisor + central-gui restart (new adapter class + ADAPTER_GROUPS). No new stream -> no archive restart; migration 026 adds the adapter row only. Ships disabled. Two-endpoint join per layer: GET /map/mapIcons/<Layer> (markers: itemId + coords) joined on id with POST /List/GetData/<Layer> (DataTables detail: roadwayName, description, county, severity). The marker feed has coords but no text; the List feed has text but no coords. Layers -> event_types (wzdx category/subject precedent): Incidents->incident, Closures->closure, Construction (type "Roadwork")->work_zone. category is "<event_type>.state_511_atis"; subject central.traffic.<event_type>.<state>. Severity 3 if isFullClosure else 1. Cadence 300s. Dedup inherited from the v0.9.1 SourceAdapter mixin. enrichment_locations canonical (latitude,longitude) from the marker join; county/state come upstream. Templatized per state via settings {"states":[{code,base_url}]} but ships Idaho-only: cross-state spot-checks refuted the shared-URL hypothesis (Oregon TripCheck is HTML, Wyoming wyoroad 404 -- neither is Castle Rock). Add states as settings rows once each host is verified. Also fixes a latent test bug: test_consumer_doc per-adapter heading regex was [a-z_]+ (no digits); state_511_atis is the first adapter name with digits, so widened to [a-z0-9_]+. Full suite: 759 passed, 1 skipped (central and unprivileged zvx, 3x each). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-25 22:01:11 +00:00			`heading_re = re.compile(r"^### ([a-z0-9_]+) — ", re.MULTILINE)`
docs(2-H): consumer integration spec — docs/CONSUMER-INTEGRATION.md (#38) Adds the consumer contract for Central's NATS event streams. Primary reader: a Claude Code instance building MeshAI's ingestion layer. The doc IS the spec -- no "see source for details". Opens with Matt's framing: "Central takes it all and gives it all. It's up to the pipe to do with it what it will." Central is a faithful firehose -- adapters preserve every upstream field with no enrichment / formatting / opinionated translation. The CloudEvents envelope adds routing + dedup support; everything else is upstream-shaped. Where the doc lists upstream lookup endpoints for ID-only fields, that is consumer-side convenience -- explicitly NOT a recommendation that Central enrich. Sections (11 total): 1. Quick start (5-line nats-py subscribe-and-print) 2. Connection details (URL / auth / JetStream context / stream discovery) 3. Stream layout (7 streams, derived from streams.py registry) 4. Subject namespace registry (Mermaid tree + full pattern table) 5. Wire format (5a CloudEvents envelope; 5b inner Event payload) -- explicit callout that geo.centroid is [lon, lat] GeoJSON, NOT [lat, lon] 6. Per-adapter reference (12 subsections, locked template) 7. Fall-off / removal semantics (explicit subjects vs absence-as-signal) 8. Consumer patterns (durable vs ephemeral, ack/nack/term, worked example) 9. Dedup implementation guide (single-token vs composite-key adapters) 10. Writing a new consumer checklist 11. Troubleshooting Doc length: 1878 lines (target was 600-1000 originally; revised to 1200-1800 once full-fidelity JSON examples + inciweb 3x narratives + wfigs_perimeters polygon were folded in). Completeness wins per the design principle. Every JSON example is verbatim from CT104. 
11 examples sourced from /tmp/nwis-build/evidence.txt (dumped via psql jsonb_pretty); the wfigs_perimeters example is a freshly pulled smallest-active-polygon record so the doc captures the live polygon shape without flooding the page with thousands of coordinate pairs. The doc is assembled by /tmp/nwis-build/build_doc.py which splices live JSON blocks into a markdown template. The build script is local-only (not committed) because the doc itself is the artifact; future updates regenerate by re-pulling live evidence and re-running the assembler. New test: tests/test_consumer_doc.py (5 tests). Parses the doc and asserts: - The "Stream layout" table matches central.streams.STREAMS exactly (stream names + subject filters). - The (name, subject_filter) pairs match the registry as pairs (catches swapped subject filters on existing streams). - Every adapter discovered via central.adapter_discovery.discover_adapters() has a per-adapter subsection -- and vice versa. - The subsection count equals the registry size (catches duplicates). Verification: - 463/463 full suite green (was 458; +5 new consumer_doc tests). - Doc structure: 1 H1, 12 H2, 33 H3, 12 per-adapter sections, 1 mermaid block, 12 JSON blocks (all parse). - All 12 adapters covered. - No regressions elsewhere. Acceptance bars (a)-(e) verbatim: (a) grep "subject_for_event\|_ADAPTER_REGISTRY" -> empty (b) all 12 adapters have per-adapter subsections (c) 5/5 consumer-doc tests pass (d) 463/463 full suite (e) doc length 1878 lines markdownlint was not available on CT104; substituted an inline Python sanity check confirming code-fence balance, JSON-block validity, and structural integrity (12 H2 / 33 H3 / 1 mermaid). Co-authored-by: zvx <zvx@central> 2026-05-19 14:33:51 -06:00			`return heading_re.findall(section)`


			`def test_doc_exists():`
			`assert DOC_PATH.is_file(), f"doc missing: {DOC_PATH}"`


			`def test_stream_table_matches_registry():`
			`"""Every StreamEntry in streams.py must appear in the doc's stream layout table."""`
			`doc_rows = _stream_layout_rows(_doc_text())`
			`doc_names = {n for n, _ in doc_rows}`
			`doc_filters = {f for _, f in doc_rows}`

			`code_names = {s.name for s in STREAMS}`
			`code_filters = {s.subject_filter for s in STREAMS}`

			`assert doc_names == code_names, (`
			`f"stream-name drift: doc-only={doc_names - code_names}, "`
			`f"code-only={code_names - doc_names}"`
			`)`
			`assert doc_filters == code_filters, (`
			`f"subject-filter drift: doc-only={doc_filters - code_filters}, "`
			`f"code-only={code_filters - doc_filters}"`
			`)`


			`def test_stream_table_name_subject_pairs_consistent():`
			`"""Each (stream_name, subject_filter) pair in the doc must match the registry exactly.`

			`Catches the case where someone swaps the subject filter on one stream`
			`without updating its row.`
			`"""`
			`doc_rows = set(_stream_layout_rows(_doc_text()))`
			`code_rows = {(s.name, s.subject_filter) for s in STREAMS}`
			`assert doc_rows == code_rows, (`
			`f"row drift: doc-only={doc_rows - code_rows}, code-only={code_rows - doc_rows}"`
			`)`


			`def test_every_adapter_has_a_subsection():`
			`"""Every adapter discovered in central.adapters must have a per-adapter doc subsection."""`
			`doc_adapters = set(_per_adapter_subsections(_doc_text()))`
			`code_adapters = set(discover_adapters().keys())`
			`assert doc_adapters == code_adapters, (`
			`f"adapter coverage drift: doc-only={doc_adapters - code_adapters}, "`
			`f"code-only={code_adapters - doc_adapters}"`
			`)`


			`def test_subsections_appear_in_doc_order_matches_registry_size():`
			`"""Sanity: the count of '### <adapter>' headings inside §6 equals the registry size.`

			`Independent count check; catches the case where one heading is duplicated.`
			`"""`
			`doc_adapters = _per_adapter_subsections(_doc_text())`
			`assert len(doc_adapters) == len(set(doc_adapters)), (`
			`f"duplicate per-adapter sections: {[a for a in doc_adapters if doc_adapters.count(a) > 1]}"`
			`)`
			`assert len(doc_adapters) == len(discover_adapters())`
v0.10.3: rip out state_511_atis adapter (superseded by itd_511 v0.10.0; Castle Rock legacy shape EOL per sister-site discovery) (#88) Closes #88 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-06-06 14:44:00 -06:00

			`def test_castle_rock_legacy_adapters_remain_removed():`
			"""v0.10.3 regression guard: ``state_511_atis`` and ``state_511_atis_cameras``
			were ripped out because the Castle Rock legacy ``/map/mapIcons/`` +
			``/List/GetData/`` shape is end-of-life on the only Idaho source we cared
			about (Idaho 511) -- the official ITD adapters (``itd_511`` + ``itd_511_cameras``,
			`v0.10.0) supersede them. The sister-site discovery confirmed no other`
			`Castle Rock customer still exposes the legacy shape that this adapter pair`
			`consumed. Re-adding either module would resurrect a dying-upstream dependency."""`
			`registry = discover_adapters()`
			`assert "state_511_atis" not in registry, (`
			`"state_511_atis was removed in v0.10.3; use itd_511 (v0.10.0) instead"`
			`)`
			`assert "state_511_atis_cameras" not in registry, (`
			`"state_511_atis_cameras was removed in v0.10.3; use itd_511_cameras (v0.10.0) instead"`
			`)`
			`adapters_dir = Path(__file__).resolve().parents[1] / "src" / "central" / "adapters"`
			`assert not (adapters_dir / "state_511_atis.py").exists(), (`
			`"state_511_atis.py was removed in v0.10.3; do not re-add"`
			`)`
			`assert not (adapters_dir / "state_511_atis_cameras.py").exists(), (`
			`"state_511_atis_cameras.py was removed in v0.10.3; do not re-add"`
			`)`