mirror of
https://github.com/zvx-echo6/central.git
synced 2026-05-21 18:14:44 +02:00
The producer-side contract for adapter authors, mirroring PR H's consumer
spec. Self-contained — readers should not need to grep the codebase to
understand what a new SourceAdapter subclass must implement.
Bakes in the Phase 2 design principle ("Central takes it all and gives it
all. It's up to the pipe to do with it what it will.") so future authors
reject enrichment / silent-drop / opinionated-translation proposals on
sight. The previously-proposed Phase 3 NWIS metadata-enrichment ticket is
called out by name as an example of what gets rejected.
12-section outline locked with PM: design principle, quick start (clone
swpc_kindex), SourceAdapter base class, settings, subject namespace,
dedup keys, StreamEntry registry, removal/fall-off, anti-patterns,
preview hook, acceptance gate.
Sibling test (tests/test_producer_doc.py) mirrors test_consumer_doc.py
discipline:
- bidirectional == between SourceAdapter API and §4 method coverage
- preview_for_settings contract verbatim against live docstring
- top-level domain enumeration vs central.streams.STREAMS prefixes
- §8 STREAMS snippet vs central.streams.STREAMS
- anti-patterns adapter-name examples vs discover_adapters()
No hardcoded stream / adapter / domain lists anywhere in the test —
every expected value derives from central.streams,
central.adapter_discovery, or central.adapter at runtime.
Honest about the pre-existing `:` vs `|` dedup-key separator
inconsistency (swpc_alerts and swpc_protons use `|`; everyone else
uses `:`). Recommends `:` for new adapters without forcing a rename PR
on the SWPC pair (separators are persisted in cursors.db rows).
Acceptance bars:
(a) grep -rn 'subject_for_event\|_ADAPTER_REGISTRY' src tests → empty
(b) bidirectional override-method coverage asserted in test
(c) tests/test_producer_doc.py → 6/6 pass
(d) full pytest suite → 469 pass (was 463 pre-PR; +6 new)
(e) doc length: 823 lines (within 500–1200 envelope)
(f) code fences balanced; JSON/Python blocks parse
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
215 lines
8.1 KiB
Python
215 lines
8.1 KiB
Python
"""Consistency tests for docs/PRODUCER-INTEGRATION.md.
|
|
|
|
The doc is the producer-side contract — what an adapter author implements and
|
|
the conventions Central enforces around it. These tests catch drift between
|
|
the doc and the live code:
|
|
|
|
- Every overridable SourceAdapter method documented in §4 must exist on
|
|
central.adapter.SourceAdapter — and vice versa.
|
|
- The preview_for_settings contract quoted in §11.1 must come from the
|
|
actual SourceAdapter.preview_for_settings docstring.
|
|
- The set of top-level domain tokens documented in §6.1 must equal the set
|
|
derived from central.streams.STREAMS subject_filter prefixes.
|
|
- The verbatim STREAMS snippet quoted in §8 must match the live registry.
|
|
|
|
Per the doc's own §10.4, NO hardcoded stream / adapter list literals: every
|
|
expected value derives from central.streams, central.adapter, or
|
|
central.adapter_discovery at runtime.
|
|
"""
|
|
|
|
import inspect
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from central.adapter import SourceAdapter
|
|
from central.adapter_discovery import discover_adapters
|
|
from central.streams import STREAMS
|
|
|
|
# Absolute path of the producer contract: two levels above this file, then
# docs/PRODUCER-INTEGRATION.md.
DOC_PATH = Path(__file__).resolve().parents[1].joinpath("docs", "PRODUCER-INTEGRATION.md")
|
|
|
|
|
|
def _doc_text() -> str:
    """Return the full text of the producer-integration doc at DOC_PATH.

    Decoding is pinned to UTF-8 so the result does not depend on the
    platform's locale encoding.

    Raises AssertionError when DOC_PATH is not an existing file.
    """
    assert DOC_PATH.is_file(), f"missing: {DOC_PATH}"
    return DOC_PATH.read_text(encoding="utf-8")
|
|
|
|
|
|
def _documented_override_methods(doc: str) -> set[str]:
|
|
"""Extract method names documented under '## 4. The SourceAdapter base class'.
|
|
|
|
Looks for the '**`async def <name>(...)`**' / '**`def <name>(...)`**'
|
|
method headings inside §4.
|
|
"""
|
|
section_re = re.compile(
|
|
r"^## 4\. The SourceAdapter base class\s*\n(.*?)(?=^## )",
|
|
re.DOTALL | re.MULTILINE,
|
|
)
|
|
m = section_re.search(doc)
|
|
assert m, "doc missing '## 4. The SourceAdapter base class' section"
|
|
section = m.group(1)
|
|
heading_re = re.compile(r"\*\*`(?:async\s+)?def\s+(\w+)\s*\(", re.MULTILINE)
|
|
return set(heading_re.findall(section))
|
|
|
|
|
|
def _sourceadapter_overridable_methods() -> set[str]:
    """Collect the names of the public callables found on SourceAdapter.

    A member is kept when its name has no leading underscore -- which drops
    dunders, the constructor and private helpers -- and ``inspect`` reports
    it as a function or a coroutine function.
    """
    return {
        attr
        for attr, value in inspect.getmembers(SourceAdapter)
        if not attr.startswith("_")
        and (inspect.isfunction(value) or inspect.iscoroutinefunction(value))
    }
|
|
|
|
|
|
def _streams_domains() -> set[str]:
    """Return the <domain> token of every STREAMS subject filter.

    Each filter is expected to look like 'central.<domain>.>'; a filter of
    any other shape fails with AssertionError instead of being skipped.
    """
    shape = re.compile(r"^central\.([a-z_]+)\.>$")

    def domain_of(subject_filter: str) -> str:
        # Pull the middle token out of one filter, insisting on the shape.
        hit = shape.match(subject_filter)
        assert hit, f"unexpected subject filter shape: {subject_filter!r}"
        return hit.group(1)

    return {domain_of(entry.subject_filter) for entry in STREAMS}
|
|
|
|
|
|
def _documented_domains(doc: str) -> set[str]:
|
|
"""Domain tokens enumerated in §6.1 as backtick literals (`wx`, `fire`, …)."""
|
|
section_re = re.compile(
|
|
r"`<domain>` is one of ([^.]+)\.",
|
|
re.DOTALL,
|
|
)
|
|
m = section_re.search(doc)
|
|
assert m, "doc missing the '`<domain>` is one of ...' enumeration in §6.1"
|
|
enum_text = m.group(1)
|
|
return set(re.findall(r"`([a-z_]+)`", enum_text))
|
|
|
|
|
|
def test_doc_exists():
    """The producer-integration doc must be present at DOC_PATH."""
    present = DOC_PATH.is_file()
    assert present, f"doc missing: {DOC_PATH}"
|
|
|
|
|
|
def test_documented_methods_match_sourceadapter_api():
    """The §4 method headings and the public SourceAdapter methods must be
    the same set: nothing undocumented, nothing documented that is absent
    from the class."""
    documented = _documented_override_methods(_doc_text())
    implemented = _sourceadapter_overridable_methods()
    # Name both directions of drift so the failure message reads on its own.
    doc_only = documented - implemented
    code_only = implemented - documented
    assert documented == implemented, (
        f"override-method drift: "
        f"doc-only={doc_only}, "
        f"code-only={code_only}"
    )
|
|
|
|
|
|
def _blockquote_text(markdown: str) -> str:
    """Join the Markdown blockquote lines of *markdown*, minus their '>' markers."""
    kept = []
    for raw in markdown.splitlines():
        # Strip indentation first so an indented '> text' line loses its
        # marker just like a flush-left one.
        line = raw.lstrip()
        if not line.startswith(">"):
            continue
        kept.append(line[2:] if line.startswith("> ") else line.lstrip(">").lstrip())
    return "\n".join(kept)


def _contract_sentences(text: str) -> set[str]:
    """Normalise *text* and split it into a set of sentences.

    Normalising removes Markdown backticks and collapses whitespace runs to a
    single space. A sentence boundary is whitespace preceded by '.' or ':'.
    """
    flattened = re.sub(r"\s+", " ", text.replace("`", "")).strip()
    return {
        piece.strip()
        for piece in re.split(r"(?<=[.:])\s+", flattened)
        if piece.strip()
    }


def test_preview_hook_contract_matches_docstring():
    """The contract block quoted in §11.1 must come from the live
    SourceAdapter.preview_for_settings docstring.

    Both sides are normalised by collapsing whitespace, dropping backticks
    and stripping the doc's Markdown blockquote prefix (`> `). The comparison
    is bidirectional: every sentence of the docstring must appear in the
    blockquote, and the blockquote may not add sentences the docstring lacks.
    """
    doc = _doc_text()
    # §11.1 ends at the next level-2/3 heading, or at end of text.
    subsection = re.search(
        r"^### 11\.1[^\n]*\n(.*?)(?=^### |^## |\Z)",
        doc,
        re.DOTALL | re.MULTILINE,
    )
    assert subsection, "doc missing '### 11.1' subsection"
    blockquote = _blockquote_text(subsection.group(1))
    docstring = inspect.getdoc(SourceAdapter.preview_for_settings) or ""

    doc_sents = _contract_sentences(blockquote)
    code_sents = _contract_sentences(docstring)
    assert doc_sents == code_sents, (
        f"preview_for_settings contract drift: "
        f"doc-only={doc_sents - code_sents}, "
        f"code-only={code_sents - doc_sents}"
    )
|
|
|
|
|
|
def test_top_level_domains_match_streams_registry():
    """The domain tokens listed in §6.1 must be exactly those derived from
    the central.streams.STREAMS subject filters -- checked in both
    directions, with no hardcoded list."""
    documented = _documented_domains(_doc_text())
    registered = _streams_domains()
    doc_only = documented - registered
    code_only = registered - documented
    assert documented == registered, (
        f"domain-token drift: "
        f"doc-only={doc_only}, "
        f"code-only={code_only}"
    )
|
|
|
|
|
|
def test_streams_snippet_quotes_live_registry():
    """The StreamEntry(...) rows quoted in §8 must equal the live
    central.streams.STREAMS registry, compared as
    (name, subject_filter, event_bearing) triples.
    """
    text = _doc_text()
    found = re.search(
        r"^## 8\. The StreamEntry registry\s*\n(.*?)(?=^## )",
        text,
        re.DOTALL | re.MULTILINE,
    )
    assert found, "doc missing '## 8. The StreamEntry registry' section"
    # One documented row: StreamEntry("NAME", "central.x.>"[, event_bearing=False])
    row_pattern = (
        r'StreamEntry\(\s*"([A-Z_]+)"\s*,\s*"(central\.[a-z_]+\.>)"'
        r'(?:\s*,\s*event_bearing\s*=\s*(False|True))?\s*\)'
    )
    # An omitted event_bearing keyword captures as "", which counts as True.
    doc_rows: set[tuple[str, str, bool]] = {
        (stream_name, subject, flag != "False")
        for stream_name, subject, flag in re.findall(row_pattern, found.group(1))
    }
    code_rows = {
        (entry.name, entry.subject_filter, entry.event_bearing) for entry in STREAMS
    }
    assert doc_rows == code_rows, (
        f"STREAMS snippet drift: "
        f"doc-only={doc_rows - code_rows}, code-only={code_rows - doc_rows}"
    )
|
|
|
|
|
|
def test_no_orphan_adapter_references_in_anti_patterns():
    """Anti-patterns section names two real adapter modules as examples
    (firms, inciweb in §10.4). Those names must still resolve via
    central.adapter_discovery — protects against a silent rename leaving
    dead example references in the doc.

    Every double-quoted lowercase identifier in §10 is treated as a candidate
    adapter name, except tokens that are the lowercased name of a registered
    stream. That exclusion set is derived from central.streams.STREAMS at
    runtime rather than spelled out as a literal list.
    """
    doc = _doc_text()
    section_match = re.search(
        r"^## 10\. Anti-patterns.*?\n(.*?)(?=^## )",
        doc,
        re.DOTALL | re.MULTILINE,
    )
    assert section_match, "doc missing '## 10. Anti-patterns' section"
    quoted = set(re.findall(r'"([a-z][a-z_]*)"', section_match.group(1)))
    # Stream names quoted as examples are not adapter names; the quoted-token
    # pattern only yields lowercase, so compare against lowercased names.
    stream_tokens = {entry.name.lower() for entry in STREAMS}
    known_adapters = set(discover_adapters().keys())
    orphans = quoted - stream_tokens - known_adapters
    assert not orphans, (
        f"anti-patterns section references unknown adapter names: {orphans} "
        f"(known adapters: {sorted(known_adapters)})"
    )
|