"""Consistency tests for docs/PRODUCER-INTEGRATION.md. The doc is the producer-side contract — what an adapter author implements and the conventions Central enforces around it. These tests catch drift between the doc and the live code: - Every overridable SourceAdapter method documented in §4 must exist on central.adapter.SourceAdapter — and vice versa. - The preview_for_settings contract quoted in §11.1 must come from the actual SourceAdapter.preview_for_settings docstring. - The set of top-level domain tokens documented in §6.1 must equal the set derived from central.streams.STREAMS subject_filter prefixes. - The verbatim STREAMS snippet quoted in §8 must match the live registry. Per the doc's own §10.4, NO hardcoded stream / adapter list literals: every expected value derives from central.streams, central.adapter, or central.adapter_discovery at runtime. """ import inspect import re from pathlib import Path from central.adapter import SourceAdapter from central.adapter_discovery import discover_adapters from central.streams import STREAMS DOC_PATH = Path(__file__).resolve().parents[1] / "docs" / "PRODUCER-INTEGRATION.md" def _doc_text() -> str: assert DOC_PATH.is_file(), f"missing: {DOC_PATH}" return DOC_PATH.read_text() def _documented_override_methods(doc: str) -> set[str]: """Extract method names documented under '## 4. The SourceAdapter base class'. Looks for the '**`async def (...)`**' / '**`def (...)`**' method headings inside §4. """ section_re = re.compile( r"^## 4\. The SourceAdapter base class\s*\n(.*?)(?=^## )", re.DOTALL | re.MULTILINE, ) m = section_re.search(doc) assert m, "doc missing '## 4. The SourceAdapter base class' section" section = m.group(1) heading_re = re.compile(r"\*\*`(?:async\s+)?def\s+(\w+)\s*\(", re.MULTILINE) return set(heading_re.findall(section)) def _sourceadapter_overridable_methods() -> set[str]: """Methods on SourceAdapter that an adapter author is expected to implement or may override. Excludes Python internals (dunder), the constructor, and private helpers. """ methods: set[str] = set() for name, member in inspect.getmembers(SourceAdapter): if name.startswith("_"): continue if not (inspect.isfunction(member) or inspect.iscoroutinefunction(member)): continue methods.add(name) return methods def _streams_domains() -> set[str]: """Top-level tokens derived from STREAMS subject filters (central..>). """ domain_re = re.compile(r"^central\.([a-z_]+)\.>$") out: set[str] = set() for s in STREAMS: m = domain_re.match(s.subject_filter) assert m, f"unexpected subject filter shape: {s.subject_filter!r}" out.add(m.group(1)) return out def _documented_domains(doc: str) -> set[str]: """Domain tokens enumerated in §6.1 as backtick literals (`wx`, `fire`, …).""" section_re = re.compile( r"`` is one of ([^.]+)\.", re.DOTALL, ) m = section_re.search(doc) assert m, "doc missing the '`` is one of ...' enumeration in §6.1" enum_text = m.group(1) return set(re.findall(r"`([a-z_]+)`", enum_text)) def test_doc_exists(): assert DOC_PATH.is_file(), f"doc missing: {DOC_PATH}" def test_documented_methods_match_sourceadapter_api(): """Every override-able SourceAdapter method must appear in the §4 contract, and the doc may not advertise methods that don't exist.""" doc_methods = _documented_override_methods(_doc_text()) code_methods = _sourceadapter_overridable_methods() assert doc_methods == code_methods, ( f"override-method drift: " f"doc-only={doc_methods - code_methods}, " f"code-only={code_methods - doc_methods}" ) def test_preview_hook_contract_matches_docstring(): """The contract block quoted in §11.1 must come from the live SourceAdapter.preview_for_settings docstring. Normalizes both sides by collapsing whitespace and stripping the doc's Markdown blockquote prefix (`> `). """ doc = _doc_text() section_re = re.compile( r"^### 11\.1[^\n]*\n(.*?)(?=^### |^## )", re.DOTALL | re.MULTILINE, ) m = section_re.search(doc) assert m, "doc missing '### 11.1' subsection" blockquote = "\n".join( line[2:] if line.startswith("> ") else line.lstrip(">").lstrip() for line in m.group(1).splitlines() if line.lstrip().startswith(">") ) docstring = inspect.getdoc(SourceAdapter.preview_for_settings) or "" def norm(s: str) -> str: # Strip markdown backticks; collapse whitespace. return re.sub(r"\s+", " ", s.replace("`", "")).strip() norm_block = norm(blockquote) norm_doc = norm(docstring) # Bidirectional: every non-empty sentence of the docstring must appear in # the doc's blockquote, and the blockquote must not introduce new sentences # the docstring lacks. sentences = lambda s: {x.strip() for x in re.split(r"(?<=[.:])\s+", s) if x.strip()} doc_sents = sentences(norm_block) code_sents = sentences(norm_doc) assert doc_sents == code_sents, ( f"preview_for_settings contract drift: " f"doc-only={doc_sents - code_sents}, " f"code-only={code_sents - doc_sents}" ) def test_top_level_domains_match_streams_registry(): """The §6.1 domain enumeration must equal the domain tokens derived from central.streams.STREAMS — bidirectional, no hardcoded list.""" doc_domains = _documented_domains(_doc_text()) code_domains = _streams_domains() assert doc_domains == code_domains, ( f"domain-token drift: " f"doc-only={doc_domains - code_domains}, " f"code-only={code_domains - doc_domains}" ) def test_streams_snippet_quotes_live_registry(): """The §8 verbatim STREAMS snippet must agree with central.streams.STREAMS on (name, subject_filter, event_bearing). """ doc = _doc_text() section_re = re.compile( r"^## 8\. The StreamEntry registry\s*\n(.*?)(?=^## )", re.DOTALL | re.MULTILINE, ) m = section_re.search(doc) assert m, "doc missing '## 8. The StreamEntry registry' section" section = m.group(1) # Each documented entry: StreamEntry("NAME", "central.x.>"[, event_bearing=False]) entry_re = re.compile( r'StreamEntry$\s*"([A-Z_]+)"\s*,\s*"(central\.[a-z_]+\.>)"' r'(?:\s*,\s*event_bearing\s*=\s*(False|True))?\s*$', ) doc_rows: set[tuple[str, str, bool]] = set() for name, subj, eb in entry_re.findall(section): event_bearing = (eb != "False") # default True if unspecified doc_rows.add((name, subj, event_bearing)) code_rows = {(s.name, s.subject_filter, s.event_bearing) for s in STREAMS} assert doc_rows == code_rows, ( f"STREAMS snippet drift: " f"doc-only={doc_rows - code_rows}, code-only={code_rows - doc_rows}" ) def test_no_orphan_adapter_references_in_anti_patterns(): """Anti-patterns section names two real adapter modules as examples (firms, inciweb in §10.4). Those names must still resolve via central.adapter_discovery — protects against a silent rename leaving dead example references in the doc. """ doc = _doc_text() section_re = re.compile( r"^## 10\. Anti-patterns.*?\n(.*?)(?=^## )", re.DOTALL | re.MULTILINE, ) m = section_re.search(doc) assert m, "doc missing '## 10. Anti-patterns' section" section = m.group(1) quoted = set(re.findall(r'"([a-z][a-z_]*)"', section)) # Whitelist Python-syntax tokens that incidentally appear in the section; # everything else in this set is asserted to be a real adapter name. # Derived from STREAMS per §10.4 — stream names appear quoted as examples # and would otherwise look like orphan adapter references. syntax_tokens = {s.name for s in STREAMS} candidate_adapter_names = quoted - {t.lower() for t in syntax_tokens} known_adapters = set(discover_adapters().keys()) orphans = {n for n in candidate_adapter_names if n not in known_adapters} assert not orphans, ( f"anti-patterns section references unknown adapter names: {orphans} " f"(known adapters: {sorted(known_adapters)})" )