refactor(archive): use bootstrap_config for connection strings

Archive now reads the NATS URL and Postgres DSN from bootstrap_config
instead of the TOML file. This is sufficient for archive, since it only
needs connection strings, not adapter configuration.

No ConfigSource wiring needed - archive just consumes from JetStream.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-05-16 01:55:39 +00:00
commit daa7852cc0

View file

@ -1,342 +1,353 @@
"""Central archive consumer - JetStream to TimescaleDB.""" """Central archive consumer - JetStream to TimescaleDB."""
import asyncio import asyncio
import json import json
import logging import logging
import signal import signal
import sys import sys
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any from typing import Any
import asyncpg import asyncpg
import nats import nats
from nats.js import JetStreamContext from nats.js import JetStreamContext
from nats.js.api import ConsumerConfig, DeliverPolicy, AckPolicy from nats.js.api import ConsumerConfig, DeliverPolicy, AckPolicy
from central.config import load_config, Config from central.bootstrap_config import get_settings
# Durable consumer name used for consumer_info/add_consumer/pull_subscribe.
CONSUMER_NAME = "archive"
# JetStream stream the consumer is attached to.
STREAM_NAME = "CENTRAL_WX"
# Subject filter for the consumer and the pull subscription.
SUBJECT_FILTER = "central.wx.>"
# Maximum number of messages requested per fetch() call.
BATCH_SIZE = 100
# Passed as fetch(timeout=...); presumably seconds -- confirm against nats-py.
FETCH_TIMEOUT = 5.0
# Passed as ConsumerConfig(ack_wait=...); presumably seconds -- confirm against nats-py.
ACK_WAIT = 30
class JsonFormatter(logging.Formatter):
    """Render log records as single-line JSON objects.

    Every object carries ``ts``, ``level``, ``logger`` and ``msg``; an
    ``exc`` traceback is added when exception info is attached, and any
    attribute on the record that is not a standard ``LogRecord`` field
    (for example values passed through ``extra=``) is copied in as-is.
    """

    # Standard LogRecord attributes that must not be echoed as extra fields.
    # Built once here instead of on every loop iteration.
    _RESERVED_ATTRS = frozenset((
        "name", "msg", "args", "created", "filename", "funcName",
        "levelname", "levelno", "lineno", "module", "msecs",
        "pathname", "process", "processName", "relativeCreated",
        "stack_info", "exc_info", "exc_text", "thread", "threadName",
        "taskName", "message",
    ))

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialized as a JSON string.

        The timestamp is derived from ``record.created`` so it reflects when
        the record was emitted rather than when it happened to be formatted.
        Values that ``json`` cannot serialize natively are stringified
        instead of raising, so one odd ``extra`` value cannot drop a log line.
        """
        log_obj: dict[str, Any] = {
            "ts": datetime.fromtimestamp(record.created, timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        if record.exc_info:
            log_obj["exc"] = self.formatException(record.exc_info)
        for key, value in record.__dict__.items():
            if key not in self._RESERVED_ATTRS:
                log_obj[key] = value
        # default=str is deliberate: a formatter must never raise on payload types.
        return json.dumps(log_obj, default=str)
def setup_logging() -> None:
    """Send root-logger output to stdout as JSON at INFO level.

    Any handlers previously attached to the root logger are replaced by a
    single stream handler that uses ``JsonFormatter``.
    """
    root = logging.getLogger()
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setFormatter(JsonFormatter())
    root.handlers = [stdout_handler]
    root.setLevel(logging.INFO)
# Module-wide logger for the archive consumer.
logger = logging.getLogger(
    "central.archive"
)
def _build_geom_sql(geo_data: dict[str, Any] | None) -> str | None:
def _build_geom_sql(geo_data: dict[str, Any] | None) -> str | None: """Build PostGIS geometry from event geo data."""
"""Build PostGIS geometry from event geo data.""" if not geo_data:
if not geo_data: return None
return None
bbox = geo_data.get("bbox")
bbox = geo_data.get("bbox") centroid = geo_data.get("centroid")
centroid = geo_data.get("centroid")
if bbox and len(bbox) == 4:
if bbox and len(bbox) == 4: # Create polygon from bbox
# Create polygon from bbox min_lon, min_lat, max_lon, max_lat = bbox
min_lon, min_lat, max_lon, max_lat = bbox return json.dumps({
return json.dumps({ "type": "Polygon",
"type": "Polygon", "coordinates": [[
"coordinates": [[ [min_lon, min_lat],
[min_lon, min_lat], [max_lon, min_lat],
[max_lon, min_lat], [max_lon, max_lat],
[max_lon, max_lat], [min_lon, max_lat],
[min_lon, max_lat], [min_lon, min_lat],
[min_lon, min_lat], ]]
]] })
}) elif centroid and len(centroid) == 2:
elif centroid and len(centroid) == 2: # Create point from centroid
# Create point from centroid return json.dumps({
return json.dumps({ "type": "Point",
"type": "Point", "coordinates": centroid
"coordinates": centroid })
})
return None
return None
class ArchiveConsumer:
    """Archive consumer process.

    Pulls messages from the ``STREAM_NAME`` JetStream stream through the
    durable consumer ``CONSUMER_NAME`` and upserts each event into the
    ``events`` table. Lifecycle: ``start()`` -> ``run()`` -> ``stop()``.
    """

    # Shared ON CONFLICT clause: a redelivered event overwrites the stored row.
    _ON_CONFLICT_SQL = """
        ON CONFLICT (id, time) DO UPDATE SET
            source = EXCLUDED.source,
            category = EXCLUDED.category,
            expires = EXCLUDED.expires,
            severity = EXCLUDED.severity,
            geom = EXCLUDED.geom,
            regions = EXCLUDED.regions,
            primary_region = EXCLUDED.primary_region,
            payload = EXCLUDED.payload
    """

    # Upsert used when the event has a geometry ($7 is a GeoJSON string).
    _UPSERT_WITH_GEOM_SQL = """
        INSERT INTO events (id, source, category, time, expires, severity,
                            geom, regions, primary_region, payload)
        VALUES ($1, $2, $3, $4, $5, $6,
                ST_GeomFromGeoJSON($7), $8, $9, $10)
    """ + _ON_CONFLICT_SQL

    # Upsert used when the event has no usable geometry.
    _UPSERT_NO_GEOM_SQL = """
        INSERT INTO events (id, source, category, time, expires, severity,
                            geom, regions, primary_region, payload)
        VALUES ($1, $2, $3, $4, $5, $6, NULL, $7, $8, $9)
    """ + _ON_CONFLICT_SQL

    def __init__(self, nats_url: str, postgres_dsn: str) -> None:
        """Store connection strings; no I/O happens until ``connect()``.

        Args:
            nats_url: URL handed to ``nats.connect``.
            postgres_dsn: DSN handed to ``asyncpg.create_pool``.
        """
        self._nats_url = nats_url
        self._postgres_dsn = postgres_dsn
        self._nc: nats.NATS | None = None
        self._js: JetStreamContext | None = None
        self._pool: asyncpg.Pool | None = None
        self._shutdown_event = asyncio.Event()

    async def connect(self) -> None:
        """Connect to NATS and PostgreSQL.

        If the PostgreSQL pool cannot be created, the already-open NATS
        connection is closed before the error is re-raised.
        """
        self._nc = await nats.connect(self._nats_url)
        self._js = self._nc.jetstream()
        logger.info("Connected to NATS", extra={"url": self._nats_url})

        try:
            self._pool = await asyncpg.create_pool(
                self._postgres_dsn,
                min_size=1,
                max_size=5,
            )
        except Exception:
            # Do not leak the NATS connection when the database is unreachable.
            await self.disconnect()
            raise
        logger.info("Connected to PostgreSQL")

    async def disconnect(self) -> None:
        """Disconnect from NATS and PostgreSQL; safe to call when not connected."""
        if self._pool:
            await self._pool.close()
            self._pool = None

        if self._nc:
            try:
                await self._nc.drain()
            except Exception as e:
                # A failed drain must not prevent close() and the state reset.
                logger.warning("NATS drain failed", extra={"error": str(e)})
            finally:
                await self._nc.close()
                self._nc = None
                self._js = None

        logger.info("Disconnected")

    async def _ensure_consumer(self) -> None:
        """Ensure the durable consumer exists, creating it when it is missing."""
        if not self._js:
            return

        try:
            await self._js.consumer_info(STREAM_NAME, CONSUMER_NAME)
            logger.info("Consumer exists", extra={"consumer": CONSUMER_NAME})
        except nats.js.errors.NotFoundError:
            consumer_config = ConsumerConfig(
                durable_name=CONSUMER_NAME,
                deliver_policy=DeliverPolicy.ALL,
                ack_policy=AckPolicy.EXPLICIT,
                ack_wait=ACK_WAIT,
                filter_subject=SUBJECT_FILTER,
            )
            await self._js.add_consumer(STREAM_NAME, consumer_config)
            logger.info("Consumer created", extra={"consumer": CONSUMER_NAME})

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime | None:
        """Parse an ISO-8601 string (a ``Z`` suffix is accepted); None if unusable."""
        if not isinstance(value, str) or not value:
            return None
        try:
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None

    async def _process_message(self, msg: Any, conn: asyncpg.Connection) -> None:
        """Process a single message and insert into database.

        Messages that can never be archived (undecodable bytes, invalid JSON,
        a non-object envelope, a missing id or unparsable time) are
        acknowledged and dropped so they are not redelivered forever. A
        failed insert is left unacknowledged so the message is redelivered.

        Args:
            msg: The fetched message; ``msg.data`` holds the encoded envelope.
            conn: Database connection used for the upsert.
        """
        try:
            envelope = json.loads(msg.data.decode())
        except ValueError as e:
            # Covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning("Invalid JSON in message", extra={"error": str(e)})
            await msg.ack()
            return

        if not isinstance(envelope, dict):
            logger.warning(
                "Message envelope is not a JSON object",
                extra={"envelope_type": type(envelope).__name__},
            )
            await msg.ack()
            return

        # Explicit nulls or wrong types are treated like missing sections so
        # the lookups below cannot raise outside the insert's try block.
        event_data = envelope.get("data")
        if not isinstance(event_data, dict):
            event_data = {}
        geo_data = event_data.get("geo")
        if not isinstance(geo_data, dict):
            geo_data = None
        geo_section = geo_data or {}

        event_id = envelope.get("id")
        source = event_data.get("source", "")
        category = event_data.get("category", "")
        time_str = event_data.get("time")
        expires_str = event_data.get("expires")
        severity = event_data.get("severity")
        regions = geo_section.get("regions", [])
        primary_region = geo_section.get("primary_region")

        event_time = self._parse_timestamp(time_str)
        expires_time = self._parse_timestamp(expires_str)

        if not event_id or not event_time:
            logger.warning(
                "Message missing required fields",
                extra={"id": event_id, "time": time_str}
            )
            await msg.ack()
            return

        geom_json = _build_geom_sql(geo_data)

        try:
            if geom_json:
                await conn.execute(
                    self._UPSERT_WITH_GEOM_SQL,
                    event_id, source, category, event_time, expires_time, severity,
                    geom_json, regions, primary_region, json.dumps(envelope)
                )
            else:
                await conn.execute(
                    self._UPSERT_NO_GEOM_SQL,
                    event_id, source, category, event_time, expires_time, severity,
                    regions, primary_region, json.dumps(envelope)
                )

            await msg.ack()
            logger.info("Archived event", extra={"id": event_id, "category": category})

        except Exception as e:
            logger.error(
                "Failed to insert event",
                extra={"id": event_id, "error": str(e)}
            )
            # Don't ack - let it be redelivered

    async def _consume_loop(self) -> None:
        """Fetch batches and archive them until the shutdown event is set.

        Returns immediately when ``connect()`` has not been called. Fetch
        timeouts are expected when the stream is idle; any other error is
        logged and followed by a one-second pause before the next attempt.
        """
        if not self._js or not self._pool:
            return

        await self._ensure_consumer()

        sub = await self._js.pull_subscribe(
            SUBJECT_FILTER,
            durable=CONSUMER_NAME,
            stream=STREAM_NAME,
        )

        logger.info(
            "Subscribed to stream",
            extra={"stream": STREAM_NAME, "filter": SUBJECT_FILTER}
        )

        while not self._shutdown_event.is_set():
            try:
                msgs = await sub.fetch(
                    batch=BATCH_SIZE,
                    timeout=FETCH_TIMEOUT,
                )

                if msgs:
                    # One connection is reused for the whole batch.
                    async with self._pool.acquire() as conn:
                        for msg in msgs:
                            await self._process_message(msg, conn)

            except nats.errors.TimeoutError:
                # No messages available, continue
                pass
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.exception("Error in consume loop", extra={"error": str(e)})
                await asyncio.sleep(1)

        logger.info("Consume loop stopped")

    async def start(self) -> None:
        """Start the consumer by opening the NATS and PostgreSQL connections."""
        await self.connect()
        logger.info("Archive consumer ready")

    async def run(self) -> None:
        """Run the consume loop until shutdown."""
        await self._consume_loop()

    async def stop(self) -> None:
        """Stop the consumer gracefully: signal the loop, then disconnect."""
        logger.info("Archive consumer shutting down")
        self._shutdown_event.set()
        await self.disconnect()
        logger.info("Archive consumer stopped")
async def async_main() -> None:
    """Async entry point.

    Reads the NATS URL and database DSN from ``get_settings()``, starts an
    ``ArchiveConsumer`` and runs it until SIGTERM/SIGINT arrives. The
    consumer is always stopped on the way out, including when startup fails.

    Raises:
        RuntimeError: If the consume loop ends without a shutdown signal.
        Exception: Whatever the consume loop raised, if it failed.
    """
    setup_logging()

    settings = get_settings()
    logger.info(
        "Archive starting",
        extra={
            "nats_url": settings.nats_url,
            "config_source": settings.config_source,
        },
    )

    consumer = ArchiveConsumer(
        nats_url=settings.nats_url,
        postgres_dsn=settings.db_dsn,
    )

    loop = asyncio.get_running_loop()
    shutdown_event = asyncio.Event()

    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, shutdown_event.set)

    consume_task: asyncio.Task | None = None
    shutdown_task: asyncio.Task | None = None
    try:
        await consumer.start()

        # Run consumer in background
        consume_task = asyncio.create_task(consumer.run())
        shutdown_task = asyncio.create_task(shutdown_event.wait())

        # Wake up on a shutdown signal OR when the consume loop ends on its
        # own; waiting on the signal alone would leave an idle process behind
        # if the loop died (e.g. subscription setup failed).
        await asyncio.wait(
            {consume_task, shutdown_task},
            return_when=asyncio.FIRST_COMPLETED,
        )

        if consume_task.done() and not consume_task.cancelled():
            error = consume_task.exception()
            if error is not None:
                raise error
            if not shutdown_event.is_set():
                raise RuntimeError("Archive consume loop exited unexpectedly")
    finally:
        pending = [task for task in (consume_task, shutdown_task) if task is not None]
        for task in pending:
            task.cancel()
        # return_exceptions=True absorbs the CancelledError of cancelled tasks.
        await asyncio.gather(*pending, return_exceptions=True)
        # stop() sets the consumer's own shutdown flag and disconnects.
        await consumer.stop()


def main() -> None:
    """Entry point."""
    asyncio.run(async_main())


if __name__ == "__main__":
    main()