From a9510b5ed9d234fb94b7d6d58be0e9687fe2fad9 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 19 Apr 2026 22:14:26 +0000 Subject: [PATCH 01/72] feat(navi): add nav_tools with route() and reverse_geocode() - Phase H2 - nav_tools.py: route() geocodes via Photon, routes via Valhalla, returns summary/maneuvers/polyline. reverse_geocode() for coordinate lookups. Supports auto/pedestrian/bicycle/truck modes. - nav_tools_test.py: 5 live tests against local Photon (2322) and Valhalla (8002) - aurora_nav_tool.py: Open WebUI Tool exposing get_directions to Aurora LLM Co-Authored-By: Claude Opus 4.6 --- lib/aurora_nav_tool.py | 122 ++++++++++++++++++++++++++++++++ lib/nav_tools.py | 153 +++++++++++++++++++++++++++++++++++++++++ lib/nav_tools_test.py | 77 +++++++++++++++++++++ 3 files changed, 352 insertions(+) create mode 100644 lib/aurora_nav_tool.py create mode 100644 lib/nav_tools.py create mode 100644 lib/nav_tools_test.py diff --git a/lib/aurora_nav_tool.py b/lib/aurora_nav_tool.py new file mode 100644 index 0000000..ef4b604 --- /dev/null +++ b/lib/aurora_nav_tool.py @@ -0,0 +1,122 @@ +""" +title: Navigation +author: Echo6 +version: 1.0.0 +description: Turn-by-turn directions and geocoding via Photon + Valhalla on recon-vm. Supports driving, walking, cycling, and truck routing with worldwide coverage (281M places). +""" + +import re +import json +import requests +from pydantic import BaseModel, Field + +_COORD_RE = re.compile(r'^(-?\d+\.?\d*)\s*,\s*(-?\d+\.?\d*)$') + + +class Tools: + class Valves(BaseModel): + photon_url: str = Field( + default="http://100.64.0.24:2322", + description="Photon geocoding service URL (recon-vm)", + ) + valhalla_url: str = Field( + default="http://100.64.0.24:8002", + description="Valhalla routing service URL (recon-vm)", + ) + + def __init__(self): + self.valves = self.Valves() + + def _geocode(self, query: str): + m = _COORD_RE.match(query.strip()) + if m: + lat, lon = float(m.group(1)), float(m.group(2)) + return lat, lon, query + resp = requests.get( + f"{self.valves.photon_url}/api", + params={"q": query, "limit": 1}, + timeout=10, + ) + resp.raise_for_status() + features = resp.json().get("features", []) + if not features: + return None, None, None + props = features[0]["properties"] + coords = features[0]["geometry"]["coordinates"] + parts = [props.get("name", "")] + for key in ("city", "state", "country"): + v = props.get(key) + if v and v != parts[-1]: + parts.append(v) + return coords[1], coords[0], ", ".join(p for p in parts if p) + + def get_directions( + self, + origin: str, + destination: str, + mode: str = "auto", + ) -> str: + """ + Get turn-by-turn driving, walking, or cycling directions between two locations. + Use this when someone asks how to get somewhere, asks for directions, or wants to know distance/time between places. + + :param origin: Starting location — address, place name, or lat,lon coordinates + :param destination: Destination — address, place name, or lat,lon coordinates + :param mode: Travel mode: auto, pedestrian, bicycle, or truck (default: auto) + :return: Directions with distance, time, and turn-by-turn maneuvers + """ + if mode not in ("auto", "pedestrian", "bicycle", "truck"): + mode = "auto" + + # Geocode origin + orig_lat, orig_lon, orig_name = self._geocode(origin) + if orig_lat is None: + return json.dumps({"error": f"Could not find location: {origin}"}) + + # Geocode destination + dest_lat, dest_lon, dest_name = self._geocode(destination) + if dest_lat is None: + return json.dumps({"error": f"Could not find location: {destination}"}) + + # Route via Valhalla + try: + resp = requests.post( + f"{self.valves.valhalla_url}/route", + json={ + "locations": [ + {"lat": orig_lat, "lon": orig_lon}, + {"lat": dest_lat, "lon": dest_lon}, + ], + "costing": mode, + "directions_options": {"units": "miles"}, + }, + timeout=30, + ) + except requests.RequestException: + return json.dumps({"error": "Navigation service unavailable"}) + + if resp.status_code != 200: + return json.dumps({"error": "No route found between locations"}) + + trip = resp.json()["trip"] + summary = trip["summary"] + maneuvers = [] + for m in trip["legs"][0]["maneuvers"]: + streets = m.get("street_names", []) + entry = { + "instruction": m["instruction"], + "distance_miles": round(m.get("length", 0), 2), + } + if streets: + entry["street"] = streets[0] + maneuvers.append(entry) + + result = { + "origin": orig_name, + "destination": dest_name, + "distance_miles": round(summary["length"], 1), + "time_minutes": round(summary["time"] / 60, 1), + "mode": mode, + "maneuvers": maneuvers, + } + return json.dumps(result) diff --git a/lib/nav_tools.py b/lib/nav_tools.py new file mode 100644 index 0000000..f6db5e6 --- /dev/null +++ b/lib/nav_tools.py @@ -0,0 +1,153 @@ +"""Navigation tools: geocoding via Photon and routing via Valhalla.""" + +import re +import requests + +PHOTON_URL = "http://localhost:2322" +VALHALLA_URL = "http://localhost:8002" + +_COORD_RE = re.compile(r'^(-?\d+\.?\d*)\s*,\s*(-?\d+\.?\d*)$') + +VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"} + + +def _parse_coords(text: str): + """Return (lat, lon) if text looks like coordinates, else None.""" + m = _COORD_RE.match(text.strip()) + if m: + return float(m.group(1)), float(m.group(2)) + return None + + +def _geocode(query: str): + """Geocode a place name via Photon. Returns (lat, lon, display_name) or raises.""" + coords = _parse_coords(query) + if coords: + return coords[0], coords[1], query + + try: + resp = requests.get( + f"{PHOTON_URL}/api", + params={"q": query, "limit": 1}, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException: + raise RuntimeError("Navigation service unavailable") + + data = resp.json() + features = data.get("features", []) + if not features: + raise ValueError(f"Could not find location: {query}") + + props = features[0]["properties"] + coords = features[0]["geometry"]["coordinates"] # [lon, lat] + parts = [props.get("name", "")] + for key in ("city", "county", "state", "country"): + v = props.get(key) + if v and v != parts[-1]: + parts.append(v) + display = ", ".join(p for p in parts if p) + return coords[1], coords[0], display # lat, lon + + +def reverse_geocode(lat: float, lon: float) -> str: + """Reverse geocode coordinates via Photon. Returns formatted address string.""" + try: + resp = requests.get( + f"{PHOTON_URL}/reverse", + params={"lat": lat, "lon": lon, "limit": 1}, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException: + raise RuntimeError("Navigation service unavailable") + + data = resp.json() + features = data.get("features", []) + if not features: + return f"{lat}, {lon}" + + props = features[0]["properties"] + parts = [] + for key in ("name", "housenumber", "street", "city", "state", "country", "postcode"): + v = props.get(key) + if v: + parts.append(v) + return ", ".join(parts) if parts else f"{lat}, {lon}" + + +def route(origin: str, destination: str, mode: str = "auto") -> dict: + """ + Get a route between two locations. + + Args: + origin: Starting location — address, place name, or "lat,lon" + destination: Destination — address, place name, or "lat,lon" + mode: Travel mode — auto, pedestrian, bicycle, truck + + Returns: + dict with summary, maneuvers, origin/destination info, and raw shape + """ + if mode not in VALID_MODES: + mode = "auto" + + # Geocode both endpoints + orig_lat, orig_lon, orig_name = _geocode(origin) + dest_lat, dest_lon, dest_name = _geocode(destination) + + # Query Valhalla + valhalla_req = { + "locations": [ + {"lat": orig_lat, "lon": orig_lon}, + {"lat": dest_lat, "lon": dest_lon}, + ], + "costing": mode, + "directions_options": {"units": "miles"}, + } + + try: + resp = requests.post( + f"{VALHALLA_URL}/route", + json=valhalla_req, + timeout=30, + ) + except requests.RequestException: + raise RuntimeError("Navigation service unavailable") + + if resp.status_code != 200: + try: + err = resp.json() + msg = err.get("error", "Unknown routing error") + except Exception: + msg = f"Routing error (HTTP {resp.status_code})" + raise RuntimeError(f"No route found between locations: {msg}") + + data = resp.json() + trip = data["trip"] + summary = trip["summary"] + leg = trip["legs"][0] + + # Build maneuver list + maneuvers = [] + for m in leg["maneuvers"]: + streets = m.get("street_names", []) + maneuvers.append({ + "instruction": m["instruction"], + "distance_miles": round(m.get("length", 0), 2), + "street_name": streets[0] if streets else "", + "type": m.get("type", 0), + "verbal_succinct": m.get("verbal_succinct_transition_instruction", ""), + }) + + return { + "origin": {"name": orig_name, "lat": orig_lat, "lon": orig_lon}, + "destination": {"name": dest_name, "lat": dest_lat, "lon": dest_lon}, + "summary": { + "distance_miles": round(summary["length"], 1), + "time_minutes": round(summary["time"] / 60, 1), + "mode": mode, + }, + "maneuvers": maneuvers, + "shape": leg.get("shape", ""), + } diff --git a/lib/nav_tools_test.py b/lib/nav_tools_test.py new file mode 100644 index 0000000..b987293 --- /dev/null +++ b/lib/nav_tools_test.py @@ -0,0 +1,77 @@ +"""Tests for nav_tools — run against live Photon + Valhalla services.""" + +import sys +import json + +from nav_tools import route, reverse_geocode + + +def test_route_named(): + """route("Buhl Idaho", "Boise Idaho", "auto") returns maneuvers.""" + print("TEST 1: route('Buhl Idaho', 'Boise Idaho', 'auto')") + r = route("Buhl Idaho", "Boise Idaho", "auto") + assert r["summary"]["distance_miles"] > 50, f"Expected >50 mi, got {r['summary']['distance_miles']}" + assert r["summary"]["time_minutes"] > 60, f"Expected >60 min, got {r['summary']['time_minutes']}" + assert len(r["maneuvers"]) > 5, f"Expected >5 maneuvers, got {len(r['maneuvers'])}" + assert r["shape"], "Missing polyline shape" + print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min, {len(r['maneuvers'])} maneuvers") + print(f" Origin: {r['origin']['name']}") + print(f" Destination: {r['destination']['name']}") + print(f" First maneuver: {r['maneuvers'][0]['instruction']}") + + +def test_route_coords(): + """route with raw lat,lon coordinates.""" + print("\nTEST 2: route('42.5991,-114.7636', '43.615,-116.2023', 'auto')") + r = route("42.5991,-114.7636", "43.615,-116.2023", "auto") + assert r["summary"]["distance_miles"] > 100, f"Expected >100 mi, got {r['summary']['distance_miles']}" + assert len(r["maneuvers"]) > 3, f"Expected >3 maneuvers" + print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min") + + +def test_route_pedestrian(): + """route with pedestrian mode.""" + print("\nTEST 3: route('Buhl Idaho', 'Boise Idaho', 'pedestrian')") + r = route("Buhl Idaho", "Boise Idaho", "pedestrian") + assert r["summary"]["mode"] == "pedestrian" + assert r["summary"]["time_minutes"] > r["summary"]["distance_miles"], "Walking should take more min than miles" + print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min (pedestrian)") + + +def test_reverse_geocode(): + """reverse_geocode near Buhl, Idaho.""" + print("\nTEST 4: reverse_geocode(42.5991, -114.7636)") + result = reverse_geocode(42.5991, -114.7636) + assert "Buhl" in result or "Twin Falls" in result or "Idaho" in result, f"Expected Buhl/Idaho, got: {result}" + print(f" OK — {result}") + + +def test_route_bad_origin(): + """route with nonexistent place returns clean error.""" + print("\nTEST 5: route('nonexistent place xyz123abc', 'Boise Idaho')") + try: + r = route("nonexistent place xyz123abc", "Boise Idaho") + print(f" FAIL — expected error, got result: {r['summary']}") + return False + except ValueError as e: + print(f" OK — clean error: {e}") + except RuntimeError as e: + print(f" OK — runtime error: {e}") + + +if __name__ == "__main__": + passed = 0 + failed = 0 + tests = [test_route_named, test_route_coords, test_route_pedestrian, test_reverse_geocode, test_route_bad_origin] + + for test in tests: + try: + test() + passed += 1 + except Exception as e: + print(f" FAIL — {e}") + failed += 1 + + print(f"\n{'='*40}") + print(f"Results: {passed} passed, {failed} failed out of {len(tests)}") + sys.exit(1 if failed else 0) From 9841c38011d76f00cb84b5da432c2333f983cd50 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 19 Apr 2026 22:42:17 +0000 Subject: [PATCH 02/72] fix(navi): format tool output as human-readable directions --- lib/aurora_nav_tool.py | 55 +++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/lib/aurora_nav_tool.py b/lib/aurora_nav_tool.py index ef4b604..2b7285d 100644 --- a/lib/aurora_nav_tool.py +++ b/lib/aurora_nav_tool.py @@ -1,7 +1,7 @@ """ title: Navigation author: Echo6 -version: 1.0.0 +version: 1.1.0 description: Turn-by-turn directions and geocoding via Photon + Valhalla on recon-vm. Supports driving, walking, cycling, and truck routing with worldwide coverage (281M places). """ @@ -57,28 +57,24 @@ class Tools: mode: str = "auto", ) -> str: """ - Get turn-by-turn driving, walking, or cycling directions between two locations. - Use this when someone asks how to get somewhere, asks for directions, or wants to know distance/time between places. + Get turn-by-turn directions between two locations. When this tool returns results, present the directions exactly as returned — do not summarize or rephrase. Include all steps. :param origin: Starting location — address, place name, or lat,lon coordinates :param destination: Destination — address, place name, or lat,lon coordinates :param mode: Travel mode: auto, pedestrian, bicycle, or truck (default: auto) - :return: Directions with distance, time, and turn-by-turn maneuvers + :return: Formatted turn-by-turn directions """ if mode not in ("auto", "pedestrian", "bicycle", "truck"): mode = "auto" - # Geocode origin orig_lat, orig_lon, orig_name = self._geocode(origin) if orig_lat is None: - return json.dumps({"error": f"Could not find location: {origin}"}) + return f"Could not find location: {origin}" - # Geocode destination dest_lat, dest_lon, dest_name = self._geocode(destination) if dest_lat is None: - return json.dumps({"error": f"Could not find location: {destination}"}) + return f"Could not find location: {destination}" - # Route via Valhalla try: resp = requests.post( f"{self.valves.valhalla_url}/route", @@ -93,30 +89,29 @@ class Tools: timeout=30, ) except requests.RequestException: - return json.dumps({"error": "Navigation service unavailable"}) + return "Navigation service unavailable" if resp.status_code != 200: - return json.dumps({"error": "No route found between locations"}) + return "No route found between locations" trip = resp.json()["trip"] summary = trip["summary"] - maneuvers = [] - for m in trip["legs"][0]["maneuvers"]: - streets = m.get("street_names", []) - entry = { - "instruction": m["instruction"], - "distance_miles": round(m.get("length", 0), 2), - } - if streets: - entry["street"] = streets[0] - maneuvers.append(entry) + legs = trip["legs"][0]["maneuvers"] - result = { - "origin": orig_name, - "destination": dest_name, - "distance_miles": round(summary["length"], 1), - "time_minutes": round(summary["time"] / 60, 1), - "mode": mode, - "maneuvers": maneuvers, - } - return json.dumps(result) + miles = round(summary["length"], 1) + minutes = round(summary["time"] / 60, 1) + + lines = [ + f"Directions from {orig_name} to {dest_name} ({mode}):", + f"Distance: {miles} miles | Time: {minutes} minutes", + "", + ] + for i, m in enumerate(legs, 1): + inst = m["instruction"] + dist = m.get("length", 0) + if dist > 0: + lines.append(f"{i}. {inst} — {round(dist, 1)} mi") + else: + lines.append(f"{i}. {inst}") + + return "\n".join(lines) From 3243f2f252c9193fda7f8cf4fb165706686edd3d Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 19 Apr 2026 23:50:35 +0000 Subject: [PATCH 03/72] feat(navi): semantic query router for intelligent tool selection - Phase H2b Add centroid-based query classifier that routes Aurora queries to the appropriate handler (nav_route, nav_reverse_geocode, direct_answer, rag_search) before the RAG pipeline runs. Uses TEI embeddings against pre-computed route centroids from 38 example queries. - query_router.py: standalone module with lazy centroid init - query_router_test.py: 7-query test suite (all passing) - Corresponding recon_rag_tool.py v4.2.0 deployed to Open WebUI DB Co-Authored-By: Claude Opus 4.6 --- lib/query_router.py | 161 +++++++++++++++++++++++++++++++++++++++ lib/query_router_test.py | 49 ++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 lib/query_router.py create mode 100644 lib/query_router_test.py diff --git a/lib/query_router.py b/lib/query_router.py new file mode 100644 index 0000000..dda14a2 --- /dev/null +++ b/lib/query_router.py @@ -0,0 +1,161 @@ +"""Semantic query router for Aurora. + +Classifies user queries into routes (nav_route, nav_reverse_geocode, +direct_answer, rag_search) by comparing query embeddings against +pre-computed route centroids from example queries. + +TEI endpoint: http://100.64.0.14:8090/embed (cortex via Tailscale) +""" + +import math +import threading +import requests + +# ── Route examples ──────────────────────────────────────────────────────────── +ROUTE_EXAMPLES = { + "nav_route": [ + "how do I get to Boise", + "directions to Twin Falls", + "how do I get from Buhl to Boise", + "drive from Jerome to Sun Valley", + "route from Boise to McCall", + "what's the fastest way to Sun Valley", + "how far is it to Twin Falls", + "take me to Shoshone", + "navigate to the airport", + "how do I drive to Salt Lake City", + "walking directions to the park", + "bike route to downtown", + ], + "nav_reverse_geocode": [ + "what town is at 42.5, -114.7", + "where am I right now", + "what is at coordinates 43.6, -116.2", + "what location is 42.574, -114.607", + "where is this place 44.0, -114.3", + "what city is near 42.7, -114.5", + "reverse geocode 43.0, -115.0", + "what's at this location 42.9, -114.8", + ], + "direct_answer": [ + "hello", + "hey aurora", + "good morning", + "thanks", + "thank you", + "what's your name", + "who are you", + "tell me a joke", + "how are you", + "hi there", + ], + "rag_search": [ + "what does the survival manual say about water", + "how to purify water in the field", + "how to treat a gunshot wound", + "what is the ranger handbook chapter on patrolling", + "field manual water purification", + "how to build a shelter in the wilderness", + "tactical combat casualty care procedures", + "what does FM 21-76 say about fire starting", + ], +} + +# ── Module-level cache ──────────────────────────────────────────────────────── +_ROUTE_CENTROIDS: dict | None = None +_LOCK = threading.Lock() + + +def _embed_batch(texts: list[str], tei_url: str) -> list[list[float]]: + """Embed a batch of texts via TEI.""" + resp = requests.post(tei_url, json={"inputs": texts}, timeout=30) + resp.raise_for_status() + return resp.json() + + +def _compute_centroid(vectors: list[list[float]]) -> list[float]: + """Element-wise mean of vectors.""" + n = len(vectors) + dim = len(vectors[0]) + centroid = [0.0] * dim + for vec in vectors: + for i in range(dim): + centroid[i] += vec[i] + for i in range(dim): + centroid[i] /= n + return centroid + + +def _cosine_similarity(a: list[float], b: list[float]) -> float: + """Cosine similarity between two vectors (pure Python).""" + dot = 0.0 + norm_a = 0.0 + norm_b = 0.0 + for i in range(len(a)): + dot += a[i] * b[i] + norm_a += a[i] * a[i] + norm_b += b[i] * b[i] + denom = math.sqrt(norm_a) * math.sqrt(norm_b) + if denom == 0: + return 0.0 + return dot / denom + + +def _ensure_centroids(tei_url: str) -> dict[str, list[float]]: + """Lazy-init: embed all examples in one batch, compute centroids, cache.""" + global _ROUTE_CENTROIDS + if _ROUTE_CENTROIDS is not None: + return _ROUTE_CENTROIDS + + with _LOCK: + if _ROUTE_CENTROIDS is not None: + return _ROUTE_CENTROIDS + + # Flatten all examples into one batch + all_texts = [] + route_ranges: dict[str, tuple[int, int]] = {} + offset = 0 + for route, examples in ROUTE_EXAMPLES.items(): + route_ranges[route] = (offset, offset + len(examples)) + all_texts.extend(examples) + offset += len(examples) + + all_vectors = _embed_batch(all_texts, tei_url) + + centroids = {} + for route, (start, end) in route_ranges.items(): + centroids[route] = _compute_centroid(all_vectors[start:end]) + + _ROUTE_CENTROIDS = centroids + return _ROUTE_CENTROIDS + + +def classify( + query: str, + tei_url: str = "http://100.64.0.14:8090/embed", + threshold: float = 0.45, +) -> tuple[str, float]: + """Classify a query into a route. + + Returns (route_name, confidence). If no route exceeds the threshold, + returns ("rag_search", best_score) as the safe default. + """ + centroids = _ensure_centroids(tei_url) + + # Embed the query + vecs = _embed_batch([query], tei_url) + query_vec = vecs[0] + + # Compare against all centroids + best_route = "rag_search" + best_score = 0.0 + for route, centroid in centroids.items(): + sim = _cosine_similarity(query_vec, centroid) + if sim > best_score: + best_score = sim + best_route = route + + if best_score < threshold: + return ("rag_search", best_score) + + return (best_route, best_score) diff --git a/lib/query_router_test.py b/lib/query_router_test.py new file mode 100644 index 0000000..27ccefd --- /dev/null +++ b/lib/query_router_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Test suite for the semantic query router.""" + +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib.query_router import classify + +TEST_QUERIES = [ + ("how do I get from Buhl to Boise", "nav_route"), + ("what does the survival manual say about water", "rag_search"), + ("what town is at 42.5, -114.7", "nav_reverse_geocode"), + ("hey aurora", "direct_answer"), + ("what's the fastest way to Sun Valley", "nav_route"), + ("how to purify water in the field", "rag_search"), + ("good morning", "direct_answer"), +] + + +def main(): + print("Query Router Test Suite") + print("=" * 70) + + passed = 0 + failed = 0 + + for query, expected in TEST_QUERIES: + route, confidence = classify(query) + status = "PASS" if route == expected else "FAIL" + if status == "PASS": + passed += 1 + else: + failed += 1 + print(f" [{status}] {query!r}") + print(f" → {route} ({confidence:.3f}) expected={expected}") + + print("=" * 70) + print(f"Results: {passed}/{passed + failed} passed") + if failed: + print(f" {failed} FAILED") + sys.exit(1) + else: + print(" All tests passed!") + + +if __name__ == "__main__": + main() From 23483e81988304fac5e9ee5c7eb4f8ade3e18460 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 04:02:11 +0000 Subject: [PATCH 04/72] feat(navi): address book with geocoding integration - YAML-backed saved locations (config/address_book.yaml) - Exact/partial alias matching with case-insensitive lookup - Flask blueprint: /api/address_book/lookup, /api/address_book/list - Geocoder short-circuits Photon when address book has exact match - Test suite for lookup behavior Co-Authored-By: Claude Opus 4.6 --- config/address_book.yaml | 18 ++++++ lib/address_book.py | 132 +++++++++++++++++++++++++++++++++++++++ lib/address_book_api.py | 31 +++++++++ lib/address_book_test.py | 57 +++++++++++++++++ lib/api.py | 5 ++ lib/nav_tools.py | 21 ++++++- 6 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 config/address_book.yaml create mode 100644 lib/address_book.py create mode 100644 lib/address_book_api.py create mode 100644 lib/address_book_test.py diff --git a/config/address_book.yaml b/config/address_book.yaml new file mode 100644 index 0000000..24bc81c --- /dev/null +++ b/config/address_book.yaml @@ -0,0 +1,18 @@ +# RECON Address Book — saved locations for navigation shortcuts. +# Entries are matched by name and aliases (case-insensitive). +# Add new entries by appending to the list below. + +entries: + - id: home + name: Home + aliases: + - home + - matt's house + - 214 north st + - 214 north street + address: "214 North St, Filer, ID 83328" + lat: 42.5735833 + lon: -114.6066389 + tags: + - residence + - primary diff --git a/lib/address_book.py b/lib/address_book.py new file mode 100644 index 0000000..a9cfc40 --- /dev/null +++ b/lib/address_book.py @@ -0,0 +1,132 @@ +""" +RECON Address Book — YAML-backed saved-location lookup. + +Provides named locations (home, work, etc.) that short-circuit Photon +geocoding when an exact alias match is found. + +Config: /opt/recon/config/address_book.yaml +""" + +import os +import threading + +import yaml + +from .utils import setup_logging + +logger = setup_logging('recon.address_book') + +_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + 'config', 'address_book.yaml', +) + +_lock = threading.Lock() +_entries: list[dict] = [] +_mtime: float = 0.0 + + +def _reload_if_changed(): + """Reload the YAML file if its mtime has changed.""" + global _entries, _mtime + try: + st = os.stat(_CONFIG_PATH) + except FileNotFoundError: + logger.warning("Address book not found: %s", _CONFIG_PATH) + _entries = [] + _mtime = 0.0 + return + + if st.st_mtime == _mtime: + return + + with _lock: + # Double-check after acquiring lock + try: + st = os.stat(_CONFIG_PATH) + except FileNotFoundError: + _entries = [] + _mtime = 0.0 + return + if st.st_mtime == _mtime: + return + + with open(_CONFIG_PATH, 'r') as f: + data = yaml.safe_load(f) or {} + + raw = data.get('entries', []) + loaded = [] + for entry in raw: + # Normalise aliases to lowercase for matching + aliases = [a.lower() for a in entry.get('aliases', [])] + loaded.append({ + 'id': entry.get('id', ''), + 'name': entry.get('name', ''), + 'aliases': aliases, + 'address': entry.get('address', ''), + 'lat': entry.get('lat'), + 'lon': entry.get('lon'), + 'tags': entry.get('tags', []), + }) + _entries = loaded + _mtime = st.st_mtime + logger.info("Address book loaded: %d entries from %s", len(_entries), _CONFIG_PATH) + + +def load(): + """Ensure the address book is loaded (and refreshed if the file changed).""" + _reload_if_changed() + return _entries + + +def lookup(query: str): + """ + Look up a query against name and aliases. + + Returns dict with the matching entry plus a 'confidence' field: + - "exact": full name or alias match + - "partial": query is a substring of an alias or name (or vice versa) + - None if no match + """ + _reload_if_changed() + q = query.strip().lower() + if not q: + return None + + best = None + best_confidence = None + + for entry in _entries: + # Exact match on name + if q == entry['name'].lower(): + return {**entry, 'confidence': 'exact'} + + # Exact match on any alias + if q in entry['aliases']: + return {**entry, 'confidence': 'exact'} + + # Partial: query is substring of name/alias, or name/alias is substring of query + name_lower = entry['name'].lower() + if q in name_lower or name_lower in q: + if best is None: + best = entry + best_confidence = 'partial' + continue + + for alias in entry['aliases']: + if q in alias or alias in q: + if best is None: + best = entry + best_confidence = 'partial' + break + + if best is not None: + return {**best, 'confidence': best_confidence} + + return None + + +def list_all(): + """Return all address book entries.""" + _reload_if_changed() + return list(_entries) diff --git a/lib/address_book_api.py b/lib/address_book_api.py new file mode 100644 index 0000000..020828b --- /dev/null +++ b/lib/address_book_api.py @@ -0,0 +1,31 @@ +""" +RECON Address Book API — Flask Blueprint. + +GET /api/address_book/lookup?q= — best match or 404 +GET /api/address_book/list — all entries +""" + +from flask import Blueprint, request, jsonify + +from . import address_book + +address_book_bp = Blueprint('address_book', __name__) + + +@address_book_bp.route('/api/address_book/lookup') +def api_address_book_lookup(): + q = request.args.get('q', '').strip() + if not q: + return jsonify({'error': 'Missing q parameter'}), 400 + + result = address_book.lookup(q) + if result is None: + return '', 404 + + return jsonify(result) + + +@address_book_bp.route('/api/address_book/list') +def api_address_book_list(): + entries = address_book.list_all() + return jsonify(entries) diff --git a/lib/address_book_test.py b/lib/address_book_test.py new file mode 100644 index 0000000..e7fa7ef --- /dev/null +++ b/lib/address_book_test.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +"""Tests for RECON address book module.""" +import sys +import os + +# Add project root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib import address_book + +TESTS = [ + ("lookup('home') → exact", + lambda: address_book.lookup("home"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('Home') → exact (case-insensitive)", + lambda: address_book.lookup("Home"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 north st') → exact via alias", + lambda: address_book.lookup("214 north st"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 North Street') → exact via alias", + lambda: address_book.lookup("214 North Street"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('nonexistent place') → None", + lambda: address_book.lookup("nonexistent place"), + lambda r: r is None), + + ("list_all() → 1 entry", + lambda: address_book.list_all(), + lambda r: isinstance(r, list) and len(r) == 1 and r[0]['id'] == 'home'), +] + +passed = 0 +failed = 0 +for name, fn, check in TESTS: + try: + result = fn() + ok = check(result) + except Exception as e: + ok = False + result = f"EXCEPTION: {e}" + + status = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + print(f" [{status}] {name}") + if not ok: + print(f" got: {result}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/api.py b/lib/api.py index a739ec0..49e7005 100644 --- a/lib/api.py +++ b/lib/api.py @@ -44,6 +44,11 @@ app = Flask(__name__, app.config['MAX_CONTENT_LENGTH'] = None # ZIM files can be multi-GB +# ── Address Book Blueprint ── +from .address_book_api import address_book_bp +app.register_blueprint(address_book_bp) + + # ── Navigation Constants ── KNOWLEDGE_SUBNAV = [ diff --git a/lib/nav_tools.py b/lib/nav_tools.py index f6db5e6..832ca2d 100644 --- a/lib/nav_tools.py +++ b/lib/nav_tools.py @@ -3,6 +3,10 @@ import re import requests +from .utils import setup_logging + +logger = setup_logging('recon.nav_tools') + PHOTON_URL = "http://localhost:2322" VALHALLA_URL = "http://localhost:8002" @@ -20,11 +24,26 @@ def _parse_coords(text: str): def _geocode(query: str): - """Geocode a place name via Photon. Returns (lat, lon, display_name) or raises.""" + """Geocode a place name via address book then Photon. Returns (lat, lon, display_name) or raises.""" coords = _parse_coords(query) if coords: return coords[0], coords[1], query + # ── Address book lookup (before Photon) ── + try: + from . import address_book + match = address_book.lookup(query) + if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'): + logger.info("Address book exact match: %r → %s (%s, %s)", + query, match['name'], match['lat'], match['lon']) + return match['lat'], match['lon'], match.get('address') or match['name'] + elif match and match['confidence'] == 'partial': + logger.info("Address book partial match: %r → %s (falling through to Photon)", + query, match['name']) + except Exception as e: + logger.debug("Address book lookup failed: %s", e) + + # ── Photon geocoding ── try: resp = requests.get( f"{PHOTON_URL}/api", From dfab388769258de981c857110aa447c2ffd7fc28 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 07:24:09 +0000 Subject: [PATCH 05/72] feat(navi): add netsyms tier-2 geocoding + geocode API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Netsyms AddressDatabase2025 (159M US+CA addresses) as tier-2 in the geocode chain: address_book → netsyms → photon. - lib/netsyms.py: SQLite lookup module (lazy, read-only, thread-safe) - lib/netsyms_api.py: Flask blueprints for /api/netsyms/* and /api/geocode - lib/netsyms_test.py: 7 test cases (street, free-text, zipcode, health) - lib/nav_tools.py: new geocode() with consistent {name,lat,lon,source,raw} - lib/api.py: register netsyms_bp and geocode_bp Co-Authored-By: Claude Opus 4.6 --- lib/api.py | 5 + lib/nav_tools.py | 114 ++++++++++++++++++++++ lib/netsyms.py | 228 ++++++++++++++++++++++++++++++++++++++++++++ lib/netsyms_api.py | 48 ++++++++++ lib/netsyms_test.py | 80 ++++++++++++++++ 5 files changed, 475 insertions(+) create mode 100644 lib/netsyms.py create mode 100644 lib/netsyms_api.py create mode 100644 lib/netsyms_test.py diff --git a/lib/api.py b/lib/api.py index 49e7005..9dd8587 100644 --- a/lib/api.py +++ b/lib/api.py @@ -48,6 +48,11 @@ app.config['MAX_CONTENT_LENGTH'] = None # ZIM files can be multi-GB from .address_book_api import address_book_bp app.register_blueprint(address_book_bp) +# ── Netsyms + Geocode Blueprints ── +from .netsyms_api import netsyms_bp, geocode_bp +app.register_blueprint(netsyms_bp) +app.register_blueprint(geocode_bp) + # ── Navigation Constants ── diff --git a/lib/nav_tools.py b/lib/nav_tools.py index 832ca2d..6d7d343 100644 --- a/lib/nav_tools.py +++ b/lib/nav_tools.py @@ -70,6 +70,120 @@ def _geocode(query: str): return coords[1], coords[0], display # lat, lon + +def geocode(query: str): + """ + Three-tier geocode chain returning a consistent shape. + + Chain: address_book (exact) → netsyms → photon. + Returns dict with {name, lat, lon, source, raw} or None. + """ + coords = _parse_coords(query) + if coords: + return { + 'name': query, + 'lat': coords[0], + 'lon': coords[1], + 'source': 'coordinates', + 'raw': None, + } + + # ── Tier 1: Address book (exact match only) ── + ab_partial = None + try: + from . import address_book + match = address_book.lookup(query) + if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'): + logger.info("geocode: address_book exact match: %r → %s", query, match['name']) + return { + 'name': match.get('address') or match['name'], + 'lat': match['lat'], + 'lon': match['lon'], + 'source': 'address_book', + 'raw': match, + } + elif match and match['confidence'] == 'partial': + logger.info("geocode: address_book partial match: %r → %s (continuing chain)", + query, match['name']) + ab_partial = match + except Exception as e: + logger.debug("geocode: address_book lookup failed: %s", e) + + # ── Tier 2: Netsyms (159M US+CA addresses) ── + netsyms_result = None + try: + from . import netsyms + results = netsyms.lookup_free_text(query) + if results: + # Prefer results with plus4 (more precise) + best = results[0] + for r in results: + if r.get('plus4') and not best.get('plus4'): + best = r + break + addr_parts = [best['number'], best['street']] + if best.get('street2'): + addr_parts.append(best['street2']) + addr_parts.extend([best['city'], best['state'], best['zipcode']]) + display = ' '.join(p for p in addr_parts if p) + netsyms_result = { + 'name': display, + 'lat': best['lat'], + 'lon': best['lon'], + 'source': 'netsyms', + 'raw': best, + } + logger.info("geocode: netsyms match: %r → %s", query, display) + return netsyms_result + except Exception as e: + logger.debug("geocode: netsyms lookup failed: %s", e) + + # ── Tier 3: Photon (global geocoding) ── + try: + resp = requests.get( + f"{PHOTON_URL}/api", + params={"q": query, "limit": 1}, + timeout=2, + ) + resp.raise_for_status() + data = resp.json() + features = data.get("features", []) + if features: + props = features[0]["properties"] + coords = features[0]["geometry"]["coordinates"] # [lon, lat] + parts = [props.get("name", "")] + for key in ("city", "county", "state", "country"): + v = props.get(key) + if v and v != parts[-1]: + parts.append(v) + display = ", ".join(p for p in parts if p) + logger.info("geocode: photon match: %r → %s", query, display) + return { + 'name': display, + 'lat': coords[1], + 'lon': coords[0], + 'source': 'photon', + 'raw': props, + } + except Exception as e: + logger.debug("geocode: photon lookup failed: %s", e) + + # ── Fallback: address book partial match ── + if ab_partial and ab_partial.get('lat') and ab_partial.get('lon'): + logger.info("geocode: falling back to address_book partial: %r → %s", + query, ab_partial['name']) + return { + 'name': ab_partial.get('address') or ab_partial['name'], + 'lat': ab_partial['lat'], + 'lon': ab_partial['lon'], + 'source': 'address_book', + 'raw': ab_partial, + } + + logger.info("geocode: no match for %r across all tiers", query) + return None + + def reverse_geocode(lat: float, lon: float) -> str: """Reverse geocode coordinates via Photon. Returns formatted address string.""" try: diff --git a/lib/netsyms.py b/lib/netsyms.py new file mode 100644 index 0000000..d51162e --- /dev/null +++ b/lib/netsyms.py @@ -0,0 +1,228 @@ +""" +RECON Netsyms AddressDatabase2025 — SQLite-backed US+CA address lookup. + +Provides 159.78M geocoded addresses as tier-2 between address book +(exact named locations) and Photon (full-text global geocoding). + +Database: /mnt/nav/addresses/AddressDatabase2025.sqlite (read-only) +""" + +import os +import re +import sqlite3 +import threading + +from .utils import setup_logging + +logger = setup_logging('recon.netsyms') + +_DB_PATH = '/mnt/nav/addresses/AddressDatabase2025.sqlite' + +_conn = None +_lock = threading.Lock() +_cached_row_count = None + +# US states + DC + territories, CA provinces, for free-text parsing +_STATE_CODES = { + 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', + 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', + 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', + 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', + 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', + 'DC', 'PR', 'VI', 'GU', 'AS', 'MP', + # Canadian provinces + 'AB', 'BC', 'MB', 'NB', 'NL', 'NS', 'NT', 'NU', 'ON', 'PE', + 'QC', 'SK', 'YT', +} + +_NUMBER_RE = re.compile(r'^(\d+[\w-]*)(.*)$') + + +def _get_conn(): + """Lazy-open a read-only SQLite connection.""" + global _conn + if _conn is not None: + return _conn + with _lock: + if _conn is not None: + return _conn + uri = f'file:{_DB_PATH}?mode=ro' + _conn = sqlite3.connect(uri, uri=True, check_same_thread=False) + _conn.row_factory = sqlite3.Row + logger.info("Netsyms DB opened: %s", _DB_PATH) + return _conn + + +def _row_to_dict(row): + """Convert a sqlite3.Row to a plain dict with lat/lon keys.""" + return { + 'zipcode': row['zipcode'], + 'number': row['number'], + 'street': row['street'], + 'street2': row['street2'], + 'city': row['city'], + 'state': row['state'], + 'plus4': row['plus4'], + 'country': row['country'], + 'lat': float(row['latitude']), + 'lon': float(row['longitude']), + 'source': row['source'], + } + + +def lookup_by_street(number, street, city=None, state=None, + zipcode=None, country=None, limit=20): + """Match on number + street, with optional qualifiers.""" + conn = _get_conn() + clauses = ['number = ?', 'street = ?'] + params = [str(number).strip().upper(), street.strip().upper()] + + if city: + clauses.append('city = ?') + params.append(city.strip().upper()) + if state: + clauses.append('state = ?') + params.append(state.strip().upper()) + if zipcode: + clauses.append('zipcode = ?') + params.append(zipcode.strip()) + if country: + clauses.append('country = ?') + params.append(country.strip().upper()) + + sql = f"SELECT * FROM addresses WHERE {' AND '.join(clauses)} LIMIT ?" + params.append(limit) + + with _lock: + try: + rows = conn.execute(sql, params).fetchall() + except sqlite3.Error as e: + logger.warning("Netsyms lookup_by_street error: %s", e) + return [] + + results = [_row_to_dict(r) for r in rows] + logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results", + number, street, city, state, len(results)) + return results + + +def lookup_free_text(query, country_hint=None): + """Parse a free-text address and look it up.""" + q = query.strip() + if not q: + return [] + + # Strip trailing zipcode if present + zipcode = None + zip_match = re.search(r'\b(\d{5})\s*$', q) + if zip_match: + zipcode = zip_match.group(1) + q = q[:zip_match.start()].strip().rstrip(',').strip() + + # Strip trailing state + tokens = re.split(r'[,\s]+', q) + tokens = [t for t in tokens if t] + if not tokens: + return [] + + state = None + if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES: + state = tokens[-1].upper() + tokens = tokens[:-1] + + # Leading digits → number + number = None + if tokens and re.match(r'^\d', tokens[0]): + number = tokens[0] + tokens = tokens[1:] + + if not tokens: + # Only a number, or empty — try zipcode if we have one + if zipcode: + return lookup_by_zipcode(zipcode, limit=20) + return [] + + # If state was found and we have 2+ tokens remaining, last token is city + city = None + if state and len(tokens) >= 2: + city = tokens[-1] + tokens = tokens[:-1] + + street = ' '.join(tokens) + + if number: + results = lookup_by_street(number, street, city=city, state=state, + zipcode=zipcode, country=country_hint) + if results: + logger.debug("lookup_free_text(%r) → %d results via street match", + query, len(results)) + return results + + # Fallback: try zipcode only if available + if zipcode: + return lookup_by_zipcode(zipcode, limit=20) + + logger.debug("lookup_free_text(%r) → 0 results", query) + return [] + + +def lookup_by_zipcode(zipcode, limit=100): + """Direct zipcode lookup.""" + conn = _get_conn() + sql = "SELECT * FROM addresses WHERE zipcode = ? LIMIT ?" + params = [zipcode.strip(), limit] + + with _lock: + try: + rows = conn.execute(sql, params).fetchall() + except sqlite3.Error as e: + logger.warning("Netsyms lookup_by_zipcode error: %s", e) + return [] + + results = [_row_to_dict(r) for r in rows] + logger.debug("lookup_by_zipcode(%s) → %d results", zipcode, len(results)) + return results + + +def health(): + """Health check with cached row count.""" + global _cached_row_count + + try: + file_size = os.path.getsize(_DB_PATH) + except OSError: + return {'ok': False, 'row_count': 0, 'file_size_bytes': 0, + 'indexed_countries': []} + + try: + conn = _get_conn() + except Exception: + return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size, + 'indexed_countries': []} + + if _cached_row_count is None: + with _lock: + if _cached_row_count is None: + try: + row = conn.execute( + "SELECT COUNT(*) AS cnt FROM addresses" + ).fetchone() + _cached_row_count = row['cnt'] + except sqlite3.Error: + _cached_row_count = 0 + + with _lock: + try: + rows = conn.execute( + "SELECT DISTINCT country FROM addresses" + ).fetchall() + countries = sorted(r['country'] for r in rows) + except sqlite3.Error: + countries = [] + + return { + 'ok': True, + 'row_count': _cached_row_count, + 'file_size_bytes': file_size, + 'indexed_countries': countries, + } diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py new file mode 100644 index 0000000..2d6e43d --- /dev/null +++ b/lib/netsyms_api.py @@ -0,0 +1,48 @@ +""" +RECON Netsyms API + Geocode chain — Flask Blueprints. + +GET /api/netsyms/lookup?q=&country= +GET /api/netsyms/health +GET /api/geocode?q= (full 3-tier chain: address_book → netsyms → photon) +""" + +from flask import Blueprint, request, jsonify + +from . import netsyms +from . import address_book +from . import nav_tools +from .utils import setup_logging + +logger = setup_logging('recon.netsyms_api') + +netsyms_bp = Blueprint('netsyms', __name__) +geocode_bp = Blueprint('geocode', __name__) + + +@netsyms_bp.route('/api/netsyms/lookup') +def api_netsyms_lookup(): + q = request.args.get('q', '').strip() + if not q: + return jsonify({'error': 'Missing q parameter'}), 400 + + country = request.args.get('country', '').strip() or None + results = netsyms.lookup_free_text(q, country_hint=country) + return jsonify({'results': results, 'count': len(results), 'query': q}) + + +@netsyms_bp.route('/api/netsyms/health') +def api_netsyms_health(): + return jsonify(netsyms.health()) + + +@geocode_bp.route('/api/geocode') +def api_geocode(): + q = request.args.get('q', '').strip() + if not q: + return jsonify({'error': 'Missing q parameter'}), 400 + + result = nav_tools.geocode(q) + if result is None: + return jsonify({'error': 'No results', 'query': q}), 404 + + return jsonify(result) diff --git a/lib/netsyms_test.py b/lib/netsyms_test.py new file mode 100644 index 0000000..ed70472 --- /dev/null +++ b/lib/netsyms_test.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Tests for Netsyms address database module.""" + +import sys +import os + +# Ensure the lib directory is importable +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib import netsyms + + +def test_lookup_by_street_lowercase(): + results = netsyms.lookup_by_street("214", "North St", city="Filer", state="ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert abs(r['lat'] - 42.5736) < 0.01, f"Lat mismatch: {r['lat']}" + assert abs(r['lon'] - (-114.6066)) < 0.01, f"Lon mismatch: {r['lon']}" + print(" PASS: lookup_by_street (lowercase)") + + +def test_lookup_by_street_uppercase(): + results = netsyms.lookup_by_street("214", "NORTH ST", city="FILER", state="ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert abs(r['lat'] - 42.5736) < 0.01, f"Lat mismatch: {r['lat']}" + print(" PASS: lookup_by_street (uppercase)") + + +def test_lookup_nonexistent(): + results = netsyms.lookup_by_street("999999", "Nonexistent Rd", + city="Filer", state="ID") + assert results == [], f"Expected empty list, got {len(results)} results" + print(" PASS: lookup_by_street (nonexistent)") + + +def test_free_text_with_commas(): + results = netsyms.lookup_free_text("214 North St, Filer, ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert r['city'] == 'FILER', f"City mismatch: {r['city']}" + assert r['state'] == 'ID', f"State mismatch: {r['state']}" + print(" PASS: lookup_free_text (commas)") + + +def test_free_text_no_commas(): + results = netsyms.lookup_free_text("214 North St Filer ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert r['state'] == 'ID', f"State mismatch: {r['state']}" + print(" PASS: lookup_free_text (no commas)") + + +def test_lookup_by_zipcode(): + results = netsyms.lookup_by_zipcode("83328", limit=5) + assert len(results) == 5, f"Expected 5 results, got {len(results)}" + for r in results: + assert r['zipcode'] == '83328', f"Zipcode mismatch: {r['zipcode']}" + print(" PASS: lookup_by_zipcode") + + +def test_health(): + h = netsyms.health() + assert h['ok'] is True, f"Health not OK: {h}" + assert h['row_count'] >= 159_000_000, f"Row count too low: {h['row_count']}" + assert 'US' in h['indexed_countries'], f"US not in countries: {h['indexed_countries']}" + assert 'CA' in h['indexed_countries'], f"CA not in countries: {h['indexed_countries']}" + print(" PASS: health") + + +if __name__ == '__main__': + print("Running Netsyms tests...") + test_lookup_by_street_lowercase() + test_lookup_by_street_uppercase() + test_lookup_nonexistent() + test_free_text_with_commas() + test_free_text_no_commas() + test_lookup_by_zipcode() + test_health() + print("All tests passed.") From a14501347b6d3b9a0b79d3d7d60a8e43a1e31ee9 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 07:54:32 +0000 Subject: [PATCH 06/72] fix(navi): address book prefix+boundary match for longer queries lookup() previously did exact-alias-only matching, so "214 north st filer" missed the home entry with alias "214 north st". Extend to match when the query begins with an alias followed by a word boundary, and when an alias appears as a contiguous token sequence inside the query. Short aliases ("home") keep matching exactly and also match with trailing text. Fixes the UX case where typing a known full address falls through to Netsyms instead of short-circuiting to address_book. Co-Authored-By: Claude Opus 4.6 --- lib/address_book.py | 80 +++++++++++++++++++++++++++------------- lib/address_book_test.py | 34 +++++++++++++++++ 2 files changed, 88 insertions(+), 26 deletions(-) diff --git a/lib/address_book.py b/lib/address_book.py index a9cfc40..f9827f6 100644 --- a/lib/address_book.py +++ b/lib/address_book.py @@ -8,6 +8,7 @@ Config: /opt/recon/config/address_book.yaml """ import os +import re import threading import yaml @@ -79,49 +80,76 @@ def load(): return _entries +def _normalize(text: str) -> str: + """Lowercase, strip, remove commas, collapse whitespace.""" + t = text.strip().lower() + t = t.replace(',', ' ') + return ' '.join(t.split()) + + def lookup(query: str): """ Look up a query against name and aliases. Returns dict with the matching entry plus a 'confidence' field: - - "exact": full name or alias match - - "partial": query is a substring of an alias or name (or vice versa) + - "exact": full name/alias match, OR query starts with alias + word boundary + - "partial": alias starts with query + word boundary, or alias appears + as a contiguous token sequence inside the query - None if no match + + Matching order (first exact wins, else first partial): + 1. normalized(query) == normalized(name or alias) → exact + 2. normalized(query) starts with normalized(alias) + " " → exact + 3. normalized(alias) starts with normalized(query) + " " → partial + 4. normalized(alias) is a contiguous token sub-sequence → partial """ _reload_if_changed() - q = query.strip().lower() + q = _normalize(query) if not q: return None - best = None - best_confidence = None + first_exact = None + first_partial = None for entry in _entries: - # Exact match on name - if q == entry['name'].lower(): - return {**entry, 'confidence': 'exact'} + norm_name = _normalize(entry['name']) + check_aliases = [_normalize(a) for a in entry.get('aliases', [])] + all_forms = [norm_name] + check_aliases - # Exact match on any alias - if q in entry['aliases']: - return {**entry, 'confidence': 'exact'} + for form in all_forms: + if not form: + continue - # Partial: query is substring of name/alias, or name/alias is substring of query - name_lower = entry['name'].lower() - if q in name_lower or name_lower in q: - if best is None: - best = entry - best_confidence = 'partial' - continue + # Rule 1: exact match + if q == form: + return {**entry, 'confidence': 'exact'} - for alias in entry['aliases']: - if q in alias or alias in q: - if best is None: - best = entry - best_confidence = 'partial' - break + # Rule 2: query starts with alias + word boundary + if q.startswith(form + ' '): + if first_exact is None: + first_exact = entry + continue - if best is not None: - return {**best, 'confidence': best_confidence} + # Rule 3: alias starts with query (user still typing) + if form.startswith(q) and len(q) < len(form): + if first_partial is None: + first_partial = entry + continue + + # Rule 4: alias is contiguous token sub-sequence in query + # Build regex: token1\s+token2\s+...tokenN + tokens = form.split() + if len(tokens) >= 1: + pattern = r'(?:^|\s)' + r'\s+'.join(re.escape(t) for t in tokens) + r'(?:\s|$)' + if re.search(pattern, q): + if first_partial is None: + first_partial = entry + + if first_exact is not None: + return {**first_exact, 'confidence': 'exact'} + + if first_partial is not None: + return {**first_partial, 'confidence': 'partial'} return None diff --git a/lib/address_book_test.py b/lib/address_book_test.py index e7fa7ef..75905f0 100644 --- a/lib/address_book_test.py +++ b/lib/address_book_test.py @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from lib import address_book TESTS = [ + # ── Existing tests ── ("lookup('home') → exact", lambda: address_book.lookup("home"), lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), @@ -32,6 +33,39 @@ TESTS = [ ("list_all() → 1 entry", lambda: address_book.list_all(), lambda r: isinstance(r, list) and len(r) == 1 and r[0]['id'] == 'home'), + + # ── New prefix+boundary tests ── + ("lookup('214 north st filer') → exact (query starts with alias)", + lambda: address_book.lookup("214 north st filer"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 North St Filer ID') → exact (case + trailing state)", + lambda: address_book.lookup("214 North St Filer ID"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 north st, filer, id') → exact (commas stripped)", + lambda: address_book.lookup("214 north st, filer, id"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('home today') → exact (short alias + trailing text)", + lambda: address_book.lookup("home today"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214') → partial (query is prefix of alias)", + lambda: address_book.lookup("214"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('214 n') → partial (partial prefix of alias)", + lambda: address_book.lookup("214 n"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('completely unrelated query') → None", + lambda: address_book.lookup("completely unrelated query"), + lambda r: r is None), + + ("lookup('214 north streets of filer') → None (no word boundary after st)", + lambda: address_book.lookup("214 north streets of filer"), + lambda r: r is None), ] passed = 0 From c76d63b785399a9ed7741e5c891aa133b925ce07 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 15:48:03 +0000 Subject: [PATCH 07/72] refactor(navi): Photon-first geocoding with ranked results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inverts the /api/geocode chain. Photon is now the primary search engine; the hand-rolled Netsyms free-text parser is removed. Address book short-circuits nicknames only ("home", "work") — full-address queries flow through Photon and address book entries within 75m annotate matching results with labeled_as. Coordinate strings detected before search. Response shape: /api/geocode now returns a ranked candidates list (always 200 OK, empty list if no match). No more 404 for unmatched queries. Users can type messy input — wrong case, missing punctuation, abbreviations, typos — and get results or close matches. Netsyms preserved at /api/netsyms/lookup for direct access. USPS plus4 enrichment of Photon street-address hits is a planned follow-up. Co-Authored-By: Claude Opus 4.6 --- lib/geocode_test.py | 158 +++++++++++++++++++++ lib/nav_tools.py | 335 ++++++++++++++++++++++++++------------------ lib/netsyms_api.py | 30 ++-- 3 files changed, 380 insertions(+), 143 deletions(-) create mode 100644 lib/geocode_test.py diff --git a/lib/geocode_test.py b/lib/geocode_test.py new file mode 100644 index 0000000..ab26e55 --- /dev/null +++ b/lib/geocode_test.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +"""Tests for RECON Photon-first geocode chain.""" +import sys +import os +import json +import urllib.request +import urllib.parse + +BASE = "http://localhost:8420" + +TESTS = [ + { + "name": "home → nickname short-circuit", + "query": "home", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "address_book" + and r["results"][0]["confidence"] == "exact" + and r["results"][0]["type"] == "nickname" + ), + }, + { + "name": "214 north st filer → photon results (multi-word, not nickname)", + "query": "214 north st filer", + "check": lambda r: ( + r["count"] >= 1 + and r["results"][0]["source"] == "photon" + # labeled_as=Home may or may not appear depending on Photon's + # geocoding precision — the key invariant is that this multi-word + # query flows through Photon, not the address book shortcut. + ), + }, + { + "name": "214 North St, Filer, ID → photon (case/punctuation)", + "query": "214 North St, Filer, ID", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "214 NORTH ST FILER ID → photon (uppercase)", + "query": "214 NORTH ST FILER ID", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "1600 Pennsylvania Ave Washington DC → White House", + "query": "1600 Pennsylvania Ave Washington DC", + "check": lambda r: ( + r["count"] >= 1 + and r["results"][0]["source"] == "photon" + ), + }, + { + "name": "1600 pennsylvania ave washington dc → lowercase", + "query": "1600 pennsylvania ave washington dc", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "starbucks filer → POI result", + "query": "starbucks filer", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "filer idaho → locality", + "query": "filer idaho", + "check": lambda r: ( + r["count"] >= 1 + and r["results"][0]["source"] == "photon" + and r["results"][0]["type"] == "locality" + ), + }, + { + "name": "filer → partial query, at least 1 result", + "query": "filer", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "42.5736, -114.6066 → coordinates (with space)", + "query": "42.5736, -114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + and r["results"][0]["type"] == "coordinates" + ), + }, + { + "name": "42.5736,-114.6066 → coordinates (no space)", + "query": "42.5736,-114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + ), + }, + { + "name": "boise → at least 1 result", + "query": "boise", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "toronto → CA canary", + "query": "toronto", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "asdfghjklqwerty → empty results, 200 OK", + "query": "asdfghjklqwerty", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, + { + "name": "empty query → empty results", + "query": "", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, +] + +passed = 0 +failed = 0 + +for t in TESTS: + q = urllib.parse.urlencode({"q": t["query"]}) if t["query"] else "q=" + url = f"{BASE}/api/geocode?{q}" + try: + req = urllib.request.Request(url) + with urllib.request.urlopen(req, timeout=10) as resp: + status = resp.status + body = json.loads(resp.read()) + except urllib.error.HTTPError as e: + status = e.code + try: + body = json.loads(e.read()) + except Exception: + body = {} + except Exception as e: + status = 0 + body = {} + print(f" [FAIL] {t['name']}") + print(f" EXCEPTION: {e}") + failed += 1 + continue + + ok = status == 200 and t["check"](body) + tag = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + + top = body.get("results", [{}])[0] if body.get("results") else {} + top_summary = f"source={top.get('source','—')} type={top.get('type','—')} conf={top.get('confidence','—')} name={top.get('name','—')[:50]}" + print(f" [{tag}] {t['name']}") + if not ok: + print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}") + else: + labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else "" + print(f" → {top_summary}{labeled}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/nav_tools.py b/lib/nav_tools.py index 6d7d343..839c5d8 100644 --- a/lib/nav_tools.py +++ b/lib/nav_tools.py @@ -1,5 +1,6 @@ """Navigation tools: geocoding via Photon and routing via Valhalla.""" +import math import re import requests @@ -10,178 +11,242 @@ logger = setup_logging('recon.nav_tools') PHOTON_URL = "http://localhost:2322" VALHALLA_URL = "http://localhost:8002" -_COORD_RE = re.compile(r'^(-?\d+\.?\d*)\s*,\s*(-?\d+\.?\d*)$') +# Regional bias for Photon searches (Idaho-centric for Matt's use case). +# Adjustable — Photon uses these to rank nearby results higher. +GEOCODE_BIAS_LAT = 42.5736 +GEOCODE_BIAS_LON = -114.6066 +GEOCODE_BIAS_ZOOM = 10 + +# Distance threshold (meters) for annotating Photon results with address +# book labels. 75m covers GPS jitter + geocoder imprecision. +ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 + +# Coordinate regex — handles comma-separated and space-separated forms. +_COORD_RE = re.compile( + r'^\s*(-?\d+\.\d+)\s*[,\s]\s*(-?\d+\.\d+)\s*$' +) VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"} def _parse_coords(text: str): - """Return (lat, lon) if text looks like coordinates, else None.""" + """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" m = _COORD_RE.match(text.strip()) - if m: - return float(m.group(1)), float(m.group(2)) + if not m: + return None + lat, lon = float(m.group(1)), float(m.group(2)) + if -90 <= lat <= 90 and -180 <= lon <= 180: + return lat, lon return None +def _haversine_m(lat1, lon1, lat2, lon2): + """Haversine distance in meters between two (lat, lon) points.""" + R = 6_371_000 # Earth radius in meters + rlat1, rlat2 = math.radians(lat1), math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 + return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +def _classify_photon_feature(props, index): + """Classify a Photon feature into (type, confidence).""" + osm_key = props.get('osm_key', '') + osm_value = props.get('osm_value', '') + feat_type = props.get('type', '') + has_housenumber = bool(props.get('housenumber')) + + # Type classification + if has_housenumber or osm_value in ('house', 'residential'): + result_type = 'street_address' + elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): + result_type = 'locality' + elif osm_key in ('amenity', 'shop', 'tourism', 'leisure') or osm_value: + result_type = 'poi' + else: + result_type = 'poi' + + # Confidence — simple positional heuristic + if index == 0: + confidence = 'high' + elif index <= 2: + confidence = 'medium' + else: + confidence = 'low' + + return result_type, confidence + + +def _photon_feature_to_name(props): + """Build a display name from a Photon feature's properties.""" + parts = [] + housenumber = props.get('housenumber') + street = props.get('street') + name = props.get('name', '') + + if housenumber and street: + parts.append(f"{housenumber} {street}") + if name and name != street: + parts.append(name) + elif name: + parts.append(name) + elif street: + parts.append(street) + + for key in ('city', 'county', 'state', 'country'): + v = props.get(key) + if v and (not parts or v != parts[-1]): + parts.append(v) + + return ', '.join(p for p in parts if p) or 'Unknown' + + +def _annotate_with_address_book(results): + """Add labeled_as to results within ADDRESS_BOOK_ANNOTATION_RADIUS_M of an address book entry.""" + try: + from . import address_book + entries = address_book.load() + except Exception: + return + + for result in results: + rlat, rlon = result.get('lat'), result.get('lon') + if rlat is None or rlon is None: + continue + for entry in entries: + elat, elon = entry.get('lat'), entry.get('lon') + if elat is None or elon is None: + continue + dist = _haversine_m(rlat, rlon, elat, elon) + if dist <= ADDRESS_BOOK_ANNOTATION_RADIUS_M: + result['labeled_as'] = entry['name'] + break + + def _geocode(query: str): - """Geocode a place name via address book then Photon. Returns (lat, lon, display_name) or raises.""" - coords = _parse_coords(query) - if coords: - return coords[0], coords[1], query + """Geocode a place name via address book then Photon. Returns (lat, lon, display_name) or raises. - # ── Address book lookup (before Photon) ── - try: - from . import address_book - match = address_book.lookup(query) - if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'): - logger.info("Address book exact match: %r → %s (%s, %s)", - query, match['name'], match['lat'], match['lon']) - return match['lat'], match['lon'], match.get('address') or match['name'] - elif match and match['confidence'] == 'partial': - logger.info("Address book partial match: %r → %s (falling through to Photon)", - query, match['name']) - except Exception as e: - logger.debug("Address book lookup failed: %s", e) - - # ── Photon geocoding ── - try: - resp = requests.get( - f"{PHOTON_URL}/api", - params={"q": query, "limit": 1}, - timeout=10, - ) - resp.raise_for_status() - except requests.RequestException: - raise RuntimeError("Navigation service unavailable") - - data = resp.json() - features = data.get("features", []) - if not features: + Used internally by route() — returns a simple (lat, lon, name) tuple. + For the full ranked-results API, use geocode() instead. + """ + result = geocode(query, limit=1) + results = result.get('results', []) + if not results: raise ValueError(f"Could not find location: {query}") - - props = features[0]["properties"] - coords = features[0]["geometry"]["coordinates"] # [lon, lat] - parts = [props.get("name", "")] - for key in ("city", "county", "state", "country"): - v = props.get(key) - if v and v != parts[-1]: - parts.append(v) - display = ", ".join(p for p in parts if p) - return coords[1], coords[0], display # lat, lon + top = results[0] + return top['lat'], top['lon'], top['name'] -def geocode(query: str): +def geocode(query: str, limit: int = 10): """ - Three-tier geocode chain returning a consistent shape. + Photon-first geocoding with ranked results. - Chain: address_book (exact) → netsyms → photon. - Returns dict with {name, lat, lon, source, raw} or None. + Chain: + 1. Coordinate detection (pre-search) + 2. Address book nickname short-circuit (single-word queries only) + 3. Photon search (primary, biased to Idaho region) + 4. Address book proximity annotation (post-Photon, 75m radius) + + Returns dict: {query, results: [...], count: N} + Always 200-safe — empty results list is valid, never raises. + + Netsyms is preserved at /api/netsyms/lookup for direct structured + access. Enrichment of Photon street-address hits with USPS plus4 + from Netsyms is a planned follow-up (not wired here). """ - coords = _parse_coords(query) + limit = max(1, min(limit, 20)) + q = (query or '').strip() + empty = {'query': q, 'results': [], 'count': 0} + + if not q: + return empty + + # ── 1. Coordinate detection ── + coords = _parse_coords(q) if coords: return { - 'name': query, - 'lat': coords[0], - 'lon': coords[1], - 'source': 'coordinates', - 'raw': None, + 'query': q, + 'results': [{ + 'name': q, + 'lat': coords[0], + 'lon': coords[1], + 'source': 'coordinates', + 'confidence': 'exact', + 'type': 'coordinates', + 'raw': None, + }], + 'count': 1, } - # ── Tier 1: Address book (exact match only) ── - ab_partial = None + # ── 2. Address book nickname short-circuit ── + # Only short-circuit on single-word queries ("home", "work"). + # Multi-word queries fall through to Photon for proper ranking. + normalized_q = ' '.join(q.lower().replace(',', ' ').split()) + is_single_word = ' ' not in normalized_q try: from . import address_book - match = address_book.lookup(query) - if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'): - logger.info("geocode: address_book exact match: %r → %s", query, match['name']) + ab_match = address_book.lookup(q) + if (ab_match + and ab_match['confidence'] == 'exact' + and ab_match.get('lat') and ab_match.get('lon') + and is_single_word): + logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name']) return { - 'name': match.get('address') or match['name'], - 'lat': match['lat'], - 'lon': match['lon'], - 'source': 'address_book', - 'raw': match, + 'query': q, + 'results': [{ + 'name': ab_match.get('address') or ab_match['name'], + 'lat': ab_match['lat'], + 'lon': ab_match['lon'], + 'source': 'address_book', + 'confidence': 'exact', + 'type': 'nickname', + 'raw': ab_match, + }], + 'count': 1, } - elif match and match['confidence'] == 'partial': - logger.info("geocode: address_book partial match: %r → %s (continuing chain)", - query, match['name']) - ab_partial = match except Exception as e: logger.debug("geocode: address_book lookup failed: %s", e) - # ── Tier 2: Netsyms (159M US+CA addresses) ── - netsyms_result = None + # ── 3. Photon search (primary) ── + results = [] try: - from . import netsyms - results = netsyms.lookup_free_text(query) - if results: - # Prefer results with plus4 (more precise) - best = results[0] - for r in results: - if r.get('plus4') and not best.get('plus4'): - best = r - break - addr_parts = [best['number'], best['street']] - if best.get('street2'): - addr_parts.append(best['street2']) - addr_parts.extend([best['city'], best['state'], best['zipcode']]) - display = ' '.join(p for p in addr_parts if p) - netsyms_result = { - 'name': display, - 'lat': best['lat'], - 'lon': best['lon'], - 'source': 'netsyms', - 'raw': best, - } - logger.info("geocode: netsyms match: %r → %s", query, display) - return netsyms_result - except Exception as e: - logger.debug("geocode: netsyms lookup failed: %s", e) - - # ── Tier 3: Photon (global geocoding) ── - try: - resp = requests.get( - f"{PHOTON_URL}/api", - params={"q": query, "limit": 1}, - timeout=2, - ) + params = { + 'q': q, + 'limit': limit, + 'lat': GEOCODE_BIAS_LAT, + 'lon': GEOCODE_BIAS_LON, + 'zoom': GEOCODE_BIAS_ZOOM, + } + resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) resp.raise_for_status() data = resp.json() - features = data.get("features", []) - if features: - props = features[0]["properties"] - coords = features[0]["geometry"]["coordinates"] # [lon, lat] - parts = [props.get("name", "")] - for key in ("city", "county", "state", "country"): - v = props.get(key) - if v and v != parts[-1]: - parts.append(v) - display = ", ".join(p for p in parts if p) - logger.info("geocode: photon match: %r → %s", query, display) - return { - 'name': display, - 'lat': coords[1], - 'lon': coords[0], + + for i, feature in enumerate(data.get('features', [])): + props = feature.get('properties', {}) + geom_coords = feature.get('geometry', {}).get('coordinates', [0, 0]) + result_type, confidence = _classify_photon_feature(props, i) + name = _photon_feature_to_name(props) + results.append({ + 'name': name, + 'lat': geom_coords[1], + 'lon': geom_coords[0], 'source': 'photon', + 'confidence': confidence, + 'type': result_type, 'raw': props, - } + }) + except requests.RequestException as e: + logger.warning("geocode: Photon request failed: %s", e) except Exception as e: - logger.debug("geocode: photon lookup failed: %s", e) + logger.warning("geocode: Photon parse error: %s", e) - # ── Fallback: address book partial match ── - if ab_partial and ab_partial.get('lat') and ab_partial.get('lon'): - logger.info("geocode: falling back to address_book partial: %r → %s", - query, ab_partial['name']) - return { - 'name': ab_partial.get('address') or ab_partial['name'], - 'lat': ab_partial['lat'], - 'lon': ab_partial['lon'], - 'source': 'address_book', - 'raw': ab_partial, - } + # ── 4. Address book annotation (post-Photon) ── + _annotate_with_address_book(results) - logger.info("geocode: no match for %r across all tiers", query) - return None + logger.info("geocode: %r → %d results", q, len(results)) + return {'query': q, 'results': results, 'count': len(results)} def reverse_geocode(lat: float, lon: float) -> str: diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 2d6e43d..09e0d4a 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -1,9 +1,9 @@ """ -RECON Netsyms API + Geocode chain — Flask Blueprints. +RECON Netsyms API + Geocode — Flask Blueprints. GET /api/netsyms/lookup?q=&country= GET /api/netsyms/health -GET /api/geocode?q= (full 3-tier chain: address_book → netsyms → photon) +GET /api/geocode?q=&limit= (Photon-first search with ranked results) """ from flask import Blueprint, request, jsonify @@ -37,12 +37,26 @@ def api_netsyms_health(): @geocode_bp.route('/api/geocode') def api_geocode(): + """ + Photon-first geocoding with ranked candidates. + + GET /api/geocode?q=&limit= + + Always returns 200 OK with: + {query, results: [{name, lat, lon, source, confidence, type, raw, ...}], count} + + - source: "address_book" | "coordinates" | "photon" + - confidence: "exact" | "high" | "medium" | "low" + - type: "nickname" | "coordinates" | "street_address" | "poi" | "locality" + - labeled_as: present when result is within 75m of an address book entry + - Empty results array is valid (no match). No 404s. + """ q = request.args.get('q', '').strip() - if not q: - return jsonify({'error': 'Missing q parameter'}), 400 - - result = nav_tools.geocode(q) - if result is None: - return jsonify({'error': 'No results', 'query': q}), 404 + limit = request.args.get('limit', '10') + try: + limit = max(1, min(int(limit), 20)) + except (ValueError, TypeError): + limit = 10 + result = nav_tools.geocode(q, limit=limit) return jsonify(result) From 87b230dcba5d7cc2b0791896d0ce682e4e70d416 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 16:29:47 +0000 Subject: [PATCH 08/72] feat(navi): structured geocode with usaddress parsing and reranker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add lib/geocode.py — multi-source retrieval pipeline: - usaddress CRF parsing with intent classification - Netsyms structured lookup (uses raw street abbreviations) - Photon /structured + /api freetext retrieval - Weighted 10-signal reranker (housenumber, street fuzz, locality, source authority, etc.) - match_code annotations + address book proximity labeling - Trace log at /tmp/geocode_rerank_trace.log nav_tools.py now delegates geocode() to the new module. Tests updated: US address queries correctly return Netsyms results. Co-Authored-By: Claude Opus 4.6 --- lib/geocode.py | 708 ++++++++++++++++++++++++++++++++++++++++++++ lib/geocode_test.py | 17 +- lib/nav_tools.py | 193 +----------- 3 files changed, 721 insertions(+), 197 deletions(-) create mode 100644 lib/geocode.py diff --git a/lib/geocode.py b/lib/geocode.py new file mode 100644 index 0000000..21a2403 --- /dev/null +++ b/lib/geocode.py @@ -0,0 +1,708 @@ +""" +RECON geocode — structured preprocessing, multi-source retrieval, reranking. + +Replaces the naive Photon-only search with: + 1. usaddress parsing + intent classification (ADDRESS / POI / LOCALITY / COORD / POSTCODE) + 2. Multi-source retrieval: ADDRESS → Netsyms + Photon; POI/LOCALITY → Photon /api + 3. Python reranker with weighted signals + +Public entry point: geocode(query, limit) → {query, results, count} +""" + +import math +import re +import logging + +import requests +import usaddress +from rapidfuzz import fuzz + +from .utils import setup_logging + +logger = setup_logging('recon.geocode') + +# ── Trace logger for reranking audit ── +_trace_logger = logging.getLogger('recon.geocode.trace') +_trace_handler = logging.FileHandler('/tmp/geocode_rerank_trace.log') +_trace_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s')) +_trace_logger.addHandler(_trace_handler) +_trace_logger.setLevel(logging.DEBUG) + +# ── Config constants ── +PHOTON_URL = "http://localhost:2322" +GEOCODE_BIAS_LAT = 42.5736 +GEOCODE_BIAS_LON = -114.6066 +GEOCODE_BIAS_ZOOM = 10 +ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 + +# ── Reranker weights ── +# Derived from research analysis of failure modes: +# housenumber_exact is the strongest signal because Photon's soft-boost +# lets wrong-number results bubble up. street_name_fuzz and locality_fuzz +# handle abbreviation/case variation. source_authority gives Netsyms a +# boost for US addresses since it has USPS-verified data. +W_HOUSENUMBER_EXACT = 6.0 # exact housenumber match +W_HOUSENUMBER_MISMATCH = -5.0 # housenumber present but wrong +W_STREET_NAME_FUZZ = 3.0 # fuzzy street name similarity [0..1] * weight +W_TOKEN_COVERAGE = 2.0 # fraction of query tokens found in result +W_STREET_TYPE_MATCH = 1.5 # "st" matches "street", etc. +W_LOCALITY_FUZZ = 2.0 # city/state fuzzy match +W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses +W_LAYER_RANK = 1.0 # type-appropriate results ranked higher +W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position) +W_STATE_EXACT = 1.0 # exact state code match + +# ── US abbreviation expansions ── +# Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals. +_STREET_TYPE_ABBREVS = { + 'st': 'street', 'ave': 'avenue', 'blvd': 'boulevard', 'dr': 'drive', + 'rd': 'road', 'ln': 'lane', 'ct': 'court', 'cir': 'circle', + 'pl': 'place', 'way': 'way', 'pkwy': 'parkway', 'hwy': 'highway', + 'trl': 'trail', 'ter': 'terrace', 'sq': 'square', +} +_DIRECTIONAL_ABBREVS = { + 'n': 'north', 's': 'south', 'e': 'east', 'w': 'west', + 'ne': 'northeast', 'nw': 'northwest', 'se': 'southeast', 'sw': 'southwest', +} +_ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE) + +# ── US state codes ── +_STATE_CODES = { + 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', + 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', + 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', + 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', + 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC', +} + +# Coordinate regex +_COORD_RE = re.compile(r'^\s*(-?\d+\.?\d*)\s*[,\s]\s*(-?\d+\.?\d*)\s*$') + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 1: PREPROCESSING +# ═══════════════════════════════════════════════════════════════════ + +def _parse_coords(text): + """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" + m = _COORD_RE.match(text.strip()) + if not m: + return None + lat, lon = float(m.group(1)), float(m.group(2)) + if -90 <= lat <= 90 and -180 <= lon <= 180: + return lat, lon + return None + + +def _classify_and_parse(query): + """ + Parse query with usaddress, classify intent, expand abbreviations. + + Returns (intent, parsed_dict) where: + intent: 'ADDRESS' | 'POI' | 'LOCALITY' | 'POSTCODE' | 'COORD' | 'UNKNOWN' + parsed_dict: {number, street, city, state, zipcode, raw_query, expanded_query} + """ + q = query.strip() + parsed = { + 'number': None, 'street': None, 'street_raw': None, + 'city': None, 'state': None, + 'zipcode': None, 'raw_query': q, 'expanded_query': q, + } + + # Coordinate check first + if _parse_coords(q): + return 'COORD', parsed + + # Try usaddress + try: + tagged, addr_type = usaddress.tag(q) + except usaddress.RepeatedLabelError: + # Ambiguous input — fall back to free-text Photon + return 'UNKNOWN', parsed + + # Extract components + number = tagged.get('AddressNumber', '').strip() + street_name = tagged.get('StreetName', '').strip() + street_pre_dir = tagged.get('StreetNamePreDirectional', '').strip() + street_post_type = tagged.get('StreetNamePostType', '').strip() + place = tagged.get('PlaceName', '').strip() + state = tagged.get('StateName', '').strip() + zipcode = tagged.get('ZipCode', '').strip() + + # ── Fix usaddress edge case: "214 N St Filer" ── + # usaddress reads single-letter directional + "St" as PreDirectional + empty, + # mashing "St Filer" into StreetName. Detect: PreDirectional is single letter, + # StreetName has 2+ tokens where the first is a street type. + if (street_pre_dir and len(street_pre_dir) <= 2 + and not street_name.strip().startswith(street_pre_dir) + and ' ' in street_name): + name_tokens = street_name.split() + first_lower = name_tokens[0].lower() + if first_lower in _STREET_TYPE_ABBREVS or first_lower in _STREET_TYPE_ABBREVS.values(): + # "N" is actually the street name, "St" is the post-type + street_name = street_pre_dir + street_post_type = name_tokens[0] + if len(name_tokens) > 1: + place = ' '.join(name_tokens[1:]) + street_pre_dir = '' + + # ── Expand abbreviations (guard ordinals) ── + expanded_parts = [] + + if number: + parsed['number'] = number + expanded_parts.append(number) + + if street_pre_dir: + exp = _DIRECTIONAL_ABBREVS.get(street_pre_dir.lower(), street_pre_dir) + expanded_parts.append(exp) + + if street_name: + # Don't expand ordinals: "21st" stays "21st" + if _ORDINAL_RE.match(street_name): + expanded_parts.append(street_name) + else: + # Expand directional abbreviation if it IS the street name + exp = _DIRECTIONAL_ABBREVS.get(street_name.lower(), street_name) + expanded_parts.append(exp) + parsed['street'] = street_name + + if street_post_type: + if _ORDINAL_RE.match(street_post_type): + expanded_parts.append(street_post_type) + else: + exp = _STREET_TYPE_ABBREVS.get(street_post_type.lower(), street_post_type) + expanded_parts.append(exp) + + # Build raw street (original abbreviations, for Netsyms) and expanded (for Photon) + raw_street_parts = [] + if street_pre_dir: + raw_street_parts.append(street_pre_dir) + if street_name: + raw_street_parts.append(street_name) + if street_post_type: + raw_street_parts.append(street_post_type) + parsed['street_raw'] = ' '.join(raw_street_parts) + + # Build the full expanded street + if expanded_parts: + # The street is everything after the number + street_full = ' '.join(expanded_parts[1:] if number else expanded_parts) + parsed['street'] = street_full + + if place: + parsed['city'] = place + expanded_parts.append(place) + if state: + parsed['state'] = state.upper() + expanded_parts.append(state) + if zipcode: + parsed['zipcode'] = zipcode + expanded_parts.append(zipcode) + + parsed['expanded_query'] = ' '.join(expanded_parts) + + # ── Intent classification ── + if addr_type == 'Street Address' and number: + return 'ADDRESS', parsed + elif zipcode and not number and not street_name: + return 'POSTCODE', parsed + elif addr_type == 'Ambiguous': + # Check if it looks like a locality: 2 tokens, second is a state code + tokens = q.replace(',', ' ').split() + if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES: + parsed['city'] = ' '.join(tokens[:-1]) + parsed['state'] = tokens[-1].upper() + return 'LOCALITY', parsed + return 'UNKNOWN', parsed + else: + return 'UNKNOWN', parsed + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 2: RETRIEVAL +# ═══════════════════════════════════════════════════════════════════ + +def _retrieve_netsyms(parsed, limit=10): + """Query Netsyms for structured address lookup. Returns list of candidate dicts.""" + try: + from . import netsyms + except Exception: + return [] + + results = [] + number = parsed.get('number', '') + street = parsed.get('street_raw') or parsed.get('street', '') + city = parsed.get('city', '') + state = parsed.get('state', '') + zipcode = parsed.get('zipcode', '') + + if number and street: + rows = netsyms.lookup_by_street( + number, street, city=city, state=state, zipcode=zipcode, limit=limit + ) + elif zipcode: + rows = netsyms.lookup_by_zipcode(zipcode, limit=limit) + else: + return [] + + for row in rows: + addr_parts = [row['number'], row['street']] + if row.get('street2'): + addr_parts.append(row['street2']) + addr_parts.extend([row['city'], row['state'], row['zipcode']]) + display = ' '.join(p for p in addr_parts if p) + results.append({ + 'name': display, + 'lat': row['lat'], + 'lon': row['lon'], + 'source': 'netsyms', + 'type': 'street_address', + 'raw': row, + '_number': row.get('number', ''), + '_street': row.get('street', ''), + '_city': row.get('city', ''), + '_state': row.get('state', ''), + }) + return results + + +def _retrieve_photon_structured(parsed, limit=10): + """Query Photon /structured endpoint for address lookup.""" + params = {'limit': limit, 'countrycode': 'US'} + if parsed.get('street'): + params['street'] = parsed['street'] + if parsed.get('number'): + params['housenumber'] = parsed['number'] + if parsed.get('city'): + params['city'] = parsed['city'] + if parsed.get('state'): + params['state'] = parsed['state'] + + if 'street' not in params: + return [] + + try: + resp = requests.get(f"{PHOTON_URL}/structured", params=params, timeout=5) + resp.raise_for_status() + data = resp.json() + except Exception as e: + logger.debug("Photon /structured failed: %s", e) + return [] + + return _parse_photon_features(data.get('features', []), 'photon') + + +def _retrieve_photon_freetext(query, limit=10): + """Query Photon /api for free-text search with location bias.""" + try: + params = { + 'q': query, + 'limit': limit, + 'lat': GEOCODE_BIAS_LAT, + 'lon': GEOCODE_BIAS_LON, + 'zoom': GEOCODE_BIAS_ZOOM, + } + resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) + resp.raise_for_status() + data = resp.json() + except Exception as e: + logger.debug("Photon /api failed: %s", e) + return [] + + return _parse_photon_features(data.get('features', []), 'photon') + + +def _parse_photon_features(features, source): + """Convert Photon GeoJSON features to candidate dicts.""" + results = [] + for i, feature in enumerate(features): + props = feature.get('properties', {}) + coords = feature.get('geometry', {}).get('coordinates', [0, 0]) + + osm_key = props.get('osm_key', '') + osm_value = props.get('osm_value', '') + feat_type = props.get('type', '') + has_hn = bool(props.get('housenumber')) + + if has_hn or osm_value in ('house', 'residential'): + rtype = 'street_address' + elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): + rtype = 'locality' + elif osm_key in ('amenity', 'shop', 'tourism', 'leisure'): + rtype = 'poi' + else: + rtype = 'poi' + + # Build display name + parts = [] + hn = props.get('housenumber') + street = props.get('street') + name = props.get('name', '') + if hn and street: + parts.append(f"{hn} {street}") + if name and name != street: + parts.append(name) + elif name: + parts.append(name) + elif street: + parts.append(street) + for key in ('city', 'county', 'state', 'country'): + v = props.get(key) + if v and (not parts or v != parts[-1]): + parts.append(v) + display = ', '.join(p for p in parts if p) or 'Unknown' + + results.append({ + 'name': display, + 'lat': coords[1], + 'lon': coords[0], + 'source': source, + 'type': rtype, + 'raw': props, + '_photon_rank': i, + '_number': props.get('housenumber', ''), + '_street': props.get('street', ''), + '_city': props.get('city', ''), + '_state': props.get('state', ''), + }) + return results + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 3: RERANKER +# ═══════════════════════════════════════════════════════════════════ + +def _expand_street_type(s): + """Expand a street type abbreviation for comparison.""" + return _STREET_TYPE_ABBREVS.get(s.lower(), s.lower()) + + +def _score_candidate(candidate, parsed, intent): + """ + Score a candidate against the parsed query. + Returns (total_score, signal_breakdown_dict). + """ + signals = {} + total = 0.0 + + query_number = (parsed.get('number') or '').strip().upper() + query_street = (parsed.get('street') or '').strip().upper() + query_city = (parsed.get('city') or '').strip().upper() + query_state = (parsed.get('state') or '').strip().upper() + + cand_number = (candidate.get('_number') or '').strip().upper() + cand_street = (candidate.get('_street') or '').strip().upper() + cand_city = (candidate.get('_city') or '').strip().upper() + cand_state = (candidate.get('_state') or '').strip().upper() + + # ── Housenumber ── + if intent == 'ADDRESS' and query_number: + if cand_number == query_number: + signals['housenumber_exact'] = W_HOUSENUMBER_EXACT + total += W_HOUSENUMBER_EXACT + elif cand_number and cand_number != query_number: + signals['housenumber_mismatch'] = W_HOUSENUMBER_MISMATCH + total += W_HOUSENUMBER_MISMATCH + + # ── Street name fuzz ── + if query_street and cand_street: + # Expand both for comparison + q_expanded = ' '.join(_expand_street_type(t) for t in query_street.split()) + c_expanded = ' '.join(_expand_street_type(t) for t in cand_street.split()) + ratio = fuzz.token_sort_ratio(q_expanded, c_expanded) / 100.0 + score = ratio * W_STREET_NAME_FUZZ + signals['street_name_fuzz'] = round(score, 2) + total += score + + # ── Street type match ── + if query_street and cand_street: + q_tokens = set(_expand_street_type(t) for t in query_street.split()) + c_tokens = set(_expand_street_type(t) for t in cand_street.split()) + # Check if the street type words overlap + street_types = set(_STREET_TYPE_ABBREVS.values()) + q_types = q_tokens & street_types + c_types = c_tokens & street_types + if q_types and q_types & c_types: + signals['street_type_match'] = W_STREET_TYPE_MATCH + total += W_STREET_TYPE_MATCH + + # ── Token coverage ── + raw_q = parsed.get('raw_query', '').upper() + q_tokens = set(raw_q.replace(',', ' ').split()) + if q_tokens: + cand_text = candidate.get('name', '').upper() + matched = sum(1 for t in q_tokens if t in cand_text) + coverage = matched / len(q_tokens) + score = coverage * W_TOKEN_COVERAGE + signals['token_coverage'] = round(score, 2) + total += score + + # ── Locality fuzz ── + if query_city and cand_city: + ratio = fuzz.ratio(query_city, cand_city) / 100.0 + score = ratio * W_LOCALITY_FUZZ + signals['locality_fuzz'] = round(score, 2) + total += score + + # ── State exact ── + if query_state and cand_state: + if cand_state == query_state: + signals['state_exact'] = W_STATE_EXACT + total += W_STATE_EXACT + + # ── Source authority ── + if candidate.get('source') == 'netsyms' and intent == 'ADDRESS': + signals['source_authority'] = W_SOURCE_AUTHORITY + total += W_SOURCE_AUTHORITY + + # ── Layer rank (type-appropriate bonus) ── + cand_type = candidate.get('type', '') + if intent == 'ADDRESS' and cand_type == 'street_address': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + elif intent == 'LOCALITY' and cand_type == 'locality': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + elif intent == 'POI' and cand_type == 'poi': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + + # ── Photon position normalization ── + photon_rank = candidate.get('_photon_rank') + if photon_rank is not None: + # Top result gets full bonus, decays linearly + score = max(0, (1.0 - photon_rank / 10.0)) * W_PHOTON_POSITION_NORM + signals['photon_position'] = round(score, 2) + total += score + + return round(total, 2), signals + + +def _build_match_code(candidate, parsed, intent): + """Build a match_code dict indicating match quality for each field.""" + mc = {} + if intent == 'ADDRESS': + q_num = (parsed.get('number') or '').strip().upper() + c_num = (candidate.get('_number') or '').strip().upper() + if q_num and c_num == q_num: + mc['housenumber'] = 'matched' + elif q_num and c_num: + mc['housenumber'] = 'unmatched' + elif q_num and not c_num: + mc['housenumber'] = 'inferred' + + q_street = (parsed.get('street') or '').strip().upper() + c_street = (candidate.get('_street') or '').strip().upper() + if q_street and c_street: + q_exp = ' '.join(_expand_street_type(t) for t in q_street.split()) + c_exp = ' '.join(_expand_street_type(t) for t in c_street.split()) + ratio = fuzz.token_sort_ratio(q_exp, c_exp) / 100.0 + mc['street'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_street: + mc['street'] = 'inferred' + + q_city = (parsed.get('city') or '').strip().upper() + c_city = (candidate.get('_city') or '').strip().upper() + if q_city and c_city: + ratio = fuzz.ratio(q_city, c_city) / 100.0 + mc['city'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_city: + mc['city'] = 'inferred' + + return mc + + +def _rerank(candidates, parsed, intent, query, limit): + """Score, sort, and trim candidates. Trace-log top 3.""" + scored = [] + for c in candidates: + total, signals = _score_candidate(c, parsed, intent) + c['_score'] = total + c['_signals'] = signals + scored.append(c) + + scored.sort(key=lambda c: c['_score'], reverse=True) + + # Trace log for audit + _trace_logger.debug("─── Query: %r intent=%s ───", query, intent) + for i, c in enumerate(scored[:3]): + _trace_logger.debug( + " #%d score=%.2f src=%s name=%s", + i, c['_score'], c.get('source', '?'), c.get('name', '?')[:60] + ) + _trace_logger.debug(" signals=%s", c.get('_signals', {})) + + # Clean internal fields and add match_code + result = [] + for c in scored[:limit]: + mc = _build_match_code(c, parsed, intent) + + # Assign confidence from score + score = c.get('_score', 0) + if score >= 10: + confidence = 'exact' + elif score >= 5: + confidence = 'high' + elif score >= 2: + confidence = 'medium' + else: + confidence = 'low' + + entry = { + 'name': c['name'], + 'lat': c['lat'], + 'lon': c['lon'], + 'source': c['source'], + 'confidence': confidence, + 'type': c.get('type', 'poi'), + 'raw': c.get('raw'), + } + if mc: + entry['match_code'] = mc + result.append(entry) + + return result + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 4: ANNOTATION +# ═══════════════════════════════════════════════════════════════════ + +def _haversine_m(lat1, lon1, lat2, lon2): + """Haversine distance in meters.""" + R = 6_371_000 + rlat1, rlat2 = math.radians(lat1), math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 + return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +def _annotate_with_address_book(results): + """Add labeled_as to results within radius of an address book entry.""" + try: + from . import address_book + entries = address_book.load() + except Exception: + return + for result in results: + rlat, rlon = result.get('lat'), result.get('lon') + if rlat is None or rlon is None: + continue + for entry in entries: + elat, elon = entry.get('lat'), entry.get('lon') + if elat is None or elon is None: + continue + if _haversine_m(rlat, rlon, elat, elon) <= ADDRESS_BOOK_ANNOTATION_RADIUS_M: + result['labeled_as'] = entry['name'] + break + + +# ═══════════════════════════════════════════════════════════════════ +# PUBLIC API +# ═══════════════════════════════════════════════════════════════════ + +def geocode(query, limit=10): + """ + Structured geocoding with multi-source retrieval and reranking. + + Returns {query, results: [...], count} — always 200-safe. + """ + limit = max(1, min(limit, 20)) + q = (query or '').strip() + empty = {'query': q, 'results': [], 'count': 0} + + if not q: + return empty + + # ── Coordinate detection ── + coords = _parse_coords(q) + if coords: + return { + 'query': q, + 'results': [{ + 'name': q, + 'lat': coords[0], + 'lon': coords[1], + 'source': 'coordinates', + 'confidence': 'exact', + 'type': 'coordinates', + 'raw': None, + }], + 'count': 1, + } + + # ── Address book nickname short-circuit ── + normalized_q = ' '.join(q.lower().replace(',', ' ').split()) + is_single_word = ' ' not in normalized_q + try: + from . import address_book + ab_match = address_book.lookup(q) + if (ab_match + and ab_match['confidence'] == 'exact' + and ab_match.get('lat') and ab_match.get('lon') + and is_single_word): + logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name']) + return { + 'query': q, + 'results': [{ + 'name': ab_match.get('address') or ab_match['name'], + 'lat': ab_match['lat'], + 'lon': ab_match['lon'], + 'source': 'address_book', + 'confidence': 'exact', + 'type': 'nickname', + 'raw': ab_match, + }], + 'count': 1, + } + except Exception as e: + logger.debug("geocode: address_book lookup failed: %s", e) + + # ── Classify intent + parse ── + intent, parsed = _classify_and_parse(q) + logger.debug("geocode: intent=%s parsed=%s", intent, parsed) + + # ── Retrieve candidates ── + candidates = [] + + if intent == 'ADDRESS': + # Parallel: Netsyms (structured) + Photon (freetext with expanded query) + netsyms_results = _retrieve_netsyms(parsed, limit=limit) + photon_results = _retrieve_photon_freetext( + parsed.get('expanded_query', q), limit=limit + ) + # Also try Photon /structured for addresses + photon_struct = _retrieve_photon_structured(parsed, limit=5) + candidates = netsyms_results + photon_results + photon_struct + + elif intent == 'POSTCODE': + netsyms_results = _retrieve_netsyms(parsed, limit=limit) + photon_results = _retrieve_photon_freetext(q, limit=limit) + candidates = netsyms_results + photon_results + + elif intent in ('LOCALITY', 'POI', 'UNKNOWN'): + candidates = _retrieve_photon_freetext(q, limit=limit) + + # ── Deduplicate by (lat, lon) proximity ── + deduped = [] + for c in candidates: + is_dup = False + for existing in deduped: + if (_haversine_m(c['lat'], c['lon'], existing['lat'], existing['lon']) < 50 + and c.get('source') == existing.get('source')): + is_dup = True + break + if not is_dup: + deduped.append(c) + candidates = deduped + + # ── Rerank ── + results = _rerank(candidates, parsed, intent, q, limit) + + # ── Address book annotation ── + _annotate_with_address_book(results) + + logger.info("geocode: %r → intent=%s, %d results", q, intent, len(results)) + return {'query': q, 'results': results, 'count': len(results)} diff --git a/lib/geocode_test.py b/lib/geocode_test.py index ab26e55..4717b1e 100644 --- a/lib/geocode_test.py +++ b/lib/geocode_test.py @@ -20,25 +20,24 @@ TESTS = [ ), }, { - "name": "214 north st filer → photon results (multi-word, not nickname)", + "name": "214 north st filer → netsyms exact match (multi-word, not nickname)", "query": "214 north st filer", "check": lambda r: ( r["count"] >= 1 - and r["results"][0]["source"] == "photon" - # labeled_as=Home may or may not appear depending on Photon's - # geocoding precision — the key invariant is that this multi-word - # query flows through Photon, not the address book shortcut. + and r["results"][0]["source"] == "netsyms" + and r["results"][0]["confidence"] == "exact" + and r["results"][0]["type"] == "street_address" ), }, { - "name": "214 North St, Filer, ID → photon (case/punctuation)", + "name": "214 North St, Filer, ID → netsyms (case/punctuation)", "query": "214 North St, Filer, ID", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "netsyms", }, { - "name": "214 NORTH ST FILER ID → photon (uppercase)", + "name": "214 NORTH ST FILER ID → netsyms (uppercase)", "query": "214 NORTH ST FILER ID", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "netsyms", }, { "name": "1600 Pennsylvania Ave Washington DC → White House", diff --git a/lib/nav_tools.py b/lib/nav_tools.py index 839c5d8..2f91616 100644 --- a/lib/nav_tools.py +++ b/lib/nav_tools.py @@ -50,86 +50,14 @@ def _haversine_m(lat1, lon1, lat2, lon2): return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) -def _classify_photon_feature(props, index): - """Classify a Photon feature into (type, confidence).""" - osm_key = props.get('osm_key', '') - osm_value = props.get('osm_value', '') - feat_type = props.get('type', '') - has_housenumber = bool(props.get('housenumber')) - - # Type classification - if has_housenumber or osm_value in ('house', 'residential'): - result_type = 'street_address' - elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): - result_type = 'locality' - elif osm_key in ('amenity', 'shop', 'tourism', 'leisure') or osm_value: - result_type = 'poi' - else: - result_type = 'poi' - - # Confidence — simple positional heuristic - if index == 0: - confidence = 'high' - elif index <= 2: - confidence = 'medium' - else: - confidence = 'low' - - return result_type, confidence - - -def _photon_feature_to_name(props): - """Build a display name from a Photon feature's properties.""" - parts = [] - housenumber = props.get('housenumber') - street = props.get('street') - name = props.get('name', '') - - if housenumber and street: - parts.append(f"{housenumber} {street}") - if name and name != street: - parts.append(name) - elif name: - parts.append(name) - elif street: - parts.append(street) - - for key in ('city', 'county', 'state', 'country'): - v = props.get(key) - if v and (not parts or v != parts[-1]): - parts.append(v) - - return ', '.join(p for p in parts if p) or 'Unknown' - - -def _annotate_with_address_book(results): - """Add labeled_as to results within ADDRESS_BOOK_ANNOTATION_RADIUS_M of an address book entry.""" - try: - from . import address_book - entries = address_book.load() - except Exception: - return - - for result in results: - rlat, rlon = result.get('lat'), result.get('lon') - if rlat is None or rlon is None: - continue - for entry in entries: - elat, elon = entry.get('lat'), entry.get('lon') - if elat is None or elon is None: - continue - dist = _haversine_m(rlat, rlon, elat, elon) - if dist <= ADDRESS_BOOK_ANNOTATION_RADIUS_M: - result['labeled_as'] = entry['name'] - break +def geocode(query: str, limit: int = 10): + """Delegate to the structured geocode module. See lib/geocode.py.""" + from . import geocode as geocode_mod + return geocode_mod.geocode(query, limit=limit) def _geocode(query: str): - """Geocode a place name via address book then Photon. Returns (lat, lon, display_name) or raises. - - Used internally by route() — returns a simple (lat, lon, name) tuple. - For the full ranked-results API, use geocode() instead. - """ + """Internal: returns (lat, lon, display_name) tuple for route().""" result = geocode(query, limit=1) results = result.get('results', []) if not results: @@ -138,117 +66,6 @@ def _geocode(query: str): return top['lat'], top['lon'], top['name'] - -def geocode(query: str, limit: int = 10): - """ - Photon-first geocoding with ranked results. - - Chain: - 1. Coordinate detection (pre-search) - 2. Address book nickname short-circuit (single-word queries only) - 3. Photon search (primary, biased to Idaho region) - 4. Address book proximity annotation (post-Photon, 75m radius) - - Returns dict: {query, results: [...], count: N} - Always 200-safe — empty results list is valid, never raises. - - Netsyms is preserved at /api/netsyms/lookup for direct structured - access. Enrichment of Photon street-address hits with USPS plus4 - from Netsyms is a planned follow-up (not wired here). - """ - limit = max(1, min(limit, 20)) - q = (query or '').strip() - empty = {'query': q, 'results': [], 'count': 0} - - if not q: - return empty - - # ── 1. Coordinate detection ── - coords = _parse_coords(q) - if coords: - return { - 'query': q, - 'results': [{ - 'name': q, - 'lat': coords[0], - 'lon': coords[1], - 'source': 'coordinates', - 'confidence': 'exact', - 'type': 'coordinates', - 'raw': None, - }], - 'count': 1, - } - - # ── 2. Address book nickname short-circuit ── - # Only short-circuit on single-word queries ("home", "work"). - # Multi-word queries fall through to Photon for proper ranking. - normalized_q = ' '.join(q.lower().replace(',', ' ').split()) - is_single_word = ' ' not in normalized_q - try: - from . import address_book - ab_match = address_book.lookup(q) - if (ab_match - and ab_match['confidence'] == 'exact' - and ab_match.get('lat') and ab_match.get('lon') - and is_single_word): - logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name']) - return { - 'query': q, - 'results': [{ - 'name': ab_match.get('address') or ab_match['name'], - 'lat': ab_match['lat'], - 'lon': ab_match['lon'], - 'source': 'address_book', - 'confidence': 'exact', - 'type': 'nickname', - 'raw': ab_match, - }], - 'count': 1, - } - except Exception as e: - logger.debug("geocode: address_book lookup failed: %s", e) - - # ── 3. Photon search (primary) ── - results = [] - try: - params = { - 'q': q, - 'limit': limit, - 'lat': GEOCODE_BIAS_LAT, - 'lon': GEOCODE_BIAS_LON, - 'zoom': GEOCODE_BIAS_ZOOM, - } - resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) - resp.raise_for_status() - data = resp.json() - - for i, feature in enumerate(data.get('features', [])): - props = feature.get('properties', {}) - geom_coords = feature.get('geometry', {}).get('coordinates', [0, 0]) - result_type, confidence = _classify_photon_feature(props, i) - name = _photon_feature_to_name(props) - results.append({ - 'name': name, - 'lat': geom_coords[1], - 'lon': geom_coords[0], - 'source': 'photon', - 'confidence': confidence, - 'type': result_type, - 'raw': props, - }) - except requests.RequestException as e: - logger.warning("geocode: Photon request failed: %s", e) - except Exception as e: - logger.warning("geocode: Photon parse error: %s", e) - - # ── 4. Address book annotation (post-Photon) ── - _annotate_with_address_book(results) - - logger.info("geocode: %r → %d results", q, len(results)) - return {'query': q, 'results': results, 'count': len(results)} - - def reverse_geocode(lat: float, lon: float) -> str: """Reverse geocode coordinates via Photon. Returns formatted address string.""" try: From ac69e2761d620671686a6d9f729b273aba599287 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 21:26:35 +0000 Subject: [PATCH 09/72] feat(navi): add /api/reverse endpoint for map-click reverse geocoding Accepts lat/lon query params, calls Photon /reverse, returns same response shape as /api/geocode. Returns 200 with empty results on no match (graceful degradation for ocean/unmapped areas). Co-Authored-By: Claude Opus 4.6 --- lib/netsyms_api.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 09e0d4a..92c8b6e 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -60,3 +60,49 @@ def api_geocode(): result = nav_tools.geocode(q, limit=limit) return jsonify(result) + + +@geocode_bp.route('/api/reverse') +def api_reverse(): + """ + Reverse geocode coordinates via Photon. + + GET /api/reverse?lat=X&lon=Y + + Returns same shape as /api/geocode: + {query: "lat,lon", results: [{name, lat, lon, source, type, raw, ...}], count} + + Returns 200 OK with empty results on no match. 400 on invalid coords. + """ + try: + lat = float(request.args.get('lat', '')) + lon = float(request.args.get('lon', '')) + except (ValueError, TypeError): + return jsonify({'error': 'Missing or invalid lat/lon parameters'}), 400 + + if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): + return jsonify({'error': 'Coordinates out of range'}), 400 + + query_str = f"{lat},{lon}" + + try: + import requests as http_requests + resp = http_requests.get( + "http://localhost:2322/reverse", + params={"lat": lat, "lon": lon, "limit": 1}, + timeout=10, + ) + resp.raise_for_status() + data = resp.json() + features = data.get("features", []) + except Exception: + logger.warning("Photon reverse geocode failed for %s", query_str) + return jsonify({'query': query_str, 'results': [], 'count': 0}) + + if not features: + return jsonify({'query': query_str, 'results': [], 'count': 0}) + + from .geocode import _parse_photon_features + results = _parse_photon_features(features, source='photon_reverse') + + return jsonify({'query': query_str, 'results': results, 'count': len(results)}) From e6b81db52019e6acfe037d3837eb69b811e48e0e Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Apr 2026 23:35:39 +0000 Subject: [PATCH 10/72] feat(navi): deployment profiles + /api/config endpoint Add profile-driven config infrastructure: - config/profiles/{home,regional_pi,minimal_pi}.yaml templates - lib/deployment_config.py loader (reads RECON_PROFILE env var) - GET /api/config returns active profile as JSON (5min cache) Frontend reads this on startup to determine tile source, defaults, and feature flags. No existing behavior changed. Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 31 +++++++++++++++++++++ config/profiles/minimal_pi.yaml | 31 +++++++++++++++++++++ config/profiles/regional_pi.yaml | 31 +++++++++++++++++++++ lib/api.py | 10 +++++++ lib/deployment_config.py | 46 ++++++++++++++++++++++++++++++++ 5 files changed, 149 insertions(+) create mode 100644 config/profiles/home.yaml create mode 100644 config/profiles/minimal_pi.yaml create mode 100644 config/profiles/regional_pi.yaml create mode 100644 lib/deployment_config.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml new file mode 100644 index 0000000..e4eb3de --- /dev/null +++ b/config/profiles/home.yaml @@ -0,0 +1,31 @@ +# Deployment profile: Home (VM 1130) +# Active on the main Echo6 deployment. Full stack with planet-scale NA tiles. +# Override via RECON_PROFILE env var in /etc/systemd/system/recon.service + +profile: home +region_name: "North America" + +tileset: + url: "/tiles/na.pmtiles" + bounds: [-168, 14, -52, 72] + max_zoom: 15 + attribution: "Protomaps © OSM" + +services: + geocode: "/api/geocode" + reverse: "/api/reverse" + address_book: "/api/address_book" + valhalla: "/valhalla" + +features: + has_nominatim_details: false + has_kiwix_wiki: false + has_hillshade: false + has_3d_terrain: false + has_traffic_overlay: false + has_landclass: false + has_address_book_write: false + +defaults: + center: [42.5736, -114.6066] + zoom: 10 diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml new file mode 100644 index 0000000..5e26b59 --- /dev/null +++ b/config/profiles/minimal_pi.yaml @@ -0,0 +1,31 @@ +# Deployment profile: Minimal Pi (single-state pocket deployment) +# Template for the lightest possible field kit — Idaho only. +# Override via RECON_PROFILE env var. + +profile: minimal_pi +region_name: "Idaho" + +tileset: + url: "/tiles/idaho.pmtiles" + bounds: [-117.5, 42.0, -111.0, 49.0] + max_zoom: 15 + attribution: "Protomaps © OSM" + +services: + geocode: "/api/geocode" + reverse: "/api/reverse" + address_book: "/api/address_book" + valhalla: "/valhalla" + +features: + has_nominatim_details: false + has_kiwix_wiki: false + has_hillshade: false + has_3d_terrain: false + has_traffic_overlay: false + has_landclass: false + has_address_book_write: true + +defaults: + center: [44.0, -114.0] + zoom: 7 diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml new file mode 100644 index 0000000..e2b469c --- /dev/null +++ b/config/profiles/regional_pi.yaml @@ -0,0 +1,31 @@ +# Deployment profile: Regional Pi (multi-state field kit) +# Template for a Raspberry Pi covering Idaho + surrounding states. +# Override via RECON_PROFILE env var. + +profile: regional_pi +region_name: "Idaho + Neighbors" + +tileset: + url: "/tiles/regional.pmtiles" + bounds: [-125, 40, -104, 49] + max_zoom: 15 + attribution: "Protomaps © OSM" + +services: + geocode: "/api/geocode" + reverse: "/api/reverse" + address_book: "/api/address_book" + valhalla: "/valhalla" + +features: + has_nominatim_details: false + has_kiwix_wiki: false + has_hillshade: true + has_3d_terrain: false + has_traffic_overlay: false + has_landclass: true + has_address_book_write: true + +defaults: + center: [44.0, -114.0] + zoom: 7 diff --git a/lib/api.py b/lib/api.py index 7c54fe8..297a680 100644 --- a/lib/api.py +++ b/lib/api.py @@ -24,6 +24,7 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB +from .deployment_config import get_deployment_config logger = setup_logging('recon.api') @@ -1165,6 +1166,15 @@ def api_knowledge_stats(): return jsonify(_cache['knowledge_stats']) +@app.route('/api/config') +def api_config(): + """Return deployment profile config for frontend consumption.""" + config = get_deployment_config() + resp = jsonify(config) + resp.headers['Cache-Control'] = 'public, max-age=300' + return resp + + @app.route('/api/health') def api_health(): """Health check endpoint for monitoring.""" diff --git a/lib/deployment_config.py b/lib/deployment_config.py new file mode 100644 index 0000000..978b8a0 --- /dev/null +++ b/lib/deployment_config.py @@ -0,0 +1,46 @@ +""" +Deployment profile loader. + +Reads RECON_PROFILE env var (default: "home"), loads the matching YAML +from config/profiles/.yaml, and caches the parsed dict in memory. +Provides get_deployment_config() for use by the /api/config endpoint. +""" +import os +import yaml +from .utils import setup_logging + +logger = setup_logging('recon.deployment_config') + +_config_cache = None + + +def load_deployment_config(): + """Load and cache the deployment profile. Called once at import time.""" + global _config_cache + + profile = os.environ.get('RECON_PROFILE', 'home') + config_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'config', 'profiles') + config_path = os.path.join(config_dir, f'{profile}.yaml') + + if not os.path.exists(config_path): + raise FileNotFoundError( + f"Deployment profile '{profile}' not found at {config_path}. " + f"Available profiles: {', '.join(f.replace('.yaml','') for f in os.listdir(config_dir) if f.endswith('.yaml'))}" + ) + + with open(config_path, 'r') as f: + _config_cache = yaml.safe_load(f) + + logger.info(f"Loaded deployment profile: {profile} ({_config_cache.get('region_name', 'unknown')})") + return _config_cache + + +def get_deployment_config(): + """Return the cached deployment config dict.""" + if _config_cache is None: + load_deployment_config() + return _config_cache + + +# Load on import so startup fails fast if profile is missing +load_deployment_config() From 64605b38bb926f2bb57f122b248b0b3e748d9c9d Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Apr 2026 00:52:04 +0000 Subject: [PATCH 11/72] Add TomTom traffic proxy and update profiles for hillshade/traffic layers - Add /api/traffic/flow proxy route to hide TomTom API key from frontend - Add tileset_hillshade and traffic config blocks to all three profiles - Flip has_hillshade and has_traffic_overlay flags in home and regional profiles - Minimal profile has config blocks but flags remain false (dormant) Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 13 +++++++++++-- config/profiles/minimal_pi.yaml | 9 +++++++++ config/profiles/regional_pi.yaml | 11 ++++++++++- lib/api.py | 22 +++++++++++++++++++++- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index e4eb3de..49dc373 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -11,6 +11,15 @@ tileset: max_zoom: 15 attribution: "Protomaps © OSM" +tileset_hillshade: + url: "/tiles/hillshade-na.pmtiles" + encoding: "terrarium" + max_zoom: 12 + +traffic: + provider: "tomtom" + proxy_url: "/api/traffic/flow/{z}/{x}/{y}.png" + services: geocode: "/api/geocode" reverse: "/api/reverse" @@ -20,9 +29,9 @@ services: features: has_nominatim_details: false has_kiwix_wiki: false - has_hillshade: false + has_hillshade: true has_3d_terrain: false - has_traffic_overlay: false + has_traffic_overlay: true has_landclass: false has_address_book_write: false diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index 5e26b59..e4fe651 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -11,6 +11,15 @@ tileset: max_zoom: 15 attribution: "Protomaps © OSM" +tileset_hillshade: + url: "/tiles/hillshade-idaho.pmtiles" + encoding: "terrarium" + max_zoom: 12 + +traffic: + provider: "tomtom" + proxy_url: "/api/traffic/flow/{z}/{x}/{y}.png" + services: geocode: "/api/geocode" reverse: "/api/reverse" diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index e2b469c..89d56f3 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -11,6 +11,15 @@ tileset: max_zoom: 15 attribution: "Protomaps © OSM" +tileset_hillshade: + url: "/tiles/hillshade-regional.pmtiles" + encoding: "terrarium" + max_zoom: 12 + +traffic: + provider: "tomtom" + proxy_url: "/api/traffic/flow/{z}/{x}/{y}.png" + services: geocode: "/api/geocode" reverse: "/api/reverse" @@ -22,7 +31,7 @@ features: has_kiwix_wiki: false has_hillshade: true has_3d_terrain: false - has_traffic_overlay: false + has_traffic_overlay: true has_landclass: true has_address_book_write: true diff --git a/lib/api.py b/lib/api.py index 297a680..5a72889 100644 --- a/lib/api.py +++ b/lib/api.py @@ -17,7 +17,7 @@ import shutil import tempfile import requests as http_requests -from flask import Flask, request, jsonify, redirect, render_template +from flask import Flask, request, jsonify, redirect, render_template, make_response from qdrant_client import QdrantClient from qdrant_client.models import Filter, FieldCondition, MatchValue from werkzeug.utils import secure_filename @@ -1166,6 +1166,26 @@ def api_knowledge_stats(): return jsonify(_cache['knowledge_stats']) + +@app.route('/api/traffic/flow///.png') +def api_traffic_flow(z, x, y): + """Proxy TomTom traffic flow tiles to hide API key from frontend.""" + key = os.environ.get('TOMTOM_API_KEY') + if not key: + return 'Traffic service not configured', 503 + url = f'https://api.tomtom.com/traffic/map/4/tile/flow/relative/{z}/{x}/{y}.png?key={key}' + try: + resp = http_requests.get(url, timeout=10) + if resp.status_code != 200: + return 'Upstream error', 502 + r = make_response(resp.content) + r.headers['Content-Type'] = 'image/png' + r.headers['Cache-Control'] = 'public, max-age=120' + return r + except Exception: + return 'Upstream timeout', 504 + + @app.route('/api/config') def api_config(): """Return deployment profile config for frontend consumption.""" From 2121ee4936232da00fe5cd32afe66708ec5ace4c Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Apr 2026 03:06:51 +0000 Subject: [PATCH 12/72] Add place detail proxy with Nominatim-first routing and Overpass fallback New /api/place// endpoint returns cleaned OSM tag data for PlaceDetail panel enrichment. Routes to local Nominatim (Idaho coverage) first, falls back to Overpass public API for out-of-region queries. Responses cached in SQLite (data/place_cache.db) with no expiry. New modules: lib/place_detail.py (proxy + cache), lib/osm_categories.py (~50 category humanization mappings). Profile YAMLs updated with place_details config block and has_nominatim_details flag. Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 7 +- config/profiles/regional_pi.yaml | 7 +- lib/api.py | 8 + lib/osm_categories.py | 143 +++++++++++ lib/place_detail.py | 411 +++++++++++++++++++++++++++++++ 5 files changed, 574 insertions(+), 2 deletions(-) create mode 100644 lib/osm_categories.py create mode 100644 lib/place_detail.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 49dc373..d894d81 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -20,6 +20,11 @@ traffic: provider: "tomtom" proxy_url: "/api/traffic/flow/{z}/{x}/{y}.png" +place_details: + local_source: "nominatim" + local_bbox: [-117.2, 41.98, -111.04, 49.0] + fallback_source: "overpass" + services: geocode: "/api/geocode" reverse: "/api/reverse" @@ -27,7 +32,7 @@ services: valhalla: "/valhalla" features: - has_nominatim_details: false + has_nominatim_details: true has_kiwix_wiki: false has_hillshade: true has_3d_terrain: false diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 89d56f3..043e9e7 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -20,6 +20,11 @@ traffic: provider: "tomtom" proxy_url: "/api/traffic/flow/{z}/{x}/{y}.png" +place_details: + local_source: "nominatim" + local_bbox: [-125.0, 40.0, -104.0, 49.0] + fallback_source: "overpass" + services: geocode: "/api/geocode" reverse: "/api/reverse" @@ -27,7 +32,7 @@ services: valhalla: "/valhalla" features: - has_nominatim_details: false + has_nominatim_details: true has_kiwix_wiki: false has_hillshade: true has_3d_terrain: false diff --git a/lib/api.py b/lib/api.py index 5a72889..476c1af 100644 --- a/lib/api.py +++ b/lib/api.py @@ -25,6 +25,7 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB from .deployment_config import get_deployment_config +from .place_detail import get_place_detail logger = setup_logging('recon.api') @@ -1186,6 +1187,13 @@ def api_traffic_flow(z, x, y): return 'Upstream timeout', 504 +@app.route('/api/place//') +def api_place_detail(osm_type, osm_id): + """Proxy place details from local Nominatim or Overpass API.""" + result, status = get_place_detail(osm_type, osm_id) + return jsonify(result), status + + @app.route('/api/config') def api_config(): """Return deployment profile config for frontend consumption.""" diff --git a/lib/osm_categories.py b/lib/osm_categories.py new file mode 100644 index 0000000..dd5217c --- /dev/null +++ b/lib/osm_categories.py @@ -0,0 +1,143 @@ +""" +Human-readable category names for OSM class/type pairs. + +Used by the place detail proxy to turn ("amenity", "cafe") into "Coffee shop". +Covers ~50 common categories; unmapped pairs fall back to title-cased class:type. +""" + +# Exact (class, type) → label +CATEGORY_MAP = { + # Amenity + ("amenity", "cafe"): "Coffee shop", + ("amenity", "restaurant"): "Restaurant", + ("amenity", "fast_food"): "Fast food restaurant", + ("amenity", "bar"): "Bar", + ("amenity", "pub"): "Pub", + ("amenity", "biergarten"): "Beer garden", + ("amenity", "ice_cream"): "Ice cream shop", + ("amenity", "fuel"): "Gas station", + ("amenity", "charging_station"): "EV charging station", + ("amenity", "parking"): "Parking", + ("amenity", "bank"): "Bank", + ("amenity", "atm"): "ATM", + ("amenity", "pharmacy"): "Pharmacy", + ("amenity", "hospital"): "Hospital", + ("amenity", "clinic"): "Clinic", + ("amenity", "dentist"): "Dentist", + ("amenity", "doctors"): "Doctor's office", + ("amenity", "veterinary"): "Veterinarian", + ("amenity", "school"): "School", + ("amenity", "university"): "University", + ("amenity", "college"): "College", + ("amenity", "library"): "Library", + ("amenity", "post_office"): "Post office", + ("amenity", "fire_station"): "Fire station", + ("amenity", "police"): "Police station", + ("amenity", "townhall"): "Town hall", + ("amenity", "place_of_worship"): "Place of worship", + ("amenity", "theatre"): "Theatre", + ("amenity", "cinema"): "Cinema", + ("amenity", "community_centre"): "Community center", + ("amenity", "toilets"): "Restrooms", + ("amenity", "drinking_water"): "Drinking water", + ("amenity", "shelter"): "Shelter", + ("amenity", "camping"): "Campground", + # Shop + ("shop", "supermarket"): "Supermarket", + ("shop", "convenience"): "Convenience store", + ("shop", "hardware"): "Hardware store", + ("shop", "clothes"): "Clothing store", + ("shop", "car_repair"): "Auto repair", + ("shop", "car"): "Car dealership", + ("shop", "bakery"): "Bakery", + ("shop", "butcher"): "Butcher", + # Leisure + ("leisure", "park"): "Park", + ("leisure", "playground"): "Playground", + ("leisure", "sports_centre"): "Sports center", + ("leisure", "swimming_pool"): "Swimming pool", + ("leisure", "golf_course"): "Golf course", + ("leisure", "nature_reserve"): "Nature reserve", + ("leisure", "campsite"): "Campsite", + # Tourism + ("tourism", "hotel"): "Hotel", + ("tourism", "motel"): "Motel", + ("tourism", "guest_house"): "Guest house", + ("tourism", "hostel"): "Hostel", + ("tourism", "camp_site"): "Campsite", + ("tourism", "viewpoint"): "Viewpoint", + ("tourism", "museum"): "Museum", + ("tourism", "information"): "Information", + ("tourism", "attraction"): "Tourist attraction", + ("tourism", "picnic_site"): "Picnic site", + # Natural + ("natural", "peak"): "Peak", + ("natural", "spring"): "Spring", + ("natural", "hot_spring"): "Hot spring", + ("natural", "lake"): "Lake", + ("natural", "water"): "Water body", + ("natural", "cliff"): "Cliff", + ("natural", "cave_entrance"): "Cave", + # Highway + ("highway", "bus_stop"): "Bus stop", + ("highway", "rest_area"): "Rest area", + # Boundary + ("boundary", "administrative"): "Administrative boundary", + ("boundary", "protected_area"): "Protected area", + ("boundary", "national_park"): "National park", + # Place + ("place", "city"): "City", + ("place", "town"): "Town", + ("place", "village"): "Village", + ("place", "hamlet"): "Hamlet", + ("place", "suburb"): "Suburb", + ("place", "neighbourhood"): "Neighborhood", + # Building + ("building", "yes"): "Building", + # Waterway + ("waterway", "river"): "River", + ("waterway", "stream"): "Stream", + ("waterway", "waterfall"): "Waterfall", + # Landuse + ("landuse", "cemetery"): "Cemetery", + ("landuse", "forest"): "Forest", + # Historic + ("historic", "monument"): "Monument", + ("historic", "memorial"): "Memorial", + ("historic", "ruins"): "Ruins", +} + +# Class-level wildcard fallbacks (when exact type isn't mapped) +CLASS_FALLBACKS = { + "shop": "Shop", + "amenity": "Amenity", + "leisure": "Leisure", + "tourism": "Tourism", + "natural": "Natural feature", + "historic": "Historic site", +} + + +def humanize_category(osm_class, osm_type): + """Return a human-readable category string for an OSM class/type pair.""" + if not osm_class or not osm_type: + return "Place" + + osm_class = osm_class.lower() + osm_type = osm_type.lower() + + # Exact match + label = CATEGORY_MAP.get((osm_class, osm_type)) + if label: + return label + + # Class-level wildcard with formatted type + prefix = CLASS_FALLBACKS.get(osm_class) + if prefix: + nice_type = osm_type.replace("_", " ").title() + return f"{prefix}: {nice_type}" if prefix != nice_type else prefix + + # Generic fallback + nice_class = osm_class.replace("_", " ").title() + nice_type = osm_type.replace("_", " ").title() + return f"{nice_class}: {nice_type}" diff --git a/lib/place_detail.py b/lib/place_detail.py new file mode 100644 index 0000000..f225a08 --- /dev/null +++ b/lib/place_detail.py @@ -0,0 +1,411 @@ +""" +Place detail proxy — local Nominatim first, Overpass API fallback, SQLite cache. + +Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict +matching the response shape for /api/place//. +""" +import json +import os +import sqlite3 +import time + +import requests as http_requests + +from .osm_categories import humanize_category +from .utils import setup_logging + +logger = setup_logging('recon.place_detail') + +NOMINATIM_URL = "http://localhost:8010/details.php" +OVERPASS_URL = "https://overpass-api.de/api/interpreter" +OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)" +VALID_OSM_TYPES = {"N", "W", "R"} + +_db_conn = None + + +# ── SQLite cache ──────────────────────────────────────────────────────── + +def _get_db(): + """Return a module-level SQLite connection (lazy init).""" + global _db_conn + if _db_conn is not None: + return _db_conn + + db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') + os.makedirs(db_dir, exist_ok=True) + db_path = os.path.join(db_dir, 'place_cache.db') + + _db_conn = sqlite3.connect(db_path, check_same_thread=False) + _db_conn.execute("PRAGMA journal_mode=WAL") + _db_conn.execute("PRAGMA synchronous=NORMAL") + _db_conn.execute(""" + CREATE TABLE IF NOT EXISTS place_cache ( + osm_type TEXT NOT NULL, + osm_id INTEGER NOT NULL, + data TEXT NOT NULL, + source TEXT NOT NULL, + cached_at INTEGER NOT NULL, + PRIMARY KEY (osm_type, osm_id) + ) + """) + _db_conn.commit() + logger.info(f"Place cache DB ready at {db_path}") + return _db_conn + + +def cache_get(osm_type, osm_id): + """Return cached place dict or None.""" + db = _get_db() + row = db.execute( + "SELECT data FROM place_cache WHERE osm_type=? AND osm_id=?", + (osm_type, osm_id) + ).fetchone() + if row: + try: + result = json.loads(row[0]) + result['source'] = 'cache' + return result + except (json.JSONDecodeError, TypeError): + pass + return None + + +def cache_put(osm_type, osm_id, data, source): + """Store a place detail result in the cache.""" + db = _get_db() + db.execute( + "INSERT OR REPLACE INTO place_cache (osm_type, osm_id, data, source, cached_at) " + "VALUES (?, ?, ?, ?, ?)", + (osm_type, osm_id, json.dumps(data), source, int(time.time())) + ) + db.commit() + + +# ── Nominatim parsing ─────────────────────────────────────────────────── + +# Nominatim address array uses rank_address to indicate what each entry is. +# We map rank ranges to our flat address fields. +RANK_TO_FIELD = { + 4: 'country', + 5: 'postcode', + 6: 'state', # rank 6 = county in US, but we try name matching + 8: 'state', + 12: 'county', + 16: 'city', + 20: 'neighbourhood', + 22: 'neighbourhood', + 26: 'road', + 28: 'house_number', +} + + +def _parse_nominatim_address(address_array, country_code=None): + """Parse Nominatim's ranked address array into a flat address dict.""" + addr = { + 'house_number': None, + 'road': None, + 'neighbourhood': None, + 'city': None, + 'county': None, + 'state': None, + 'postcode': None, + 'country': None, + 'country_code': country_code, + } + + if not address_array: + return addr + + for entry in address_array: + if not entry.get('isaddress', False): + continue + + name = entry.get('localname', '') + rank = entry.get('rank_address', 0) + etype = entry.get('type', '') + eclass = entry.get('class', '') + + # Explicit type-based assignments (more reliable than rank alone) + if etype == 'country' and eclass == 'place': + addr['country'] = name + elif etype == 'state' or (eclass == 'boundary' and etype == 'administrative' and rank == 8): + if not addr['state']: + addr['state'] = name + elif etype == 'county' or (eclass == 'boundary' and etype == 'administrative' and rank in (10, 12)): + if not addr['county']: + addr['county'] = name + elif etype in ('city', 'town', 'village', 'hamlet') and eclass == 'place': + if not addr['city']: + addr['city'] = name + elif eclass == 'boundary' and etype == 'administrative' and rank == 16: + # City-level admin boundary (common in US) + if not addr['city']: + addr['city'] = name + elif etype == 'postcode': + addr['postcode'] = name + elif eclass == 'highway' or rank == 26: + if not addr['road']: + addr['road'] = name + elif etype == 'house_number' or rank == 28: + addr['house_number'] = name + elif rank in (20, 22) and not addr['neighbourhood']: + addr['neighbourhood'] = name + + # Remove county from output (not in spec) + addr.pop('county', None) + + return addr + + +def _parse_nominatim(data): + """Parse a Nominatim /details response into our canonical shape.""" + osm_type = data.get('osm_type', '') + osm_id = data.get('osm_id', 0) + osm_class = data.get('category', '') + osm_type_tag = data.get('type', '') + + # Centroid + centroid_geom = data.get('centroid', {}) + coords = centroid_geom.get('coordinates', [0, 0]) + centroid = {'lat': coords[1], 'lon': coords[0]} if len(coords) >= 2 else {'lat': 0, 'lon': 0} + + # Names + names = data.get('names', {}) + display_name = data.get('localname', '') or names.get('name', '') + + # Address + address = _parse_nominatim_address( + data.get('address', []), + country_code=data.get('country_code') + ) + + # Use calculated_postcode if address parse didn't find one + if not address.get('postcode') and data.get('calculated_postcode'): + address['postcode'] = data['calculated_postcode'] + + # Extratags + raw_extra = data.get('extratags', {}) + extratags = { + 'opening_hours': raw_extra.get('opening_hours'), + 'phone': raw_extra.get('phone') or raw_extra.get('contact:phone'), + 'website': raw_extra.get('website') or raw_extra.get('contact:website') or raw_extra.get('url'), + 'email': raw_extra.get('email') or raw_extra.get('contact:email'), + 'wikipedia': raw_extra.get('wikipedia'), + 'wikidata': raw_extra.get('wikidata'), + 'cuisine': raw_extra.get('cuisine'), + 'operator': raw_extra.get('operator'), + 'wheelchair': raw_extra.get('wheelchair'), + 'fee': raw_extra.get('fee'), + 'takeaway': raw_extra.get('takeaway'), + } + + # Category: use extratags.place for boundaries (e.g. "city"), else class/type + effective_class = osm_class + effective_type = osm_type_tag + if osm_class == 'boundary' and osm_type_tag == 'administrative': + place_tag = raw_extra.get('place') or raw_extra.get('linked_place') + if place_tag: + effective_class = 'place' + effective_type = place_tag + + category = humanize_category(effective_class, effective_type) + + # Filter names: only include extra name tags, not the bare "name" + extra_names = {k: v for k, v in names.items() if k != 'name'} if names else {} + + return { + 'osm_type': osm_type, + 'osm_id': osm_id, + 'name': display_name, + 'category': category, + 'class': osm_class, + 'type': osm_type_tag, + 'address': address, + 'centroid': centroid, + 'extratags': extratags, + 'names': extra_names if extra_names else None, + 'source': 'nominatim_local', + } + + +# ── Overpass parsing ──────────────────────────────────────────────────── + +OVERPASS_TYPE_MAP = {'N': 'node', 'W': 'way', 'R': 'relation'} + + +def _build_overpass_query(osm_type, osm_id): + """Build an Overpass QL query for a single element.""" + elem = OVERPASS_TYPE_MAP.get(osm_type) + if not elem: + return None + return f"[out:json][timeout:10];{elem}({osm_id});out tags center;" + + +def _parse_overpass(data, osm_type, osm_id): + """Parse an Overpass API response into our canonical shape.""" + elements = data.get('elements', []) + if not elements: + return None + + elem = elements[0] + tags = elem.get('tags', {}) + + # Centroid: Overpass returns lat/lon for nodes, center for ways/relations + lat = elem.get('lat') or (elem.get('center', {}).get('lat')) + lon = elem.get('lon') or (elem.get('center', {}).get('lon')) + centroid = {'lat': lat, 'lon': lon} if lat and lon else {'lat': 0, 'lon': 0} + + # Determine class/type from tags — Overpass doesn't have a canonical class field + # Use the first recognized class tag + osm_class = '' + osm_type_tag = '' + for cls in ('amenity', 'shop', 'leisure', 'tourism', 'natural', 'highway', + 'boundary', 'place', 'building', 'waterway', 'landuse', 'historic'): + if cls in tags: + osm_class = cls + osm_type_tag = tags[cls] + break + + category = humanize_category(osm_class, osm_type_tag) + + # Address from addr:* tags + address = { + 'house_number': tags.get('addr:housenumber'), + 'road': tags.get('addr:street'), + 'neighbourhood': tags.get('addr:suburb') or tags.get('addr:neighbourhood'), + 'city': tags.get('addr:city'), + 'state': tags.get('addr:state'), + 'postcode': tags.get('addr:postcode'), + 'country': tags.get('addr:country'), + 'country_code': tags.get('addr:country_code', + tags.get('addr:country', '')).lower()[:2] or None, + } + + # Extratags + extratags = { + 'opening_hours': tags.get('opening_hours'), + 'phone': tags.get('phone') or tags.get('contact:phone'), + 'website': tags.get('website') or tags.get('contact:website') or tags.get('url'), + 'email': tags.get('email') or tags.get('contact:email'), + 'wikipedia': tags.get('wikipedia'), + 'wikidata': tags.get('wikidata'), + 'cuisine': tags.get('cuisine'), + 'operator': tags.get('operator'), + 'wheelchair': tags.get('wheelchair'), + 'fee': tags.get('fee'), + 'takeaway': tags.get('takeaway'), + } + + # Names + name = tags.get('name', '') + extra_names = {} + for k, v in tags.items(): + if k.startswith('name:') or k in ('alt_name', 'old_name', 'short_name', 'official_name'): + extra_names[k] = v + + return { + 'osm_type': osm_type, + 'osm_id': osm_id, + 'name': name, + 'category': category, + 'class': osm_class, + 'type': osm_type_tag, + 'address': address, + 'centroid': centroid, + 'extratags': extratags, + 'names': extra_names if extra_names else None, + 'source': 'overpass', + } + + +# ── Public API ────────────────────────────────────────────────────────── + +def get_place_detail(osm_type, osm_id): + """ + Fetch place details for an OSM element. + + Returns (dict, status_code): + - (data, 200) on success + - (error_dict, 404) if not found in any source + - (error_dict, 502) if both sources error + """ + osm_type = osm_type.upper() + if osm_type not in VALID_OSM_TYPES: + return {'error': f'Invalid osm_type: {osm_type}. Must be N, W, or R.'}, 400 + + if osm_id <= 0: + return {'error': 'osm_id must be a positive integer'}, 400 + + # 1. Check cache + cached = cache_get(osm_type, osm_id) + if cached: + logger.debug(f"Cache hit: {osm_type}/{osm_id}") + return cached, 200 + + # 2. Try local Nominatim first + nominatim_result = None + nominatim_error = None + try: + resp = http_requests.get(NOMINATIM_URL, params={ + 'osmtype': osm_type, + 'osmid': osm_id, + 'format': 'json', + 'addressdetails': 1, + 'hierarchy': 0, + 'keywords': 0, + }, timeout=5) + + if resp.status_code == 200: + data = resp.json() + # Nominatim returns a result even for IDs not in its DB, + # but they'll have empty/minimal data. Check for osm_id match. + if data.get('osm_id') == osm_id: + nominatim_result = _parse_nominatim(data) + logger.debug(f"Nominatim hit: {osm_type}/{osm_id}") + except Exception as e: + nominatim_error = str(e) + logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}") + + if nominatim_result: + cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') + return nominatim_result, 200 + + # 3. Fallback to Overpass + overpass_result = None + overpass_error = None + try: + query = _build_overpass_query(osm_type, osm_id) + if query: + resp = http_requests.post( + OVERPASS_URL, + data={'data': query}, + headers={'User-Agent': OVERPASS_UA}, + timeout=10, + ) + if resp.status_code == 200: + data = resp.json() + overpass_result = _parse_overpass(data, osm_type, osm_id) + if overpass_result: + logger.debug(f"Overpass hit: {osm_type}/{osm_id}") + elif resp.status_code == 429: + overpass_error = "Overpass rate limited" + logger.warning(f"Overpass 429 for {osm_type}/{osm_id}") + else: + overpass_error = f"Overpass HTTP {resp.status_code}" + except Exception as e: + overpass_error = str(e) + logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}") + + if overpass_result: + cache_put(osm_type, osm_id, overpass_result, 'overpass') + return overpass_result, 200 + + # 4. Both failed + if nominatim_error and overpass_error: + logger.error(f"Both sources failed for {osm_type}/{osm_id}: " + f"Nominatim={nominatim_error}, Overpass={overpass_error}") + return {'error': 'Both data sources unavailable'}, 502 + + # Not found in either source (no errors, just empty results) + return {'error': f'{osm_type}/{osm_id} not found'}, 404 From 65693d15aaa7e9b47566c9d5fcf6476cc4b7749a Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Apr 2026 16:51:25 +0000 Subject: [PATCH 13/72] Add Overture Maps POI enrichment layer for place details Ingests 20.9M North America places from Overture Maps Foundation (release 2026-04-15.0) into PostgreSQL. Enriches /api/place responses with phone, website, and brand data via spatial + fuzzy name matching when OSM extratags are sparse. Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 1 + config/profiles/minimal_pi.yaml | 1 + config/profiles/regional_pi.yaml | 1 + lib/overture.py | 170 +++++++++++++++ lib/place_detail.py | 74 +++++++ scripts/overture_import.py | 350 +++++++++++++++++++++++++++++++ 6 files changed, 597 insertions(+) create mode 100644 lib/overture.py create mode 100644 scripts/overture_import.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index d894d81..99430a8 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -39,6 +39,7 @@ features: has_traffic_overlay: true has_landclass: false has_address_book_write: false + has_overture_enrichment: true defaults: center: [42.5736, -114.6066] diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index e4fe651..07a61d5 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -34,6 +34,7 @@ features: has_traffic_overlay: false has_landclass: false has_address_book_write: true + has_overture_enrichment: false defaults: center: [44.0, -114.0] diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 043e9e7..291ee81 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -39,6 +39,7 @@ features: has_traffic_overlay: true has_landclass: true has_address_book_write: true + has_overture_enrichment: false defaults: center: [44.0, -114.0] diff --git a/lib/overture.py b/lib/overture.py new file mode 100644 index 0000000..fcbdd18 --- /dev/null +++ b/lib/overture.py @@ -0,0 +1,170 @@ +""" +Overture Maps enrichment layer. + +Provides lookup functions against the local PostgreSQL Overture Places database. +Two strategies: + 1. find_by_osm_id — exact match via OSM cross-reference index + 2. find_by_coords_and_name — spatial + fuzzy name fallback + +Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, +functions return None gracefully (feature degrades, doesn't crash). +""" +import json +import os + +import psycopg2 +import psycopg2.pool + +from .utils import setup_logging + +logger = setup_logging('recon.overture') + +_pool = None +_pool_failed = False + +# Map full OSM type names to single-letter codes used in Overture sources +OSM_TYPE_MAP = { + 'N': 'n', 'W': 'w', 'R': 'r', + 'node': 'n', 'way': 'w', 'relation': 'r', + 'n': 'n', 'w': 'w', 'r': 'r', +} + + +def _get_pool(): + """Lazy-init the connection pool. Returns None if Postgres is unreachable.""" + global _pool, _pool_failed + if _pool is not None: + return _pool + if _pool_failed: + return None + + try: + _pool = psycopg2.pool.SimpleConnectionPool( + minconn=1, + maxconn=3, + host=os.environ.get('OVERTURE_DB_HOST', 'localhost'), + port=int(os.environ.get('OVERTURE_DB_PORT', '5432')), + dbname=os.environ.get('OVERTURE_DB_NAME', 'overture'), + user=os.environ.get('OVERTURE_DB_USER', 'overture'), + password=os.environ.get('OVERTURE_DB_PASSWORD', ''), + connect_timeout=5, + ) + logger.info("Overture PostgreSQL connection pool initialized") + return _pool + except Exception as e: + _pool_failed = True + logger.warning(f"Overture PostgreSQL unavailable, enrichment disabled: {e}") + return None + + +def _query(sql, params): + """Execute a query and return the first row as a dict, or None.""" + pool = _get_pool() + if pool is None: + return None + + conn = None + try: + conn = pool.getconn() + with conn.cursor() as cur: + cur.execute(sql, params) + row = cur.fetchone() + if row is None: + return None + cols = [desc[0] for desc in cur.description] + return dict(zip(cols, row)) + except Exception as e: + logger.warning(f"Overture query error: {e}") + if conn: + try: + conn.rollback() + except Exception: + pass + return None + finally: + if conn: + try: + pool.putconn(conn) + except Exception: + pass + + +def _format_result(row, match_method): + """Convert a database row dict to the enrichment result shape.""" + if not row: + return None + + socials = row.get('socials') + if isinstance(socials, str): + try: + socials = json.loads(socials) + except (json.JSONDecodeError, TypeError): + socials = None + + return { + 'phone': row.get('phone'), + 'website': row.get('website'), + 'socials': socials, + 'brand_name': row.get('brand_name'), + 'brand_wikidata': row.get('brand_wikidata'), + 'basic_category': row.get('basic_category'), + 'confidence': row.get('confidence'), + 'gers_id': row.get('id'), + 'match_method': match_method, + } + + +def find_by_osm_id(osm_type, osm_id): + """ + Look up an Overture place by its OSM cross-reference. + + Args: + osm_type: OSM type — 'N', 'W', 'R', 'node', 'way', 'relation', or single letter + osm_id: OSM numeric ID + + Returns: + Enrichment dict or None + """ + type_letter = OSM_TYPE_MAP.get(osm_type) + if not type_letter: + return None + + row = _query( + """SELECT id, name, basic_category, confidence, + phone, website, socials, brand_name, brand_wikidata + FROM places + WHERE osm_type = %s AND osm_id = %s + LIMIT 1""", + (type_letter, int(osm_id)) + ) + return _format_result(row, 'osm_xref') + + +def find_by_coords_and_name(lat, lon, name, radius_m=100): + """ + Look up an Overture place by spatial proximity + fuzzy name match. + + Args: + lat: Latitude + lon: Longitude + name: Place name to fuzzy-match + radius_m: Search radius in meters (default 100) + + Returns: + Enrichment dict or None + """ + if not name or not lat or not lon: + return None + + row = _query( + """SELECT id, name, basic_category, confidence, + phone, website, socials, brand_name, brand_wikidata, + similarity(name, %s) AS sim + FROM places + WHERE ST_DWithin(geometry::geography, ST_MakePoint(%s, %s)::geography, %s) + AND similarity(name, %s) > 0.4 + ORDER BY sim DESC, ST_Distance(geometry::geography, ST_MakePoint(%s, %s)::geography) ASC + LIMIT 1""", + (name, lon, lat, radius_m, name, lon, lat) + ) + return _format_result(row, 'coord_name_fuzzy') diff --git a/lib/place_detail.py b/lib/place_detail.py index f225a08..8ca2781 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -1,5 +1,6 @@ """ Place detail proxy — local Nominatim first, Overpass API fallback, SQLite cache. +Overture Maps enrichment layer fills sparse extratags (phone, website, brand). Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict matching the response shape for /api/place//. @@ -82,6 +83,77 @@ def cache_put(osm_type, osm_id, data, source): db.commit() +# ── Overture enrichment ───────────────────────────────────────────────── + +def _enrich_with_overture(result, osm_type, osm_id): + """ + Attempt to enrich a place result with Overture Maps data. + Fills sparse extratags (phone, website, brand) without overwriting existing values. + Returns the (possibly enriched) result dict. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_overture_enrichment', False): + return result + except Exception: + return result + + try: + from .overture import find_by_osm_id, find_by_coords_and_name + except ImportError: + logger.debug("Overture module not available") + return result + + enrichment = None + match_method = None + + # Strategy 1: OSM cross-reference (exact) + enrichment = find_by_osm_id(osm_type, osm_id) + if enrichment: + match_method = 'osm_xref' + + # Strategy 2: Coordinate + name fuzzy (fallback) + if not enrichment and result.get('centroid') and result.get('name'): + centroid = result['centroid'] + if centroid.get('lat') and centroid.get('lon'): + enrichment = find_by_coords_and_name( + centroid['lat'], centroid['lon'], result['name'] + ) + if enrichment: + match_method = 'coord_name_fuzzy' + + if not enrichment: + return result + + # Fill sparse extratags (never overwrite existing non-null values) + extratags = result.get('extratags', {}) + fill_map = [ + ('phone', 'phone'), + ('website', 'website'), + ('brand', 'brand_name'), + ('brand:wikidata', 'brand_wikidata'), + ] + for osm_key, overture_key in fill_map: + if not extratags.get(osm_key) and enrichment.get(overture_key): + extratags[osm_key] = enrichment[overture_key] + result['extratags'] = extratags + + # Add source metadata + result['sources'] = { + 'primary': result.get('source', 'unknown'), + 'enrichment': 'overture', + 'overture_match_method': match_method, + 'overture_gers_id': enrichment.get('gers_id'), + 'overture_confidence': enrichment.get('confidence'), + 'overture_basic_category': enrichment.get('basic_category'), + } + + logger.debug(f"Overture enrichment for {osm_type}/{osm_id}: {match_method}") + return result + + # ── Nominatim parsing ─────────────────────────────────────────────────── # Nominatim address array uses rank_address to indicate what each entry is. @@ -368,6 +440,7 @@ def get_place_detail(osm_type, osm_id): logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}") if nominatim_result: + nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') return nominatim_result, 200 @@ -398,6 +471,7 @@ def get_place_detail(osm_type, osm_id): logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}") if overpass_result: + overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) cache_put(osm_type, osm_id, overpass_result, 'overpass') return overpass_result, 200 diff --git a/scripts/overture_import.py b/scripts/overture_import.py new file mode 100644 index 0000000..0b6ba67 --- /dev/null +++ b/scripts/overture_import.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +"""Overture Maps Places → PostgreSQL import script (v2). + +Downloads Overture Places Parquet from S3 via DuckDB (public bucket, no credentials), +filters to North America bounding box, and inserts into local PostgreSQL with PostGIS. + +Usage: + cd /opt/recon && venv/bin/python scripts/overture_import.py + +Re-runnable (idempotent via UPSERT). +""" + +import json +import logging +import os +import re +import sys +import time + +import duckdb +import psycopg2 +import psycopg2.extras + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%H:%M:%S' +) +log = logging.getLogger('overture_import') + +# --- Config --- +OVERTURE_RELEASE = '2026-04-15.0' +S3_PATH = f's3://overturemaps-us-west-2/release/{OVERTURE_RELEASE}/theme=places/type=place/*' + +# North America bounding box (generous — includes Hawaii, Puerto Rico, Canada) +BBOX = { + 'xmin': -170.0, + 'xmax': -50.0, + 'ymin': 15.0, + 'ymax': 85.0, +} + +BATCH_SIZE = 50_000 +OSM_RECORD_RE = re.compile(r'^([nwr])(\d+)@\d+$') + +DB_CONFIG = { + 'host': os.environ.get('OVERTURE_DB_HOST', 'localhost'), + 'port': int(os.environ.get('OVERTURE_DB_PORT', '5432')), + 'dbname': os.environ.get('OVERTURE_DB_NAME', 'overture'), + 'user': os.environ.get('OVERTURE_DB_USER', 'overture'), + 'password': os.environ.get('OVERTURE_DB_PASSWORD', ''), +} + + +def create_table(conn): + """Create places table and indexes if they don't exist.""" + with conn.cursor() as cur: + cur.execute(""" + CREATE TABLE IF NOT EXISTS places ( + id TEXT PRIMARY KEY, + geometry GEOMETRY(Point, 4326), + name TEXT, + basic_category TEXT, + confidence REAL, + phone TEXT, + website TEXT, + socials JSONB, + brand_name TEXT, + brand_wikidata TEXT, + osm_type CHAR(1), + osm_id BIGINT, + source_record_id TEXT, + raw_sources JSONB + ); + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_osm + ON places(osm_type, osm_id) WHERE osm_type IS NOT NULL; + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_geom + ON places USING GIST(geometry); + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_name_trgm + ON places USING GIN(name gin_trgm_ops); + """) + conn.commit() + log.info('Table and indexes ready') + + +def parse_osm_ref(sources): + """Extract OSM type letter and ID from Overture sources array.""" + if not sources: + return None, None, None + for src in sources: + record_id = None + if isinstance(src, dict): + record_id = src.get('record_id', '') + elif hasattr(src, '__getitem__'): + # DuckDB struct — try attribute access + try: + record_id = src['record_id'] + except (KeyError, TypeError, IndexError): + pass + if not record_id: + continue + m = OSM_RECORD_RE.match(str(record_id)) + if m: + return m.group(1), int(m.group(2)), str(record_id) + return None, None, None + + +def run_import(): + """Main import: DuckDB reads S3 Parquet → PostgreSQL via chunked OFFSET/LIMIT.""" + log.info(f'Overture release: {OVERTURE_RELEASE}') + log.info(f'S3 path: {S3_PATH}') + log.info(f'Bounding box: {BBOX}') + + # Connect to PostgreSQL + conn = psycopg2.connect(**DB_CONFIG) + conn.autocommit = False + create_table(conn) + + # Set up DuckDB with httpfs and spatial for S3 access + duck = duckdb.connect() + duck.execute("INSTALL httpfs; LOAD httpfs;") + duck.execute("INSTALL spatial; LOAD spatial;") + duck.execute("SET s3_region='us-west-2';") + + # Use a materialized approach: DuckDB query → Arrow → iterate in Python + query = f""" + SELECT + id, + ST_X(geometry) AS lon, + ST_Y(geometry) AS lat, + names.primary AS name, + basic_category, + confidence, + phones, + websites, + socials, + brand, + sources + FROM read_parquet('{S3_PATH}', hive_partitioning=true) + WHERE bbox.xmin >= {BBOX['xmin']} + AND bbox.xmax <= {BBOX['xmax']} + AND bbox.ymin >= {BBOX['ymin']} + AND bbox.ymax <= {BBOX['ymax']} + """ + + log.info('Starting DuckDB query against S3 (this will take several minutes)...') + t_start = time.time() + + # Execute and fetch all as Arrow for efficient iteration + result_rel = duck.sql(query) + + upsert_sql = """ + INSERT INTO places (id, geometry, name, basic_category, confidence, + phone, website, socials, brand_name, brand_wikidata, + osm_type, osm_id, source_record_id, raw_sources) + VALUES %s + ON CONFLICT (id) DO UPDATE SET + geometry = EXCLUDED.geometry, + name = EXCLUDED.name, + basic_category = EXCLUDED.basic_category, + confidence = EXCLUDED.confidence, + phone = EXCLUDED.phone, + website = EXCLUDED.website, + socials = EXCLUDED.socials, + brand_name = EXCLUDED.brand_name, + brand_wikidata = EXCLUDED.brand_wikidata, + osm_type = EXCLUDED.osm_type, + osm_id = EXCLUDED.osm_id, + source_record_id = EXCLUDED.source_record_id, + raw_sources = EXCLUDED.raw_sources + """ + + template = """( + %(id)s, + ST_SetSRID(ST_MakePoint(%(lon)s, %(lat)s), 4326), + %(name)s, + %(basic_category)s, + %(confidence)s, + %(phone)s, + %(website)s, + %(socials)s::jsonb, + %(brand_name)s, + %(brand_wikidata)s, + %(osm_type)s, + %(osm_id)s, + %(source_record_id)s, + %(raw_sources)s::jsonb + )""" + + total = 0 + osm_refs = 0 + batch = [] + + log.info('DuckDB query executing, fetching results in chunks...') + + # Fetch in chunks using fetchmany on the relation + chunk_size = BATCH_SIZE + while True: + chunk = result_rel.fetchmany(chunk_size) + if not chunk: + break + + for row in chunk: + row_id = row[0] + lon = row[1] + lat = row[2] + name = row[3] + basic_cat = row[4] + conf = row[5] + phones = row[6] + websites = row[7] + socials_raw = row[8] + brand_raw = row[9] + sources_raw = row[10] + + if lon is None or lat is None: + continue + + # Phone: first element of VARCHAR[] + phone = None + if phones and len(phones) > 0: + phone = str(phones[0]) if phones[0] else None + + # Website: first element of VARCHAR[] + website = None + if websites and len(websites) > 0: + website = str(websites[0]) if websites[0] else None + + # Socials: VARCHAR[] → JSON array of strings + socials_json = None + if socials_raw and len(socials_raw) > 0: + socials_json = json.dumps([str(s) for s in socials_raw if s]) + + # Brand: struct with wikidata and names.primary + brand_name = None + brand_wikidata = None + if brand_raw: + try: + if isinstance(brand_raw, dict): + brand_wikidata = brand_raw.get('wikidata') + names_struct = brand_raw.get('names') + if names_struct and isinstance(names_struct, dict): + brand_name = names_struct.get('primary') + else: + # DuckDB struct — access by key + brand_wikidata = brand_raw['wikidata'] if 'wikidata' in dir(brand_raw) else None + try: + brand_wikidata = brand_raw[0] # wikidata is first field + names_struct = brand_raw[1] # names is second field + if names_struct: + brand_name = names_struct[0] # primary is first field + except (IndexError, TypeError): + pass + except Exception: + pass + + # Sources: parse OSM cross-reference + sources_list = None + if sources_raw: + if isinstance(sources_raw, (list, tuple)): + sources_list = [] + for s in sources_raw: + if isinstance(s, dict): + sources_list.append(s) + else: + # DuckDB struct tuple — convert + try: + sources_list.append({ + 'dataset': s[1] if len(s) > 1 else None, + 'record_id': s[3] if len(s) > 3 else None, + }) + except (TypeError, IndexError): + pass + + osm_type_letter, osm_id_val, source_record_id = parse_osm_ref(sources_list) + if osm_type_letter: + osm_refs += 1 + + raw_sources_json = json.dumps(sources_list) if sources_list else None + + batch.append({ + 'id': row_id, + 'lon': float(lon), + 'lat': float(lat), + 'name': name, + 'basic_category': basic_cat, + 'confidence': float(conf) if conf is not None else None, + 'phone': phone, + 'website': website, + 'socials': socials_json, + 'brand_name': brand_name, + 'brand_wikidata': brand_wikidata, + 'osm_type': osm_type_letter, + 'osm_id': osm_id_val, + 'source_record_id': source_record_id, + 'raw_sources': raw_sources_json, + }) + + if len(batch) >= BATCH_SIZE: + with conn.cursor() as cur: + psycopg2.extras.execute_values( + cur, upsert_sql, batch, + template=template, + page_size=BATCH_SIZE + ) + conn.commit() + total += len(batch) + elapsed = time.time() - t_start + rate = total / elapsed if elapsed > 0 else 0 + log.info(f'Inserted {total:,} rows ({osm_refs:,} OSM xrefs) ' + f'[{rate:.0f} rows/sec, {elapsed:.0f}s elapsed]') + batch = [] + + # Flush remaining + if batch: + with conn.cursor() as cur: + psycopg2.extras.execute_values( + cur, upsert_sql, batch, + template=template, + page_size=BATCH_SIZE + ) + conn.commit() + total += len(batch) + + duck.close() + + # Final stats + elapsed = time.time() - t_start + log.info(f'Import complete: {total:,} rows, {osm_refs:,} OSM cross-refs, ' + f'{elapsed:.0f}s total ({total/elapsed:.0f} rows/sec)') + + # Verify + with conn.cursor() as cur: + cur.execute("SELECT count(*) FROM places") + count = cur.fetchone()[0] + cur.execute("SELECT count(*) FROM places WHERE osm_type IS NOT NULL") + osm_count = cur.fetchone()[0] + log.info(f'Final table: {count:,} total rows, {osm_count:,} with OSM cross-references') + + conn.close() + + +if __name__ == '__main__': + run_import() From d460f0e202812be332b7ae36bf268e2922fd52d7 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Apr 2026 19:08:04 +0000 Subject: [PATCH 14/72] Fix type classifier: POI check takes precedence over street_address Businesses with housenumbers (e.g. M&W Markets at 130 US-30) were classified as street_address because the housenumber check fired before the osm_key check. Reorder so osm_key in amenity/shop/tourism/leisure/office is evaluated first, ensuring businesses get type=poi regardless of whether they have a street address. Also adds office to the POI key set. Co-Authored-By: Claude Opus 4.6 --- lib/geocode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/geocode.py b/lib/geocode.py index 21a2403..bb9c97f 100644 --- a/lib/geocode.py +++ b/lib/geocode.py @@ -325,12 +325,12 @@ def _parse_photon_features(features, source): feat_type = props.get('type', '') has_hn = bool(props.get('housenumber')) - if has_hn or osm_value in ('house', 'residential'): + if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office'): + rtype = 'poi' + elif has_hn or osm_value in ('house', 'residential'): rtype = 'street_address' elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): rtype = 'locality' - elif osm_key in ('amenity', 'shop', 'tourism', 'leisure'): - rtype = 'poi' else: rtype = 'poi' From 620f99c762fc0fa303dc0e38bc465270ef85bff1 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Apr 2026 19:39:37 +0000 Subject: [PATCH 15/72] Add business_intent_poi_boost reranker signal When a query contains no road-type keywords (st, blvd, ave, etc.), boost amenity/shop/tourism/leisure/office/craft results (+3.0) and penalize highway/route results (-4.0). This fixes searches like "starbucks twin falls" where a named service road outranked the actual business POI due to Photon position tiebreaking. Also fixes: - Intent classifier now recognizes full state names ("idaho" not just "ID") for LOCALITY classification - Locality-type Photon results now populate _city from name field so they participate in locality_fuzz scoring - Trace logging expanded to all candidates with osm_key/value Co-Authored-By: Claude Opus 4.6 --- lib/geocode.py | 78 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/lib/geocode.py b/lib/geocode.py index bb9c97f..22acdf9 100644 --- a/lib/geocode.py +++ b/lib/geocode.py @@ -51,6 +51,8 @@ W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses W_LAYER_RANK = 1.0 # type-appropriate results ranked higher W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position) W_STATE_EXACT = 1.0 # exact state code match +W_POI_CLASS_BOOST = 3.0 # amenity/shop/etc boost for business-name queries +W_HIGHWAY_CLASS_PENALTY = -4.0 # highway/route penalty for business-name queries # ── US abbreviation expansions ── # Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals. @@ -66,6 +68,13 @@ _DIRECTIONAL_ABBREVS = { } _ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE) +# ── Road keywords (for detecting when query is about a road vs a business) ── +_ROAD_KEYWORDS = ( + set(_STREET_TYPE_ABBREVS.keys()) + | set(_STREET_TYPE_ABBREVS.values()) + | {'route', 'rte', 'pass'} +) + # ── US state codes ── _STATE_CODES = { 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', @@ -75,6 +84,24 @@ _STATE_CODES = { 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC', } +# ── Full state name → code (for intent classifier) ── +_STATE_NAME_TO_CODE = { + 'alabama': 'AL', 'alaska': 'AK', 'arizona': 'AZ', 'arkansas': 'AR', + 'california': 'CA', 'colorado': 'CO', 'connecticut': 'CT', 'delaware': 'DE', + 'florida': 'FL', 'georgia': 'GA', 'hawaii': 'HI', 'idaho': 'ID', + 'illinois': 'IL', 'indiana': 'IN', 'iowa': 'IA', 'kansas': 'KS', + 'kentucky': 'KY', 'louisiana': 'LA', 'maine': 'ME', 'maryland': 'MD', + 'massachusetts': 'MA', 'michigan': 'MI', 'minnesota': 'MN', + 'mississippi': 'MS', 'missouri': 'MO', 'montana': 'MT', 'nebraska': 'NE', + 'nevada': 'NV', 'new hampshire': 'NH', 'new jersey': 'NJ', + 'new mexico': 'NM', 'new york': 'NY', 'north carolina': 'NC', + 'north dakota': 'ND', 'ohio': 'OH', 'oklahoma': 'OK', 'oregon': 'OR', + 'pennsylvania': 'PA', 'rhode island': 'RI', 'south carolina': 'SC', + 'south dakota': 'SD', 'tennessee': 'TN', 'texas': 'TX', 'utah': 'UT', + 'vermont': 'VT', 'virginia': 'VA', 'washington': 'WA', + 'west virginia': 'WV', 'wisconsin': 'WI', 'wyoming': 'WY', +} + # Coordinate regex _COORD_RE = re.compile(r'^\s*(-?\d+\.?\d*)\s*[,\s]\s*(-?\d+\.?\d*)\s*$') @@ -208,12 +235,26 @@ def _classify_and_parse(query): elif zipcode and not number and not street_name: return 'POSTCODE', parsed elif addr_type == 'Ambiguous': - # Check if it looks like a locality: 2 tokens, second is a state code + # Check if it looks like a locality: last token(s) are a state code or name tokens = q.replace(',', ' ').split() - if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES: - parsed['city'] = ' '.join(tokens[:-1]) - parsed['state'] = tokens[-1].upper() - return 'LOCALITY', parsed + if len(tokens) >= 2: + last_upper = tokens[-1].upper() + if last_upper in _STATE_CODES: + parsed['city'] = ' '.join(tokens[:-1]) + parsed['state'] = last_upper + return 'LOCALITY', parsed + # Check full state names (single-word like "idaho" or two-word like "new york") + last_lower = tokens[-1].lower() + if last_lower in _STATE_NAME_TO_CODE: + parsed['city'] = ' '.join(tokens[:-1]) + parsed['state'] = _STATE_NAME_TO_CODE[last_lower] + return 'LOCALITY', parsed + if len(tokens) >= 3: + two_word = f"{tokens[-2].lower()} {last_lower}" + if two_word in _STATE_NAME_TO_CODE: + parsed['city'] = ' '.join(tokens[:-2]) + parsed['state'] = _STATE_NAME_TO_CODE[two_word] + return 'LOCALITY', parsed return 'UNKNOWN', parsed else: return 'UNKNOWN', parsed @@ -363,7 +404,8 @@ def _parse_photon_features(features, source): '_photon_rank': i, '_number': props.get('housenumber', ''), '_street': props.get('street', ''), - '_city': props.get('city', ''), + # For locality results, the name IS the city (Photon omits 'city' on city-type features) + '_city': props.get('city', '') or (props.get('name', '') if rtype == 'locality' else ''), '_state': props.get('state', ''), }) return results @@ -476,6 +518,21 @@ def _score_candidate(candidate, parsed, intent): signals['photon_position'] = round(score, 2) total += score + # ── Business intent POI boost ── + # When the query has no road keywords (likely a business/POI search), + # boost amenity/shop/etc results and penalize highway/route results. + # Skipped for LOCALITY, POSTCODE, COORD queries where class is irrelevant. + if intent not in ('LOCALITY', 'POSTCODE', 'COORD'): + q_tokens_lower = set(parsed.get('raw_query', '').lower().replace(',', ' ').split()) + if not (q_tokens_lower & _ROAD_KEYWORDS): + osm_key = (candidate.get('raw') or {}).get('osm_key', '') + if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'): + signals['poi_class_boost'] = W_POI_CLASS_BOOST + total += W_POI_CLASS_BOOST + elif osm_key in ('highway', 'route'): + signals['highway_class_penalty'] = W_HIGHWAY_CLASS_PENALTY + total += W_HIGHWAY_CLASS_PENALTY + return round(total, 2), signals @@ -526,10 +583,13 @@ def _rerank(candidates, parsed, intent, query, limit): # Trace log for audit _trace_logger.debug("─── Query: %r intent=%s ───", query, intent) - for i, c in enumerate(scored[:3]): + for i, c in enumerate(scored): + osm_key = (c.get('raw') or {}).get('osm_key', '—') + osm_val = (c.get('raw') or {}).get('osm_value', '—') _trace_logger.debug( - " #%d score=%.2f src=%s name=%s", - i, c['_score'], c.get('source', '?'), c.get('name', '?')[:60] + " #%d score=%.2f src=%s key=%s/%s name=%s", + i, c['_score'], c.get('source', '?'), osm_key, osm_val, + c.get('name', '?')[:60] ) _trace_logger.debug(" signals=%s", c.get('_signals', {})) From 095bf8c2af35c1d4d0dabb7dd1492e808f6cf485 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 22 Apr 2026 04:08:12 +0000 Subject: [PATCH 16/72] Add Google Places (New) tertiary enrichment for business POIs Fills opening_hours, phone, and website gaps when OSM + Overture data is incomplete. Only fires for business-class POIs (amenity, shop, tourism, leisure, office, craft). Daily API call cap with SQLite tracking. cache_put now preserves google columns across cache refreshes. Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 3 +- config/profiles/minimal_pi.yaml | 1 + config/profiles/regional_pi.yaml | 1 + lib/google_places.py | 397 +++++++++++++++++++++++++++++++ lib/place_detail.py | 132 +++++++++- 5 files changed, 527 insertions(+), 7 deletions(-) create mode 100644 lib/google_places.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 99430a8..f44a58b 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -22,7 +22,7 @@ traffic: place_details: local_source: "nominatim" - local_bbox: [-117.2, 41.98, -111.04, 49.0] + local_bbox: [-125.0, 31.3, -104.0, 49.0] fallback_source: "overpass" services: @@ -40,6 +40,7 @@ features: has_landclass: false has_address_book_write: false has_overture_enrichment: true + has_google_places_enrichment: true defaults: center: [42.5736, -114.6066] diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index 07a61d5..108fdfd 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -35,6 +35,7 @@ features: has_landclass: false has_address_book_write: true has_overture_enrichment: false + has_google_places_enrichment: false defaults: center: [44.0, -114.0] diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 291ee81..eaf7956 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -40,6 +40,7 @@ features: has_landclass: true has_address_book_write: true has_overture_enrichment: false + has_google_places_enrichment: false defaults: center: [44.0, -114.0] diff --git a/lib/google_places.py b/lib/google_places.py new file mode 100644 index 0000000..8272b81 --- /dev/null +++ b/lib/google_places.py @@ -0,0 +1,397 @@ +""" +Google Places (New) API client for tertiary enrichment. + +Searches for business POIs and fetches details (opening hours, phone, website) +when OSM + Overture data is incomplete. Uses field masks to minimize cost. + +API docs: https://developers.google.com/maps/documentation/places/web-service +""" +import json +import os +import sqlite3 +import time +from datetime import date, timezone, datetime + +import requests + +from .utils import setup_logging + +logger = setup_logging('recon.google_places') + +API_BASE = 'https://places.googleapis.com/v1' +DEFAULT_DAILY_CAP = 500 +REQUEST_TIMEOUT = 3 # seconds + +# Google day index → OSM abbreviation +_DAY_ABBR = ['Su', 'Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa'] + +_db_conn = None + + +def _get_db(): + """Return a module-level SQLite connection (lazy init).""" + global _db_conn + if _db_conn is not None: + return _db_conn + + db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') + db_path = os.path.join(db_dir, 'place_cache.db') + _db_conn = sqlite3.connect(db_path, check_same_thread=False) + _db_conn.execute("PRAGMA journal_mode=WAL") + _db_conn.execute("PRAGMA synchronous=NORMAL") + # Ensure google_api_calls table exists + _db_conn.execute(""" + CREATE TABLE IF NOT EXISTS google_api_calls ( + call_date TEXT PRIMARY KEY, + call_count INTEGER NOT NULL DEFAULT 0 + ) + """) + _db_conn.commit() + return _db_conn + + +def _get_api_key(): + """Return the Google Places API key from environment.""" + key = os.environ.get('GOOGLE_PLACES_API_KEY') + if not key: + logger.error("GOOGLE_PLACES_API_KEY not set in environment") + return key + + +def _get_daily_cap(): + """Return the daily API call cap (configurable via deployment config).""" + try: + from .deployment_config import get_deployment_config + config = get_deployment_config() + return config.get('google_places', {}).get('daily_cap', DEFAULT_DAILY_CAP) + except Exception: + return DEFAULT_DAILY_CAP + + +# ── Daily call counter ────────────────────────────────────────────────── + +def check_daily_cap(): + """Return True if under daily cap, False if limit reached.""" + db = _get_db() + today = date.today().isoformat() + row = db.execute( + "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,) + ).fetchone() + current = row[0] if row else 0 + cap = _get_daily_cap() + if current >= cap: + logger.info(f"google_places: daily_cap_reached count={current} cap={cap}") + return False + return True + + +def get_daily_count(): + """Return today's API call count.""" + db = _get_db() + today = date.today().isoformat() + row = db.execute( + "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,) + ).fetchone() + return row[0] if row else 0 + + +def increment_call_counter(): + """Atomically increment today's API call counter.""" + db = _get_db() + today = date.today().isoformat() + db.execute(""" + INSERT INTO google_api_calls (call_date, call_count) VALUES (?, 1) + ON CONFLICT(call_date) DO UPDATE SET call_count = call_count + 1 + """, (today,)) + db.commit() + + +def _set_daily_count_to_cap(): + """Set today's counter to the cap value (soft-stop on quota error).""" + db = _get_db() + today = date.today().isoformat() + cap = _get_daily_cap() + db.execute(""" + INSERT INTO google_api_calls (call_date, call_count) VALUES (?, ?) + ON CONFLICT(call_date) DO UPDATE SET call_count = ? + """, (today, cap, cap)) + db.commit() + + +# ── Google Places cache (on place_cache table) ───────────────────────── + +def cache_get_google(osm_type, osm_id): + """Return (google_place_id, google_data_dict) or (None, None).""" + db = _get_db() + row = db.execute( + "SELECT google_place_id, google_data FROM place_cache WHERE osm_type=? AND osm_id=?", + (osm_type, osm_id) + ).fetchone() + if row and row[0]: + data = None + if row[1]: + try: + data = json.loads(row[1]) + except (json.JSONDecodeError, TypeError): + pass + return row[0], data + return None, None + + +def cache_put_google(osm_type, osm_id, place_id, data): + """Store Google Places data for a cache entry (UPSERT on google columns).""" + db = _get_db() + now = int(time.time()) + db.execute(""" + INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at, google_place_id, google_data, google_fetched_at) + VALUES (?, ?, '', 'pending', 0, ?, ?, ?) + ON CONFLICT(osm_type, osm_id) DO UPDATE SET + google_place_id = excluded.google_place_id, + google_data = excluded.google_data, + google_fetched_at = excluded.google_fetched_at + """, (osm_type, osm_id, place_id, json.dumps(data) if data else None, now)) + db.commit() + + +# ── API calls ─────────────────────────────────────────────────────────── + +def search_place(name, lat, lon, radius_m=200): + """ + Search Google Places (New) for a business by name + location. + Returns the Google Place ID of the best match, or None. + """ + key = _get_api_key() + if not key: + return None + + if not check_daily_cap(): + return None + + try: + resp = requests.post( + f'{API_BASE}/places:searchText', + headers={ + 'Content-Type': 'application/json', + 'X-Goog-Api-Key': key, + 'X-Goog-FieldMask': 'places.id,places.displayName,places.location', + }, + json={ + 'textQuery': name, + 'locationBias': { + 'circle': { + 'center': {'latitude': lat, 'longitude': lon}, + 'radius': float(radius_m), + } + }, + 'maxResultCount': 1, + }, + timeout=REQUEST_TIMEOUT, + ) + + increment_call_counter() + + if resp.status_code == 429: + logger.warning("google_places: action=search place=%s result=rate_limited", name) + _set_daily_count_to_cap() + return None + + if resp.status_code == 403: + logger.error("google_places: action=search place=%s result=forbidden (invalid key?)", name) + return None + + if resp.status_code != 200: + logger.warning("google_places: action=search place=%s result=error status=%d", name, resp.status_code) + return None + + data = resp.json() + places = data.get('places', []) + if not places: + logger.info("google_places: action=search place=%s result=miss", name) + return None + + place_id = places[0].get('id') + display = places[0].get('displayName', {}).get('text', '?') + logger.info("google_places: action=search place=%s result=hit google_name=%s id=%s", name, display, place_id) + return place_id + + except requests.exceptions.Timeout: + logger.warning("google_places: action=search place=%s result=timeout", name) + return None + except Exception as e: + logger.error("google_places: action=search place=%s result=error err=%s", name, e) + return None + + +def get_place_details(place_id): + """ + Fetch details for a Google Place ID. + Returns dict with {opening_hours, phone_number, website} or None. + """ + key = _get_api_key() + if not key: + return None + + if not check_daily_cap(): + return None + + try: + resp = requests.get( + f'{API_BASE}/places/{place_id}', + headers={ + 'X-Goog-Api-Key': key, + 'X-Goog-FieldMask': 'regularOpeningHours,internationalPhoneNumber,websiteUri', + }, + timeout=REQUEST_TIMEOUT, + ) + + increment_call_counter() + + if resp.status_code == 429: + logger.warning("google_places: action=details id=%s result=rate_limited", place_id) + _set_daily_count_to_cap() + return None + + if resp.status_code != 200: + logger.warning("google_places: action=details id=%s result=error status=%d", place_id, resp.status_code) + return None + + data = resp.json() + result = { + 'opening_hours': None, + 'opening_hours_raw': None, + 'phone_number': None, + 'website': None, + } + + # Phone + phone = data.get('internationalPhoneNumber') + if phone: + result['phone_number'] = phone.replace(' ', '').replace('-', '') + + # Website + result['website'] = data.get('websiteUri') + + # Opening hours + hours = data.get('regularOpeningHours') + if hours: + # Try OSM-compatible format from periods + periods = hours.get('periods', []) + if periods: + osm_str = _periods_to_osm(periods) + if osm_str: + result['opening_hours'] = osm_str + + # Fallback: weekday descriptions (human-readable) + if not result['opening_hours']: + descriptions = hours.get('weekdayDescriptions') + if descriptions: + result['opening_hours_raw'] = descriptions + + logger.info("google_places: action=details id=%s result=hit hours=%s phone=%s website=%s", + place_id, + 'yes' if result['opening_hours'] or result['opening_hours_raw'] else 'no', + 'yes' if result['phone_number'] else 'no', + 'yes' if result['website'] else 'no') + return result + + except requests.exceptions.Timeout: + logger.warning("google_places: action=details id=%s result=timeout", place_id) + return None + except Exception as e: + logger.error("google_places: action=details id=%s result=error err=%s", place_id, e) + return None + + +# ── Opening hours conversion ──────────────────────────────────────────── + +def _periods_to_osm(periods): + """ + Convert Google Places periods array to OSM opening_hours string. + + Google periods: [{"open": {"day": 0-6, "hour": H, "minute": M}, + "close": {"day": 0-6, "hour": H, "minute": M}}, ...] + Where day 0 = Sunday. + + OSM format: "Mo-Fr 06:00-23:00; Sa-Su 07:00-23:00" + """ + if not periods: + return None + + # Check for 24/7: single period with no close, or open 00:00 close 00:00 next day + if len(periods) == 1: + p = periods[0] + o = p.get('open', {}) + c = p.get('close') + if c is None and o.get('hour', 0) == 0 and o.get('minute', 0) == 0: + return '24/7' + + # Build a map: day_index → "HH:MM-HH:MM" + day_hours = {} # day_index → time_range string + for p in periods: + o = p.get('open', {}) + c = p.get('close', {}) + day = o.get('day', 0) + open_time = f"{o.get('hour', 0):02d}:{o.get('minute', 0):02d}" + + if c: + close_time = f"{c.get('hour', 0):02d}:{c.get('minute', 0):02d}" + # Handle midnight closing (00:00 means end of day) + if close_time == '00:00': + close_time = '24:00' + else: + close_time = '24:00' + + time_range = f"{open_time}-{close_time}" + + # A day can have multiple periods (e.g., lunch break) + if day in day_hours: + day_hours[day] = day_hours[day] + ',' + time_range + else: + day_hours[day] = time_range + + if not day_hours: + return None + + # Check if all 7 days have same hours + unique_ranges = set(day_hours.values()) + if len(day_hours) == 7 and len(unique_ranges) == 1: + hours = unique_ranges.pop() + if hours == '00:00-24:00': + return '24/7' + return hours # implicit "every day" + + # Group consecutive days with same hours + # Reorder to OSM convention: Mo(1) Tu(2) We(3) Th(4) Fr(5) Sa(6) Su(0) + osm_day_order = [1, 2, 3, 4, 5, 6, 0] + groups = [] + current_days = [] + current_hours = None + + for day_idx in osm_day_order: + hours = day_hours.get(day_idx) + if hours == current_hours: + current_days.append(day_idx) + else: + if current_days and current_hours: + groups.append((current_days, current_hours)) + current_days = [day_idx] + current_hours = hours + + if current_days and current_hours: + groups.append((current_days, current_hours)) + + if not groups: + return None + + # Format each group + parts = [] + for days, hours in groups: + if len(days) == 1: + day_str = _DAY_ABBR[days[0]] + elif len(days) == 2: + day_str = f"{_DAY_ABBR[days[0]]},{_DAY_ABBR[days[1]]}" + else: + day_str = f"{_DAY_ABBR[days[0]]}-{_DAY_ABBR[days[-1]]}" + parts.append(f"{day_str} {hours}") + + return '; '.join(parts) diff --git a/lib/place_detail.py b/lib/place_detail.py index 8ca2781..9c71b3b 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -73,13 +73,17 @@ def cache_get(osm_type, osm_id): def cache_put(osm_type, osm_id, data, source): - """Store a place detail result in the cache.""" + """Store a place detail result in the cache (preserves google columns).""" db = _get_db() - db.execute( - "INSERT OR REPLACE INTO place_cache (osm_type, osm_id, data, source, cached_at) " - "VALUES (?, ?, ?, ?, ?)", - (osm_type, osm_id, json.dumps(data), source, int(time.time())) - ) + now = int(time.time()) + db.execute(""" + INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(osm_type, osm_id) DO UPDATE SET + data = excluded.data, + source = excluded.source, + cached_at = excluded.cached_at + """, (osm_type, osm_id, json.dumps(data), source, now)) db.commit() @@ -154,6 +158,120 @@ def _enrich_with_overture(result, osm_type, osm_id): return result + +# ── Google Places enrichment (tertiary, gap-fill only) ────────────── + +# Business POI classes eligible for Google enrichment +_BUSINESS_CLASSES = {'amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'} + +# Fields Google can fill +_GOOGLE_GAP_FIELDS = ('opening_hours', 'phone', 'website') + + +def _enrich_with_google(result, osm_type, osm_id): + """ + Tertiary enrichment via Google Places (New) API. + Only fires for business-type POIs when opening_hours, phone, or website + are still missing after OSM + Overture enrichment. + Fills only empty fields — never overwrites existing values. + """ + # Check feature flag + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_google_places_enrichment', False): + return result + except Exception: + return result + + # Only enrich business-type POIs + poi_class = result.get('class', '') + if poi_class not in _BUSINESS_CLASSES: + return result + + # Check if any gap fields are missing + extratags = result.get('extratags', {}) + gaps = [f for f in _GOOGLE_GAP_FIELDS if not extratags.get(f)] + if not gaps: + logger.debug(f"google_places: skip {osm_type}/{osm_id} — no gaps") + return result + + try: + from . import google_places + except ImportError: + logger.debug("google_places module not available") + return result + + # Check Google cache first + cached_pid, cached_data = google_places.cache_get_google(osm_type, osm_id) + if cached_pid and cached_data: + _apply_google_data(result, cached_data, gaps) + result.setdefault('sources', {})['google_places'] = { + 'place_id': cached_pid, + 'source': 'cache', + } + logger.debug(f"google_places: cache hit for {osm_type}/{osm_id}") + return result + + # Skip if already looked up and found nothing (cached_pid is None) + if cached_pid is not None: + return result + + # Daily cap check + if not google_places.check_daily_cap(): + return result + + # Search for the place + name = result.get('name', '') + centroid = result.get('centroid', {}) + lat = centroid.get('lat') + lon = centroid.get('lon') + if not name or not lat or not lon: + return result + + place_id = google_places.search_place(name, lat, lon) + if not place_id: + # Cache the miss to avoid repeated lookups + google_places.cache_put_google(osm_type, osm_id, '__miss__', None) + return result + + # Get details + details = google_places.get_place_details(place_id) + if not details: + google_places.cache_put_google(osm_type, osm_id, place_id, None) + return result + + # Cache the result + google_places.cache_put_google(osm_type, osm_id, place_id, details) + + # Apply to result + _apply_google_data(result, details, gaps) + result.setdefault('sources', {})['google_places'] = { + 'place_id': place_id, + 'source': 'api', + 'daily_count': google_places.get_daily_count(), + } + + return result + + +def _apply_google_data(result, google_data, gaps): + """Apply Google Places data to fill gap fields only.""" + extratags = result.get('extratags', {}) + if 'opening_hours' in gaps: + osm_hours = google_data.get('opening_hours') + if osm_hours: + extratags['opening_hours'] = osm_hours + elif google_data.get('opening_hours_raw'): + extratags['opening_hours_raw'] = google_data['opening_hours_raw'] + if 'phone' in gaps and google_data.get('phone_number'): + extratags['phone'] = google_data['phone_number'] + if 'website' in gaps and google_data.get('website'): + extratags['website'] = google_data['website'] + result['extratags'] = extratags + + # ── Nominatim parsing ─────────────────────────────────────────────────── # Nominatim address array uses rank_address to indicate what each entry is. @@ -441,6 +559,7 @@ def get_place_detail(osm_type, osm_id): if nominatim_result: nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) + nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') return nominatim_result, 200 @@ -472,6 +591,7 @@ def get_place_detail(osm_type, osm_id): if overpass_result: overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) + overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) cache_put(osm_type, osm_id, overpass_result, 'overpass') return overpass_result, 200 From a4288c0cd87ddb88a55956b76d80c477811136df Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 22 Apr 2026 05:29:54 +0000 Subject: [PATCH 17/72] Add contacts/phone book system with per-user scoping New files: - lib/auth.py: Authentik forward-auth helpers (get_user_id, @require_auth) - lib/contacts.py: ContactsDB with CRUD, soft delete, restore, purge, find_nearby - lib/contacts_api.py: Flask Blueprint with 9 API endpoints at /api/contacts - templates/knowledge/deleted_contacts.html: Dashboard recovery page Modified: - lib/api.py: Register contacts_bp, add KNOWLEDGE_SUBNAV entry, /deleted-contacts route - config/profiles: has_contacts feature flag (true for home, false for pi profiles) Separate SQLite DB at data/contacts.db. Per-user isolation via X-Authentik-Username. Home/Work labels enforced unique per user. Haversine proximity queries (75m default). Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 1 + config/profiles/minimal_pi.yaml | 1 + config/profiles/regional_pi.yaml | 1 + lib/api.py | 17 ++ lib/auth.py | 22 +++ lib/contacts.py | 211 ++++++++++++++++++++++ lib/contacts_api.py | 114 ++++++++++++ templates/knowledge/deleted_contacts.html | 56 ++++++ 8 files changed, 423 insertions(+) create mode 100644 lib/auth.py create mode 100644 lib/contacts.py create mode 100644 lib/contacts_api.py create mode 100644 templates/knowledge/deleted_contacts.html diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index f44a58b..848a640 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -41,6 +41,7 @@ features: has_address_book_write: false has_overture_enrichment: true has_google_places_enrichment: true + has_contacts: true defaults: center: [42.5736, -114.6066] diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index 108fdfd..e855382 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -36,6 +36,7 @@ features: has_address_book_write: true has_overture_enrichment: false has_google_places_enrichment: false + has_contacts: false defaults: center: [44.0, -114.0] diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index eaf7956..9a80e51 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -41,6 +41,7 @@ features: has_address_book_write: true has_overture_enrichment: false has_google_places_enrichment: false + has_contacts: false defaults: center: [44.0, -114.0] diff --git a/lib/api.py b/lib/api.py index 476c1af..c95b39a 100644 --- a/lib/api.py +++ b/lib/api.py @@ -63,6 +63,10 @@ app.request_class = _LargeZimRequest from .address_book_api import address_book_bp app.register_blueprint(address_book_bp) +# ── Contacts Blueprint ── +from .contacts_api import contacts_bp +app.register_blueprint(contacts_bp) + # ── Netsyms + Geocode Blueprints ── from .netsyms_api import netsyms_bp, geocode_bp app.register_blueprint(netsyms_bp) @@ -78,6 +82,7 @@ KNOWLEDGE_SUBNAV = [ {'href': '/upload', 'label': 'Upload'}, {'href': '/web-ingest', 'label': 'Web Ingest'}, {'href': '/failures', 'label': 'Failures'}, + {'href': '/deleted-contacts', 'label': 'Deleted Contacts'}, ] PEERTUBE_SUBNAV = [ @@ -323,6 +328,18 @@ def failures_page(): failures=failures) +@app.route("/deleted-contacts") +def deleted_contacts_page(): + from .auth import get_user_id + from .contacts import ContactsDB + user_id = get_user_id() or "anonymous" + db = ContactsDB() + contacts = db.list_deleted(user_id) + return render_template("knowledge/deleted_contacts.html", + domain="knowledge", subnav=KNOWLEDGE_SUBNAV, active_page="/deleted-contacts", + contacts=contacts) + + @app.route('/peertube') def peertube_dashboard(): return render_template('peertube/dashboard.html', diff --git a/lib/auth.py b/lib/auth.py new file mode 100644 index 0000000..22b08d2 --- /dev/null +++ b/lib/auth.py @@ -0,0 +1,22 @@ +""" +RECON Auth Helper — extract user identity from Authentik forward-auth headers. +""" +from functools import wraps +from flask import request, jsonify + + +def get_user_id(): + """Return X-Authentik-Username or None.""" + return request.headers.get('X-Authentik-Username') + + +def require_auth(f): + """Decorator: 401 if no Authentik auth header.""" + @wraps(f) + def wrapper(*args, **kwargs): + user_id = get_user_id() + if not user_id: + return jsonify({'error': 'Authentication required'}), 401 + request.user_id = user_id + return f(*args, **kwargs) + return wrapper diff --git a/lib/contacts.py b/lib/contacts.py new file mode 100644 index 0000000..fd7c451 --- /dev/null +++ b/lib/contacts.py @@ -0,0 +1,211 @@ +""" +RECON Contacts Database — per-user phone book with soft delete and proximity queries. + +Separate DB at data/contacts.db. Thread-local connections with WAL mode (StatusDB pattern). +""" +import math +import os +import sqlite3 +import threading +from datetime import datetime, timezone + +_local = threading.local() + +_SCHEMA = """ +CREATE TABLE IF NOT EXISTS contacts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id TEXT NOT NULL, + label TEXT NOT NULL, + name TEXT, + call_sign TEXT, + phone TEXT, + email TEXT, + category TEXT, + notes TEXT, + lat REAL, + lon REAL, + osm_type TEXT, + osm_id INTEGER, + address TEXT, + show_proximity INTEGER DEFAULT 0, + created_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')), + updated_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')), + deleted_at TEXT, + deleted_by TEXT +); + +CREATE INDEX IF NOT EXISTS idx_contacts_user ON contacts(user_id); +CREATE INDEX IF NOT EXISTS idx_contacts_user_category ON contacts(user_id, category); +CREATE INDEX IF NOT EXISTS idx_contacts_user_deleted ON contacts(user_id, deleted_at); +CREATE INDEX IF NOT EXISTS idx_contacts_geo ON contacts(lat, lon); +CREATE UNIQUE INDEX IF NOT EXISTS idx_contacts_home_work + ON contacts(user_id, label) + WHERE label IN ('Home', 'Work') AND deleted_at IS NULL; +""" + + +def _haversine_m(lat1, lon1, lat2, lon2): + """Haversine distance in meters.""" + R = 6_371_000 + rlat1, rlat2 = math.radians(lat1), math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 + return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +def _row_to_dict(row): + """Convert sqlite3.Row to dict, casting show_proximity to bool.""" + d = dict(row) + d['show_proximity'] = bool(d.get('show_proximity', 0)) + return d + + +class ContactsDB: + def __init__(self, db_path=None): + if db_path is None: + db_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'contacts.db') + self.db_path = db_path + os.makedirs(os.path.dirname(db_path), exist_ok=True) + self._init_db() + + def _get_conn(self): + if not hasattr(_local, 'contacts_conn') or _local.contacts_conn is None: + _local.contacts_conn = sqlite3.connect(self.db_path, timeout=30) + _local.contacts_conn.row_factory = sqlite3.Row + _local.contacts_conn.execute("PRAGMA journal_mode=WAL") + _local.contacts_conn.execute("PRAGMA busy_timeout=5000") + return _local.contacts_conn + + def _init_db(self): + conn = self._get_conn() + conn.executescript(_SCHEMA) + conn.commit() + + def list_all(self, user_id, category=None, search=None): + conn = self._get_conn() + sql = "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NULL" + params = [user_id] + if category: + sql += " AND category = ?" + params.append(category) + if search: + sql += " AND (label LIKE ? OR name LIKE ? OR call_sign LIKE ? OR phone LIKE ?)" + like = f"%{search}%" + params.extend([like, like, like, like]) + sql += " ORDER BY label" + return [_row_to_dict(r) for r in conn.execute(sql, params).fetchall()] + + def list_deleted(self, user_id): + conn = self._get_conn() + rows = conn.execute( + "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NOT NULL ORDER BY deleted_at DESC", + (user_id,) + ).fetchall() + return [_row_to_dict(r) for r in rows] + + def get(self, user_id, contact_id, include_deleted=False): + conn = self._get_conn() + sql = "SELECT * FROM contacts WHERE id = ? AND user_id = ?" + if not include_deleted: + sql += " AND deleted_at IS NULL" + row = conn.execute(sql, (contact_id, user_id)).fetchone() + return _row_to_dict(row) if row else None + + def create(self, user_id, **fields): + conn = self._get_conn() + fields.pop('id', None) + fields.pop('user_id', None) + fields.pop('created_at', None) + fields.pop('updated_at', None) + fields.pop('deleted_at', None) + fields.pop('deleted_by', None) + if 'show_proximity' in fields: + fields['show_proximity'] = 1 if fields['show_proximity'] else 0 + columns = ['user_id'] + list(fields.keys()) + placeholders = ', '.join(['?'] * len(columns)) + col_str = ', '.join(columns) + values = [user_id] + list(fields.values()) + try: + cur = conn.execute(f"INSERT INTO contacts ({col_str}) VALUES ({placeholders})", values) + conn.commit() + return self.get(user_id, cur.lastrowid), None + except sqlite3.IntegrityError: + return None, 'conflict' + + def update(self, user_id, contact_id, **fields): + conn = self._get_conn() + fields.pop('id', None) + fields.pop('user_id', None) + fields.pop('created_at', None) + fields.pop('deleted_at', None) + fields.pop('deleted_by', None) + if 'show_proximity' in fields: + fields['show_proximity'] = 1 if fields['show_proximity'] else 0 + fields['updated_at'] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ') + sets = ', '.join(f"{k} = ?" for k in fields) + values = list(fields.values()) + [contact_id, user_id] + conn.execute(f"UPDATE contacts SET {sets} WHERE id = ? AND user_id = ? AND deleted_at IS NULL", values) + conn.commit() + return self.get(user_id, contact_id) + + def soft_delete(self, user_id, contact_id): + conn = self._get_conn() + now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ') + conn.execute( + "UPDATE contacts SET deleted_at = ?, deleted_by = ? WHERE id = ? AND user_id = ? AND deleted_at IS NULL", + (now, user_id, contact_id, user_id) + ) + conn.commit() + return self.get(user_id, contact_id, include_deleted=True) + + def restore(self, user_id, contact_id): + conn = self._get_conn() + row = self.get(user_id, contact_id, include_deleted=True) + if not row or not row.get('deleted_at'): + return None, 'not_found' + if row.get('label') in ('Home', 'Work'): + existing = conn.execute( + "SELECT id FROM contacts WHERE user_id = ? AND label = ? AND deleted_at IS NULL AND id != ?", + (user_id, row['label'], contact_id) + ).fetchone() + if existing: + return None, 'conflict' + conn.execute( + "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL WHERE id = ? AND user_id = ?", + (contact_id, user_id) + ) + conn.commit() + return self.get(user_id, contact_id), None + + def purge(self, user_id, contact_id): + conn = self._get_conn() + row = self.get(user_id, contact_id, include_deleted=True) + if not row: + return False, 'not_found' + if not row.get('deleted_at'): + return False, 'not_deleted' + conn.execute("DELETE FROM contacts WHERE id = ? AND user_id = ?", (contact_id, user_id)) + conn.commit() + return True, None + + def find_nearby(self, user_id, lat, lon, radius_m=75): + conn = self._get_conn() + # Bounding box pre-filter (~111km per degree lat) + dlat = radius_m / 111_000 + dlon = radius_m / (111_000 * math.cos(math.radians(lat))) + rows = conn.execute( + """SELECT * FROM contacts + WHERE user_id = ? AND deleted_at IS NULL AND show_proximity = 1 + AND lat BETWEEN ? AND ? AND lon BETWEEN ? AND ?""", + (user_id, lat - dlat, lat + dlat, lon - dlon, lon + dlon) + ).fetchall() + results = [] + for r in rows: + dist = _haversine_m(lat, lon, r['lat'], r['lon']) + if dist <= radius_m: + d = _row_to_dict(r) + d['distance_m'] = round(dist, 1) + results.append(d) + results.sort(key=lambda x: x['distance_m']) + return results diff --git a/lib/contacts_api.py b/lib/contacts_api.py new file mode 100644 index 0000000..4e50605 --- /dev/null +++ b/lib/contacts_api.py @@ -0,0 +1,114 @@ +""" +RECON Contacts API — Flask Blueprint. + +Per-user phone book with soft delete, restore, purge, and proximity queries. +All endpoints require Authentik forward-auth (X-Authentik-Username header). +""" +from flask import Blueprint, request, jsonify + +from .auth import require_auth +from .contacts import ContactsDB + +contacts_bp = Blueprint('contacts', __name__) + +_db = None + +def _get_db(): + global _db + if _db is None: + _db = ContactsDB() + return _db + + +@contacts_bp.route('/api/contacts', methods=['GET']) +@require_auth +def list_contacts(): + db = _get_db() + category = request.args.get('category') + search = request.args.get('search') + return jsonify(db.list_all(request.user_id, category=category, search=search)) + + +@contacts_bp.route('/api/contacts', methods=['POST']) +@require_auth +def create_contact(): + db = _get_db() + data = request.get_json(force=True) + contact, err = db.create(request.user_id, **data) + if err == 'conflict': + return jsonify({'error': 'You already have a Home/Work contact'}), 409 + return jsonify(contact), 201 + + +@contacts_bp.route('/api/contacts/nearby', methods=['GET']) +@require_auth +def nearby_contacts(): + db = _get_db() + lat = request.args.get('lat', type=float) + lon = request.args.get('lon', type=float) + radius_m = request.args.get('radius_m', 75, type=float) + if lat is None or lon is None: + return jsonify({'error': 'lat and lon required'}), 400 + return jsonify(db.find_nearby(request.user_id, lat, lon, radius_m)) + + +@contacts_bp.route('/api/contacts/deleted', methods=['GET']) +@require_auth +def list_deleted(): + db = _get_db() + return jsonify(db.list_deleted(request.user_id)) + + +@contacts_bp.route('/api/contacts/', methods=['GET']) +@require_auth +def get_contact(contact_id): + db = _get_db() + contact = db.get(request.user_id, contact_id) + if not contact: + return jsonify({'error': 'Not found'}), 404 + return jsonify(contact) + + +@contacts_bp.route('/api/contacts/', methods=['PATCH']) +@require_auth +def update_contact(contact_id): + db = _get_db() + data = request.get_json(force=True) + contact = db.update(request.user_id, contact_id, **data) + if not contact: + return jsonify({'error': 'Not found'}), 404 + return jsonify(contact) + + +@contacts_bp.route('/api/contacts/', methods=['DELETE']) +@require_auth +def delete_contact(contact_id): + db = _get_db() + contact = db.soft_delete(request.user_id, contact_id) + if not contact: + return jsonify({'error': 'Not found'}), 404 + return jsonify(contact) + + +@contacts_bp.route('/api/contacts//restore', methods=['POST']) +@require_auth +def restore_contact(contact_id): + db = _get_db() + contact, err = db.restore(request.user_id, contact_id) + if err == 'not_found': + return jsonify({'error': 'Not found'}), 404 + if err == 'conflict': + return jsonify({'error': 'You already have a Home/Work contact'}), 409 + return jsonify(contact) + + +@contacts_bp.route('/api/contacts//purge', methods=['DELETE']) +@require_auth +def purge_contact(contact_id): + db = _get_db() + ok, err = db.purge(request.user_id, contact_id) + if err == 'not_found': + return jsonify({'error': 'Not found'}), 404 + if err == 'not_deleted': + return jsonify({'error': 'Contact must be deleted before purging'}), 400 + return jsonify({'ok': True}) diff --git a/templates/knowledge/deleted_contacts.html b/templates/knowledge/deleted_contacts.html new file mode 100644 index 0000000..58a9ff5 --- /dev/null +++ b/templates/knowledge/deleted_contacts.html @@ -0,0 +1,56 @@ +{% extends "base.html" %} +{% block content %} +

Deleted Contacts

+{% if not contacts %} +

No deleted contacts.

+{% else %} + + + {% for c in contacts %} + + + + + + + + + {% endfor %} +
LabelNameCategoryPhoneDeleted AtActions
{{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} + + +
+{% endif %} +{% endblock %} +{% block scripts %} + +{% endblock %} From 3280e34718609420bb69b037409517798a9488b2 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 22 Apr 2026 06:26:25 +0000 Subject: [PATCH 18/72] Add Nav-I dashboard section with restore-as conflict resolution - Create Nav-I top-level section in dashboard navigation - Move Deleted Contacts from Knowledge subnav to Nav-I - Add Nav-I landing page with card grid (deleted count, API keys stub) - Add /nav-i/api-keys placeholder page - Add restore-as endpoint for Home/Work conflict resolution - Conflict modal in deleted contacts template for label rename on restore Co-Authored-By: Claude Opus 4.6 --- lib/api.py | 29 ++++++- lib/contacts.py | 19 +++++ lib/contacts_api.py | 18 +++++ templates/base.html | 1 + templates/navi/api_keys.html | 8 ++ templates/navi/deleted_contacts.html | 116 +++++++++++++++++++++++++++ templates/navi/landing.html | 22 +++++ 7 files changed, 210 insertions(+), 3 deletions(-) create mode 100644 templates/navi/api_keys.html create mode 100644 templates/navi/deleted_contacts.html create mode 100644 templates/navi/landing.html diff --git a/lib/api.py b/lib/api.py index c95b39a..86e3fc8 100644 --- a/lib/api.py +++ b/lib/api.py @@ -82,7 +82,6 @@ KNOWLEDGE_SUBNAV = [ {'href': '/upload', 'label': 'Upload'}, {'href': '/web-ingest', 'label': 'Web Ingest'}, {'href': '/failures', 'label': 'Failures'}, - {'href': '/deleted-contacts', 'label': 'Deleted Contacts'}, ] PEERTUBE_SUBNAV = [ @@ -102,6 +101,12 @@ SETTINGS_SUBNAV = [ {'href': '/settings/health', 'label': 'Service Health'}, ] +NAVI_SUBNAV = [ + {'href': '/nav-i', 'label': 'Overview'}, + {'href': '/deleted-contacts', 'label': 'Deleted Contacts'}, + {'href': '/nav-i/api-keys', 'label': 'API Keys'}, +] + def _format_source_citation(payload): """Format a human-readable citation from a search result payload.""" @@ -335,11 +340,29 @@ def deleted_contacts_page(): user_id = get_user_id() or "anonymous" db = ContactsDB() contacts = db.list_deleted(user_id) - return render_template("knowledge/deleted_contacts.html", - domain="knowledge", subnav=KNOWLEDGE_SUBNAV, active_page="/deleted-contacts", + return render_template("navi/deleted_contacts.html", + domain="navi", subnav=NAVI_SUBNAV, active_page="/deleted-contacts", contacts=contacts) +@app.route("/nav-i") +def navi_landing_page(): + from .auth import get_user_id + from .contacts import ContactsDB + user_id = get_user_id() or "anonymous" + db = ContactsDB() + deleted_count = len(db.list_deleted(user_id)) + return render_template("navi/landing.html", + domain="navi", subnav=NAVI_SUBNAV, active_page="/nav-i", + deleted_count=deleted_count) + + +@app.route("/nav-i/api-keys") +def navi_api_keys_page(): + return render_template("navi/api_keys.html", + domain="navi", subnav=NAVI_SUBNAV, active_page="/nav-i/api-keys") + + @app.route('/peertube') def peertube_dashboard(): return render_template('peertube/dashboard.html', diff --git a/lib/contacts.py b/lib/contacts.py index fd7c451..f2782db 100644 --- a/lib/contacts.py +++ b/lib/contacts.py @@ -178,6 +178,25 @@ class ContactsDB: conn.commit() return self.get(user_id, contact_id), None + def restore_as(self, user_id, contact_id, new_label): + """Restore a soft-deleted contact with a new label (for Home/Work conflict resolution).""" + conn = self._get_conn() + row = self.get(user_id, contact_id, include_deleted=True) + if not row or not row.get('deleted_at'): + return None, 'not_found' + if not new_label or not new_label.strip(): + return None, 'invalid_label' + now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%fZ') + try: + conn.execute( + "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL, label = ?, updated_at = ? WHERE id = ? AND user_id = ?", + (new_label.strip(), now, contact_id, user_id) + ) + conn.commit() + except sqlite3.IntegrityError: + return None, 'conflict' + return self.get(user_id, contact_id), None + def purge(self, user_id, contact_id): conn = self._get_conn() row = self.get(user_id, contact_id, include_deleted=True) diff --git a/lib/contacts_api.py b/lib/contacts_api.py index 4e50605..0e4506b 100644 --- a/lib/contacts_api.py +++ b/lib/contacts_api.py @@ -102,6 +102,24 @@ def restore_contact(contact_id): return jsonify(contact) +@contacts_bp.route('/api/contacts//restore-as', methods=['POST']) +@require_auth +def restore_as_contact(contact_id): + db = _get_db() + data = request.get_json(force=True) + new_label = data.get('label', '').strip() + if not new_label: + return jsonify({'error': 'label is required'}), 400 + contact, err = db.restore_as(request.user_id, contact_id, new_label) + if err == 'not_found': + return jsonify({'error': 'Not found'}), 404 + if err == 'invalid_label': + return jsonify({'error': 'Invalid label'}), 400 + if err == 'conflict': + return jsonify({'error': 'Label conflict'}), 409 + return jsonify(contact) + + @contacts_bp.route('/api/contacts//purge', methods=['DELETE']) @require_auth def purge_contact(contact_id): diff --git a/templates/base.html b/templates/base.html index 49b1a21..4c06892 100644 --- a/templates/base.html +++ b/templates/base.html @@ -21,6 +21,7 @@ PeerTube Kiwix Search + Nav-I Settings {% if subnav %} diff --git a/templates/navi/api_keys.html b/templates/navi/api_keys.html new file mode 100644 index 0000000..341c6d7 --- /dev/null +++ b/templates/navi/api_keys.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} +{% block content %} +

API Keys

+
+

Per-user API key management is coming soon.

+

This will allow generating keys for programmatic access to the Navi contacts API.

+
+{% endblock %} diff --git a/templates/navi/deleted_contacts.html b/templates/navi/deleted_contacts.html new file mode 100644 index 0000000..0847fab --- /dev/null +++ b/templates/navi/deleted_contacts.html @@ -0,0 +1,116 @@ +{% extends "base.html" %} +{% block content %} +

Deleted Contacts

+{% if not contacts %} +

No deleted contacts.

+{% else %} + + + {% for c in contacts %} + + + + + + + + + {% endfor %} +
LabelNameCategoryPhoneDeleted AtActions
{{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} + + +
+{% endif %} + + + +{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/templates/navi/landing.html b/templates/navi/landing.html new file mode 100644 index 0000000..131f3af --- /dev/null +++ b/templates/navi/landing.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block content %} +

Nav-I

+

Navi frontend management — contacts, API keys, and configuration.

+ + +{% endblock %} From 9c5b0520f993e853eabf8299b27fbb8a4f5ae83a Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 22 Apr 2026 15:36:37 +0000 Subject: [PATCH 19/72] Add PAD-US public land classification lookup Integrates USGS PAD-US 4.0 (651k features) into a local PostGIS database for point-in-polygon land ownership queries. Adds /api/landclass endpoint returning classifications, public/private status, and management hierarchy. - lib/landclass.py: connection pool, lookup_landclass(), domain label maps - lib/api.py: GET /api/landclass?lat=&lon= (feature-flag gated) - home.yaml: enable has_landclass flag Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 2 +- lib/api.py | 34 +++++ lib/landclass.py | 252 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 lib/landclass.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 848a640..a65cab1 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -37,7 +37,7 @@ features: has_hillshade: true has_3d_terrain: false has_traffic_overlay: true - has_landclass: false + has_landclass: true has_address_book_write: false has_overture_enrichment: true has_google_places_enrichment: true diff --git a/lib/api.py b/lib/api.py index 86e3fc8..a54aca7 100644 --- a/lib/api.py +++ b/lib/api.py @@ -26,6 +26,7 @@ from .utils import get_config, content_hash, clean_filename_to_title, derive_sou from .status import StatusDB from .deployment_config import get_deployment_config from .place_detail import get_place_detail +from .landclass import lookup_landclass, format_summary logger = setup_logging('recon.api') @@ -1234,6 +1235,39 @@ def api_place_detail(osm_type, osm_id): return jsonify(result), status + +@app.route('/api/landclass') +def api_landclass(): + """PAD-US land classification lookup for a point.""" + config = get_deployment_config() + if not config.get('features', {}).get('has_landclass'): + return jsonify({'error': 'Land classification not available'}), 404 + + try: + lat = float(request.args.get('lat', '')) + lon = float(request.args.get('lon', '')) + except (ValueError, TypeError): + return jsonify({'error': 'lat and lon required as numbers'}), 400 + + if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): + return jsonify({'error': 'lat must be -90..90, lon must be -180..180'}), 400 + + classifications = lookup_landclass(lat, lon) + is_public = len(classifications) > 0 + is_private = len(classifications) == 0 + summary = format_summary(classifications) + + return jsonify({ + 'lat': lat, + 'lon': lon, + 'classifications': classifications, + 'count': len(classifications), + 'is_public': is_public, + 'is_private': is_private, + 'summary': summary, + }) + + @app.route('/api/config') def api_config(): """Return deployment profile config for frontend consumption.""" diff --git a/lib/landclass.py b/lib/landclass.py new file mode 100644 index 0000000..f581994 --- /dev/null +++ b/lib/landclass.py @@ -0,0 +1,252 @@ +""" +PAD-US land classification lookup. + +Provides point-in-polygon queries against the USGS Protected Areas Database +(PAD-US) stored in a local PostGIS database. Returns land ownership, +management, and public access information for any lat/lon coordinate. + +Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, +functions return empty results gracefully (feature degrades, doesn't crash). +""" +import os + +import psycopg2 +import psycopg2.pool + +from .utils import setup_logging + +logger = setup_logging('recon.landclass') + +_pool = None +_pool_failed = False + +# ── Label mappings from PAD-US domain tables ──────────────────────────── +# Extracted from PADUS4_0_Geodatabase.gdb domain lookup layers. +# ogr2ogr lowercases all column names. + +AGENCY_NAME_MAP = { + 'TVA': 'Tennessee Valley Authority', + 'BLM': 'Bureau of Land Management', + 'BOEM': 'Bureau of Ocean Energy Management', + 'USBR': 'Bureau of Reclamation', + 'FWS': 'U.S. Fish and Wildlife Service', + 'USFS': 'Forest Service', + 'DOD': 'Department of Defense', + 'USACE': 'Army Corps of Engineers', + 'DOE': 'Department of Energy', + 'NPS': 'National Park Service', + 'NRCS': 'Natural Resources Conservation Service', + 'ARS': 'Agricultural Research Service', + 'BIA': 'Bureau of Indian Affairs', + 'NOAA': 'National Oceanic and Atmospheric Administration', + 'BPA': 'Bonneville Power Administration', + 'OTHF': 'Other or Unknown Federal Land', + 'TRIB': 'American Indian Lands', + 'SPR': 'State Park and Recreation', + 'SDC': 'State Department of Conservation', + 'SLB': 'State Land Board', +} + +AGENCY_TYPE_MAP = { + 'FED': 'Federal', + 'TRIB': 'American Indian Lands', + 'STAT': 'State', + 'DIST': 'Regional Agency Special District', + 'LOC': 'Local Government', + 'NGO': 'Non-Governmental Organization', + 'PVT': 'Private', + 'JNT': 'Joint', + 'UNK': 'Unknown', + 'TERR': 'Territorial', + 'DESG': 'Designation', +} + +DESIGNATION_TYPE_MAP = { + 'NP': 'National Park', + 'NM': 'National Monument', + 'NCA': 'Conservation Area', + 'NF': 'National Forest', + 'NG': 'National Grassland', + 'PUB': 'National Public Lands', + 'NT': 'National Scenic or Historic Trail', + 'NWR': 'National Wildlife Refuge', + 'WA': 'Wilderness Area', + 'WSR': 'Wild and Scenic River', + 'WSA': 'Wilderness Study Area', + 'MPA': 'Marine Protected Area', + 'NRA': 'National Recreation Area', + 'NSBV': 'National Scenic, Botanical or Volcanic Area', + 'NLS': 'National Lakeshore or Seashore', + 'IRA': 'Inventoried Roadless Area', + 'ACEC': 'Area of Critical Environmental Concern', + 'RNA': 'Research Natural Area', + 'REC': 'Recreation Management Area', + 'RMA': 'Resource Management Area', + 'WPA': 'Watershed Protection Area', + 'REA': 'Research or Educational Area', + 'HCA': 'Historic or Cultural Area', + 'MIT': 'Mitigation Land or Bank', + 'MIL': 'Military Land', + 'ACC': 'Access Area', + 'SDA': 'Special Designation Area', + 'PROC': 'Approved or Proclamation Boundary', + 'FOTH': 'Federal Other or Unknown', + 'ND': 'Not Designated', +} + +PUBLIC_ACCESS_MAP = { + 'OA': 'Open Access', + 'RA': 'Restricted Access', + 'XA': 'Closed', + 'UK': 'Unknown', +} + +GAP_STATUS_MAP = { + '1': 'Managed for biodiversity (disturbance events proceed)', + '2': 'Managed for biodiversity (disturbance suppressed)', + '3': 'Multiple uses (extractive/OHV)', + '4': 'No known mandate for biodiversity protection', +} + +CATEGORY_MAP = { + 'Fee': 'Fee', + 'Easement': 'Easement', + 'Other': 'Other', + 'Unknown': 'Unknown', + 'Designation': 'Designation', + 'Marine': 'Marine Area', + 'Proclamation': 'Approved, Proclamation or Extent Boundary', +} + +STATE_MAP = { + 'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas', + 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware', + 'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', + 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', + 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', + 'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', + 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', + 'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', + 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', + 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', + 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', + 'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', + 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming', +} + + +def _decode(code, label_map): + """Decode a PAD-US code using a label map. Returns decoded label or the raw code.""" + if not code: + return '' + code = str(code).strip() + return label_map.get(code, code) + + +def _get_pool(): + """Lazy-init the connection pool. Returns None if Postgres is unreachable.""" + global _pool, _pool_failed + if _pool is not None: + return _pool + if _pool_failed: + return None + + try: + _pool = psycopg2.pool.SimpleConnectionPool( + minconn=1, + maxconn=3, + host=os.environ.get('PADUS_DB_HOST', 'localhost'), + port=int(os.environ.get('PADUS_DB_PORT', '5432')), + dbname=os.environ.get('PADUS_DB_NAME', 'padus'), + user=os.environ.get('PADUS_DB_USER', 'overture'), + password=os.environ.get('PADUS_DB_PASSWORD', ''), + connect_timeout=5, + ) + logger.info("PAD-US PostgreSQL connection pool initialized") + return _pool + except Exception as e: + _pool_failed = True + logger.warning(f"PAD-US PostgreSQL unavailable, land classification disabled: {e}") + return None + + +def _query_all(sql, params): + """Execute a query and return all rows as a list of dicts, or empty list.""" + pool = _get_pool() + if pool is None: + return [] + + conn = None + try: + conn = pool.getconn() + with conn.cursor() as cur: + cur.execute(sql, params) + rows = cur.fetchall() + if not rows: + return [] + cols = [desc[0] for desc in cur.description] + return [dict(zip(cols, row)) for row in rows] + except Exception as e: + logger.warning(f"PAD-US query error: {e}") + if conn: + try: + conn.rollback() + except Exception: + pass + return [] + finally: + if conn: + try: + pool.putconn(conn) + except Exception: + pass + + +def lookup_landclass(lat, lon): + """ + Look up PAD-US land classifications for a point. + + Returns a list of classification dicts, ordered by area ascending + (smallest/most specific first). Empty list on error or no results. + """ + rows = _query_all( + """SELECT unit_nm, mang_name, mang_type, own_name, own_type, + des_tp, gap_sts, pub_access, category, gis_acres, state_nm + FROM pad_units + WHERE ST_Intersects(geom, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) + ORDER BY gis_acres ASC + LIMIT 10""", + (lon, lat) + ) + + results = [] + for row in rows: + pa_code = str(row.get('pub_access', '')).strip() + + results.append({ + 'unit_name': (row.get('unit_nm') or '').strip(), + 'manager_name': _decode(row.get('mang_name'), AGENCY_NAME_MAP), + 'manager_type': _decode(row.get('mang_type'), AGENCY_TYPE_MAP), + 'owner_type': _decode(row.get('own_type'), AGENCY_TYPE_MAP), + 'designation_type': _decode(row.get('des_tp'), DESIGNATION_TYPE_MAP), + 'gap_status': str(row.get('gap_sts', '')).strip(), + 'public_access': _decode(pa_code, PUBLIC_ACCESS_MAP), + 'public_access_code': pa_code, + 'category': _decode(row.get('category'), CATEGORY_MAP), + 'acres': row.get('gis_acres'), + 'state': _decode(row.get('state_nm'), STATE_MAP), + }) + + return results + + +def format_summary(classifications): + """ + Format a human-readable summary from classification results. + + Returns the most specific unit name, or None if no results. + """ + if not classifications: + return None + # First result is smallest/most specific (ordered by acres ASC) + return classifications[0].get('unit_name') or None From 829bc87b7b9c66592344cb6d018ac987a1cdf854 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 23 Apr 2026 06:34:22 +0000 Subject: [PATCH 20/72] Add wiki link rewriting to local Kiwix Rewrites OSM wikipedia/wikidata/wikivoyage/appropedia extratag values to local Kiwix URLs (wiki.echo6.co) when the article exists in a loaded ZIM, falling back silently to public URLs otherwise. - New lib/wiki_rewrite.py: URL classification, Kiwix OPDS catalog discovery (xml.etree.ElementTree), HEAD-based availability check, positive-only SQLite cache, disabled discovery stubs - place_detail.py: _enrich_wiki_links() at both Nominatim and Overpass enrichment sites, before cache_put - Profile flags: has_wiki_rewriting (home/regional: true, minimal: false), has_wiki_discovery (all: false, stubs for future activation) Co-Authored-By: Claude Opus 4.6 --- config/profiles/home.yaml | 3 + config/profiles/minimal_pi.yaml | 3 + config/profiles/regional_pi.yaml | 3 + lib/place_detail.py | 52 +++++ lib/wiki_rewrite.py | 324 +++++++++++++++++++++++++++++++ 5 files changed, 385 insertions(+) create mode 100644 lib/wiki_rewrite.py diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index a65cab1..91fcc93 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -38,10 +38,13 @@ features: has_3d_terrain: false has_traffic_overlay: true has_landclass: true + has_public_lands_layer: true has_address_book_write: false has_overture_enrichment: true has_google_places_enrichment: true has_contacts: true + has_wiki_rewriting: true + has_wiki_discovery: false defaults: center: [42.5736, -114.6066] diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index e855382..e3ae0fd 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -33,10 +33,13 @@ features: has_3d_terrain: false has_traffic_overlay: false has_landclass: false + has_public_lands_layer: false has_address_book_write: true has_overture_enrichment: false has_google_places_enrichment: false has_contacts: false + has_wiki_rewriting: false + has_wiki_discovery: false defaults: center: [44.0, -114.0] diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 9a80e51..2bd6c8a 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -38,10 +38,13 @@ features: has_3d_terrain: false has_traffic_overlay: true has_landclass: true + has_public_lands_layer: true has_address_book_write: true has_overture_enrichment: false has_google_places_enrichment: false has_contacts: false + has_wiki_rewriting: true + has_wiki_discovery: false defaults: center: [44.0, -114.0] diff --git a/lib/place_detail.py b/lib/place_detail.py index 9c71b3b..8dc8e64 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -272,6 +272,56 @@ def _apply_google_data(result, google_data, gaps): result['extratags'] = extratags + + +# ── Wiki link rewriting ───────────────────────────────────────────────── + +# Extratag keys that may contain wiki references +_WIKI_TAGS = ('wikipedia', 'wikidata', 'wikivoyage', 'appropedia') + + +def _enrich_wiki_links(result): + """ + Rewrite wiki-related extratags to local Kiwix URLs where available. + Falls back to public URLs. Only runs when has_wiki_rewriting is enabled. + Returns the (possibly enriched) result dict. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_wiki_rewriting', False): + return result + except Exception: + return result + + try: + from .wiki_rewrite import rewrite_wiki_link + except ImportError: + logger.debug("wiki_rewrite module not available") + return result + + extratags = result.get('extratags', {}) + if not extratags: + return result + + rewrites = {} + for tag in _WIKI_TAGS: + value = extratags.get(tag) + if not value: + continue + url, status = rewrite_wiki_link(tag, value) + if status != 'original': + extratags[tag] = url + rewrites[tag] = status + + if rewrites: + result['extratags'] = extratags + result.setdefault('sources', {})['wiki_rewrites'] = rewrites + logger.debug(f"Wiki rewrites for {result.get('osm_type')}/{result.get('osm_id')}: {rewrites}") + + return result + # ── Nominatim parsing ─────────────────────────────────────────────────── # Nominatim address array uses rank_address to indicate what each entry is. @@ -560,6 +610,7 @@ def get_place_detail(osm_type, osm_id): if nominatim_result: nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) + nominatim_result = _enrich_wiki_links(nominatim_result) cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') return nominatim_result, 200 @@ -592,6 +643,7 @@ def get_place_detail(osm_type, osm_id): if overpass_result: overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) + overpass_result = _enrich_wiki_links(overpass_result) cache_put(osm_type, osm_id, overpass_result, 'overpass') return overpass_result, 200 diff --git a/lib/wiki_rewrite.py b/lib/wiki_rewrite.py new file mode 100644 index 0000000..d884635 --- /dev/null +++ b/lib/wiki_rewrite.py @@ -0,0 +1,324 @@ +""" +Wiki link rewriter — rewrites OSM wikipedia/wikidata/wikivoyage/appropedia +links to local Kiwix URLs where the article exists in a loaded ZIM. + +Falls back silently to public URLs when article is unavailable locally. +Caches positive results only in place_cache.db. + +Kiwix catalog is parsed from the OPDS Atom feed at startup and refreshed +hourly to pick up newly loaded ZIMs without a restart. + +Operations note: + - After loading a new ZIM, either restart RECON (forces fresh catalog + fetch) or wait up to 1 hour for automatic refresh. + - To invalidate the wiki cache (e.g. after ZIM update): + sqlite3 /opt/recon/data/place_cache.db "DELETE FROM wiki_cache;" +""" +import os +import re +import sqlite3 +import time +import xml.etree.ElementTree as ET +from urllib.parse import unquote, quote + +import requests as http_requests + +from .utils import setup_logging + +logger = setup_logging('recon.wiki_rewrite') + +# ── Configuration ─────────────────────────────────────────────────────── + +KIWIX_BASE = "http://localhost:8430" +KIWIX_PUBLIC_BASE = "https://wiki.echo6.co" +KIWIX_CATALOG_URL = f"{KIWIX_BASE}/catalog/v2/entries" +HEAD_TIMEOUT = 1.5 # seconds +CATALOG_REFRESH_INTERVAL = 3600 # 1 hour + +# OPDS Atom namespace +_ATOM_NS = "http://www.w3.org/2005/Atom" + +# ── ZIM catalog map ───────────────────────────────────────────────────── + +_zim_map = {} # source_type → content_path e.g. 'wikipedia' → 'wikipedia_en_all_maxi_2026-02' +_zim_map_ts = 0.0 # last refresh timestamp + +# Prefix-to-source-type mapping (order matters: longest prefix first) +_ZIM_PREFIX_MAP = [ + ('wikipedia_en_all', 'wikipedia'), + ('appropedia_en_all', 'appropedia'), + ('wikivoyage_en', 'wikivoyage'), + ('wikidata_en', 'wikidata'), +] + + +def _discover_zims(): + """Parse Kiwix OPDS Atom catalog to map source types to content paths.""" + global _zim_map, _zim_map_ts + + try: + resp = http_requests.get(KIWIX_CATALOG_URL, timeout=5) + if resp.status_code != 200: + logger.warning(f"Kiwix catalog returned HTTP {resp.status_code}") + return + + root = ET.fromstring(resp.content) + new_map = {} + + for entry in root.findall(f"{{{_ATOM_NS}}}entry"): + name_el = entry.find(f"{{{_ATOM_NS}}}name") + if name_el is None: + continue + book_name = name_el.text or "" + + # + content_path = None + for link in entry.findall(f"{{{_ATOM_NS}}}link"): + if link.get("type") == "text/html": + href = link.get("href", "") + if href.startswith("/content/"): + content_path = href[len("/content/"):] + break + + if not content_path: + continue + + # Match book name against known prefixes + for prefix, source_type in _ZIM_PREFIX_MAP: + if book_name.startswith(prefix): + new_map[source_type] = content_path + break + + _zim_map = new_map + _zim_map_ts = time.time() + logger.info(f"ZIM catalog refreshed: {new_map}") + + except Exception as e: + logger.warning(f"Failed to discover ZIMs from Kiwix catalog: {e}") + + +def _ensure_zim_map(): + """Lazy-load and refresh ZIM map if stale.""" + if not _zim_map or (time.time() - _zim_map_ts) > CATALOG_REFRESH_INTERVAL: + _discover_zims() + + +# ── Database (wiki_cache in place_cache.db) ───────────────────────────── + +_db_conn = None + + +def _get_db(): + """Return a module-level SQLite connection to place_cache.db (lazy init).""" + global _db_conn + if _db_conn is not None: + return _db_conn + + db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') + os.makedirs(db_dir, exist_ok=True) + db_path = os.path.join(db_dir, 'place_cache.db') + + _db_conn = sqlite3.connect(db_path, check_same_thread=False) + _db_conn.execute("PRAGMA journal_mode=WAL") + _db_conn.execute("PRAGMA synchronous=NORMAL") + _db_conn.execute(""" + CREATE TABLE IF NOT EXISTS wiki_cache ( + source_type TEXT NOT NULL, + article_id TEXT NOT NULL, + kiwix_url TEXT NOT NULL, + cached_at INTEGER NOT NULL, + PRIMARY KEY (source_type, article_id) + ) + """) + _db_conn.commit() + logger.info(f"Wiki cache table ready in {db_path}") + return _db_conn + + +# ── URL classification ────────────────────────────────────────────────── + +# Patterns for OSM wikipedia/wikidata tag values +_WIKI_TAG_RE = re.compile(r'^(?:en:)?(.+)$') # "en:Title" or just "Title" +_WIKI_URL_RE = re.compile(r'https?://en\.wikipedia\.org/wiki/(.+)') +_WIKIDATA_TAG_RE = re.compile(r'^(Q\d+)$') +_WIKIDATA_URL_RE = re.compile(r'https?://(?:www\.)?wikidata\.org/wiki/(Q\d+)') +_WIKIVOYAGE_URL_RE = re.compile(r'https?://en\.wikivoyage\.org/wiki/(.+)') +_APPROPEDIA_URL_RE = re.compile(r'https?://(?:www\.)?appropedia\.org/(?:wiki/)?(.+)') + + +def _normalize_article_id(article_id): + """Normalize article ID to MediaWiki/Kiwix convention: spaces → underscores.""" + return article_id.replace(' ', '_') + + +def classify_wiki_link(tag_name, value): + """ + Classify an OSM extratag value into (source_type, article_id) or None. + + tag_name: the extratags key ('wikipedia', 'wikidata', etc.) + value: the raw tag value from OSM + + Article IDs are normalized to MediaWiki convention (spaces → underscores). + """ + if not value or not isinstance(value, str): + return None + + value = value.strip() + + if tag_name == 'wikidata': + m = _WIKIDATA_TAG_RE.match(value) + if m: + return ('wikidata', m.group(1)) + m = _WIKIDATA_URL_RE.match(value) + if m: + return ('wikidata', m.group(1)) + return None + + if tag_name == 'wikipedia': + # URL form: https://en.wikipedia.org/wiki/Title + m = _WIKI_URL_RE.match(value) + if m: + return ('wikipedia', _normalize_article_id(unquote(m.group(1)))) + # Tag form: "en:Title" or "Title" + m = _WIKI_TAG_RE.match(value) + if m: + return ('wikipedia', _normalize_article_id(m.group(1))) + return None + + if tag_name == 'wikivoyage': + m = _WIKIVOYAGE_URL_RE.match(value) + if m: + return ('wikivoyage', _normalize_article_id(unquote(m.group(1)))) + # Plain tag: "en:Title" or "Title" + m = _WIKI_TAG_RE.match(value) + if m: + return ('wikivoyage', _normalize_article_id(m.group(1))) + return None + + if tag_name == 'appropedia': + m = _APPROPEDIA_URL_RE.match(value) + if m: + return ('appropedia', _normalize_article_id(unquote(m.group(1)))) + return ('appropedia', _normalize_article_id(value)) + + return None + + +# ── URL builders ──────────────────────────────────────────────────────── + +def build_kiwix_url(source_type, article_id): + """Build a public Kiwix URL. Returns None if source_type not in ZIM map.""" + _ensure_zim_map() + content_path = _zim_map.get(source_type) + if not content_path: + return None + return f"{KIWIX_PUBLIC_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" + + +_PUBLIC_URL_TEMPLATES = { + 'wikipedia': "https://en.wikipedia.org/wiki/{id}", + 'wikidata': "https://www.wikidata.org/wiki/{id}", + 'wikivoyage': "https://en.wikivoyage.org/wiki/{id}", + 'appropedia': "https://www.appropedia.org/wiki/{id}", +} + + +def build_public_url(source_type, article_id): + """Build the canonical public URL for a wiki article.""" + tmpl = _PUBLIC_URL_TEMPLATES.get(source_type) + if not tmpl: + return None + return tmpl.format(id=quote(article_id, safe='/:@!$&\'()*+,;=')) + + +# ── Kiwix availability check ─────────────────────────────────────────── + +def check_kiwix_has_article(source_type, article_id): + """ + Check if an article exists in local Kiwix. + + Returns (bool, url): + - (True, kiwix_public_url) if article exists locally + - (False, None) if not found or Kiwix unavailable + + Only positive results are cached. + """ + # Check cache first + db = _get_db() + row = db.execute( + "SELECT kiwix_url FROM wiki_cache WHERE source_type=? AND article_id=?", + (source_type, article_id) + ).fetchone() + if row: + return (True, row[0]) + + # Build local HEAD URL + _ensure_zim_map() + content_path = _zim_map.get(source_type) + if not content_path: + return (False, None) + + head_url = f"{KIWIX_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" + + try: + resp = http_requests.head(head_url, timeout=HEAD_TIMEOUT, allow_redirects=True) + if resp.status_code == 200: + kiwix_url = build_kiwix_url(source_type, article_id) + # Cache positive result + now = int(time.time()) + db.execute(""" + INSERT OR REPLACE INTO wiki_cache (source_type, article_id, kiwix_url, cached_at) + VALUES (?, ?, ?, ?) + """, (source_type, article_id, kiwix_url, now)) + db.commit() + return (True, kiwix_url) + else: + return (False, None) + except Exception as e: + logger.debug(f"Kiwix HEAD failed for {source_type}/{article_id}: {e}") + return (False, None) + + +# ── Primary entry point ──────────────────────────────────────────────── + +def rewrite_wiki_link(tag_name, value): + """ + Rewrite an OSM wiki tag value to a local Kiwix URL if available. + + Returns (url, 'local'|'public') or (None, None) if unrecognized. + """ + classified = classify_wiki_link(tag_name, value) + if not classified: + return (value, 'original') + + source_type, article_id = classified + + # Try local Kiwix + found, kiwix_url = check_kiwix_has_article(source_type, article_id) + if found and kiwix_url: + return (kiwix_url, 'local') + + # Fall back to public URL + public_url = build_public_url(source_type, article_id) + if public_url: + return (public_url, 'public') + + return (value, 'original') + + +# ── Discovery stubs (disabled, for future activation) ─────────────────── + +def discover_wikivoyage_article(name, category, lat, lon): + """ + Discover a related Wikivoyage article for a place. + Enabled by has_wiki_discovery. Currently returns None. + """ + return None + + +def discover_appropedia_article(name, category): + """ + Discover a related Appropedia article for a place. + Enabled by has_wiki_discovery. Currently returns None. + """ + return None From 15c58a69ac71ee930a590b2d420fddec41f4313c Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 23 Apr 2026 06:50:44 +0000 Subject: [PATCH 21/72] Add Nav-I API key management UI Replace /nav-i/api-keys stub with functional admin page for managing third-party API keys (Gemini, TomTom, Google Places). - New lib/api_keys_admin.py: list/update/test operations with masked display, atomic .env writes (.env.bak backup), provider-specific test calls (Gemini models.list, TomTom geocode, Google Places searchText) - 4 new endpoints: GET /api/nav-i/api-keys/list, POST .../update, POST .../test, POST .../restart-recon - Full UI: key table with masked values, per-key update modal with show/hide toggle, inline test results with latency, Gemini detail sub-table with per-key stats, RECON restart with confirmation Co-Authored-By: Claude Opus 4.6 --- lib/api.py | 57 ++++++ lib/api_keys_admin.py | 358 +++++++++++++++++++++++++++++++++++ templates/navi/api_keys.html | 267 +++++++++++++++++++++++++- 3 files changed, 679 insertions(+), 3 deletions(-) create mode 100644 lib/api_keys_admin.py diff --git a/lib/api.py b/lib/api.py index a54aca7..c9105ad 100644 --- a/lib/api.py +++ b/lib/api.py @@ -1437,6 +1437,63 @@ def api_keys_reload(): return jsonify({'count': count}) + +# ── Nav-I API Key Admin ── + +@app.route('/api/nav-i/api-keys/list', methods=['GET']) +def navi_api_keys_list(): + from .api_keys_admin import list_keys + return jsonify({'keys': list_keys()}) + + +@app.route('/api/nav-i/api-keys/update', methods=['POST']) +def navi_api_keys_update(): + from .auth import require_auth + from .api_keys_admin import update_key, update_gemini_key + data = request.get_json(force=True) + name = data.get('name', '') + new_value = data.get('new_value', '') + index = data.get('index') # optional, for Gemini key replacement + if not name or not new_value: + return jsonify({'error': 'name and new_value required'}), 400 + if name == 'GEMINI_KEY' and index is not None: + result = update_gemini_key(int(index), new_value) + else: + result = update_key(name, new_value) + if result.get('success'): + return jsonify(result) + return jsonify(result), 400 + + +@app.route('/api/nav-i/api-keys/test', methods=['POST']) +def navi_api_keys_test(): + from .api_keys_admin import test_key + data = request.get_json(force=True) + name = data.get('name', '') + index = data.get('index') # optional, for testing specific Gemini key + if not name: + return jsonify({'error': 'name required'}), 400 + result = test_key(name, index=int(index) if index is not None else None) + return jsonify(result) + + +@app.route('/api/nav-i/api-keys/restart-recon', methods=['POST']) +def navi_api_keys_restart(): + import subprocess + try: + result = subprocess.run( + ['sudo', 'systemctl', 'restart', 'recon'], + capture_output=True, text=True, timeout=30 + ) + if result.returncode == 0: + return jsonify({'success': True, 'note': 'RECON service restarted'}) + return jsonify({'success': False, 'error': result.stderr.strip()}), 500 + except subprocess.TimeoutExpired: + return jsonify({'success': False, 'error': 'Restart timed out'}), 500 + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + + # ── YouTube Cookie Management ── PEERTUBE_HOST = '192.168.1.170' diff --git a/lib/api_keys_admin.py b/lib/api_keys_admin.py new file mode 100644 index 0000000..3c63565 --- /dev/null +++ b/lib/api_keys_admin.py @@ -0,0 +1,358 @@ +""" +Nav-I API Keys Admin — unified view/update/test for third-party API keys. + +Manages three provider categories: + - Gemini (multiple keys via KeyManager singleton) + - TomTom (single key in .env) + - Google Places (single key in .env) + +All key values are masked in responses. Full values never leave the server +except as user-supplied input on update. +""" +import os +import re +import shutil +import tempfile +import time + +import requests as http_requests + +from .utils import setup_logging + +logger = setup_logging('recon.api_keys_admin') + +ENV_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env') + +# Key definitions: env_name → display metadata +_KEY_DEFS = { + 'TOMTOM_API_KEY': { + 'display_name': 'TomTom', + 'provider': 'tomtom', + }, + 'GOOGLE_PLACES_API_KEY': { + 'display_name': 'Google Places', + 'provider': 'google_places', + }, +} + + +# ── .env read/write helpers ───────────────────────────────────────────── + +def _read_env(): + """Read .env file into a dict of key=value pairs, preserving order.""" + entries = [] # list of (key, value, raw_line) — preserves order and comments + if not os.path.exists(ENV_PATH): + return entries + with open(ENV_PATH, 'r') as f: + for line in f: + raw = line.rstrip('\n') + stripped = raw.strip() + if not stripped or stripped.startswith('#'): + entries.append((None, None, raw)) + continue + m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*)=(.*)$', stripped) + if m: + entries.append((m.group(1), m.group(2).strip().strip('"').strip("'"), raw)) + else: + entries.append((None, None, raw)) + return entries + + +def _write_env(entries): + """Atomically write .env from entries list. Backs up to .env.bak first.""" + # Backup current .env + if os.path.exists(ENV_PATH): + bak_path = ENV_PATH + '.bak' + shutil.copy2(ENV_PATH, bak_path) + + # Write to temp file, then rename (atomic on same filesystem) + fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(ENV_PATH), prefix='.env.', suffix='.tmp') + try: + with os.fdopen(fd, 'w') as f: + for key, value, raw in entries: + if key is not None: + f.write(f'{key}={value}\n') + else: + f.write(raw + '\n') + os.rename(tmp_path, ENV_PATH) + except Exception: + # Clean up temp file on failure + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + logger.info(f"Wrote .env atomically ({len([e for e in entries if e[0]])} keys)") + + +def _get_env_value(name): + """Get a single value from .env by key name.""" + for key, value, _ in _read_env(): + if key == name: + return value + return None + + +def _set_env_value(name, new_value): + """Set a single value in .env. Adds if not present.""" + entries = _read_env() + found = False + for i, (key, value, raw) in enumerate(entries): + if key == name: + entries[i] = (name, new_value, f'{name}={new_value}') + found = True + break + if not found: + entries.append((name, new_value, f'{name}={new_value}')) + _write_env(entries) + + +# ── Masking ───────────────────────────────────────────────────────────── + +def _mask_key(value): + """Mask a key: first 4 chars + '...' + last 4 chars. Never return full value.""" + if not value: + return None + if len(value) <= 8: + return '****' + return value[:4] + '...' + value[-4:] + + +# ── List ──────────────────────────────────────────────────────────────── + +def list_keys(): + """ + Return masked status of all managed API keys. + + Returns list of dicts with: name, display_name, provider, masked_value, + is_set, count (for multi-key providers like Gemini). + """ + result = [] + env_mtime = None + if os.path.exists(ENV_PATH): + env_mtime = time.strftime('%Y-%m-%dT%H:%M:%SZ', + time.gmtime(os.path.getmtime(ENV_PATH))) + + # Gemini keys (via KeyManager) + from .key_manager import get_key_manager + km = get_key_manager() + gemini_keys = km.get_masked_keys() + gemini_count = len(gemini_keys) + # Show a single summary entry for Gemini with count + first_masked = gemini_keys[0]['masked'] if gemini_keys else None + result.append({ + 'name': 'GEMINI_KEY', + 'display_name': 'Gemini', + 'provider': 'gemini', + 'masked_value': first_masked, + 'is_set': gemini_count > 0, + 'count': gemini_count, + 'last_modified': env_mtime, + 'keys': gemini_keys, # full list with per-key stats + }) + + # Single-value keys + for env_name, meta in _KEY_DEFS.items(): + value = _get_env_value(env_name) + result.append({ + 'name': env_name, + 'display_name': meta['display_name'], + 'provider': meta['provider'], + 'masked_value': _mask_key(value), + 'is_set': bool(value), + 'count': 1 if value else 0, + 'last_modified': env_mtime, + }) + + return result + + +# ── Update ────────────────────────────────────────────────────────────── + +def update_key(name, new_value): + """ + Update a key value. For Gemini, name should be 'GEMINI_KEY' with an + optional 'index' for replacing a specific key, or use the KeyManager API. + For TomTom/Google Places, writes directly to .env. + + Returns dict with success status and masked value. + """ + new_value = new_value.strip() + if not new_value: + return {'success': False, 'error': 'Key value cannot be empty'} + + if name == 'GEMINI_KEY': + # Use KeyManager for Gemini + from .key_manager import get_key_manager + km = get_key_manager() + try: + idx = km.add_gemini_key(new_value) + return { + 'success': True, + 'name': name, + 'masked_value': _mask_key(new_value), + 'action': 'added', + 'index': idx, + } + except ValueError as e: + return {'success': False, 'error': str(e)} + + if name in _KEY_DEFS: + _set_env_value(name, new_value) + return { + 'success': True, + 'name': name, + 'masked_value': _mask_key(new_value), + 'action': 'updated', + } + + return {'success': False, 'error': f'Unknown key: {name}'} + + +def update_gemini_key(index, new_value): + """Replace a specific Gemini key by index.""" + new_value = new_value.strip() + if not new_value: + return {'success': False, 'error': 'Key value cannot be empty'} + + from .key_manager import get_key_manager + km = get_key_manager() + try: + km.replace_gemini_key(index, new_value) + return { + 'success': True, + 'name': 'GEMINI_KEY', + 'index': index, + 'masked_value': _mask_key(new_value), + 'action': 'replaced', + } + except (ValueError, IndexError) as e: + return {'success': False, 'error': str(e)} + + +# ── Test ──────────────────────────────────────────────────────────────── + +def test_key(name, index=None): + """ + Test a key against its provider API using the current .env value. + + Returns dict with: success, latency_ms, error, note. + """ + if name == 'GEMINI_KEY': + return _test_gemini(index) + elif name == 'TOMTOM_API_KEY': + return _test_tomtom() + elif name == 'GOOGLE_PLACES_API_KEY': + return _test_google_places() + else: + return {'success': False, 'error': f'Unknown key: {name}', 'latency_ms': 0} + + +def _test_gemini(index=None): + """Test Gemini key by listing models.""" + from .key_manager import get_key_manager + km = get_key_manager() + + if index is not None: + key = km.get_gemini_key(index) + if not key: + return {'success': False, 'error': f'Gemini key index {index} not found', 'latency_ms': 0} + else: + key = km.get_gemini_key(0) + if not key: + return {'success': False, 'error': 'No Gemini keys configured', 'latency_ms': 0} + + t0 = time.time() + try: + resp = http_requests.get( + f"https://generativelanguage.googleapis.com/v1beta/models?key={key}", + timeout=10 + ) + latency = int((time.time() - t0) * 1000) + + if resp.status_code == 200 and 'models' in resp.text: + return {'success': True, 'latency_ms': latency, 'error': None, + 'note': 'Models list returned successfully'} + elif resp.status_code == 403: + return {'success': False, 'latency_ms': latency, + 'error': 'Key disabled or quota exhausted'} + elif resp.status_code == 429: + return {'success': True, 'latency_ms': latency, 'error': None, + 'note': 'Valid key — currently rate-limited'} + else: + return {'success': False, 'latency_ms': latency, + 'error': f'HTTP {resp.status_code}'} + except Exception as e: + latency = int((time.time() - t0) * 1000) + return {'success': False, 'latency_ms': latency, 'error': str(e)} + + +def _test_tomtom(): + """Test TomTom key with a minimal geocode request.""" + key = _get_env_value('TOMTOM_API_KEY') + if not key: + return {'success': False, 'error': 'TOMTOM_API_KEY not set', 'latency_ms': 0} + + t0 = time.time() + try: + resp = http_requests.get( + f"https://api.tomtom.com/search/2/geocode/Boise.json", + params={'key': key, 'limit': 1}, + timeout=10 + ) + latency = int((time.time() - t0) * 1000) + + if resp.status_code == 200: + data = resp.json() + count = data.get('summary', {}).get('totalResults', 0) + return {'success': True, 'latency_ms': latency, 'error': None, + 'note': f'Geocode returned {count} result(s)'} + elif resp.status_code == 403: + return {'success': False, 'latency_ms': latency, + 'error': 'Invalid or expired key'} + else: + return {'success': False, 'latency_ms': latency, + 'error': f'HTTP {resp.status_code}'} + except Exception as e: + latency = int((time.time() - t0) * 1000) + return {'success': False, 'latency_ms': latency, 'error': str(e)} + + +def _test_google_places(): + """Test Google Places (New) API key with a minimal searchText request.""" + key = _get_env_value('GOOGLE_PLACES_API_KEY') + if not key: + return {'success': False, 'error': 'GOOGLE_PLACES_API_KEY not set', 'latency_ms': 0} + + t0 = time.time() + try: + resp = http_requests.post( + "https://places.googleapis.com/v1/places:searchText", + json={'textQuery': 'Boise Idaho', 'maxResultCount': 1}, + headers={ + 'X-Goog-Api-Key': key, + 'X-Goog-FieldMask': 'places.displayName', + }, + timeout=10 + ) + latency = int((time.time() - t0) * 1000) + + if resp.status_code == 200: + data = resp.json() + count = len(data.get('places', [])) + return {'success': True, 'latency_ms': latency, 'error': None, + 'note': f'searchText returned {count} place(s)'} + elif resp.status_code == 403: + return {'success': False, 'latency_ms': latency, + 'error': 'Key not authorized for Places API (New)'} + elif resp.status_code == 429: + return {'success': True, 'latency_ms': latency, 'error': None, + 'note': 'Valid key — quota exceeded'} + else: + body = resp.text[:200] + return {'success': False, 'latency_ms': latency, + 'error': f'HTTP {resp.status_code}: {body}'} + except Exception as e: + latency = int((time.time() - t0) * 1000) + return {'success': False, 'latency_ms': latency, 'error': str(e)} diff --git a/templates/navi/api_keys.html b/templates/navi/api_keys.html index 341c6d7..abf2d16 100644 --- a/templates/navi/api_keys.html +++ b/templates/navi/api_keys.html @@ -1,8 +1,269 @@ {% extends "base.html" %} {% block content %}

API Keys

-
-

Per-user API key management is coming soon.

-

This will allow generating keys for programmatic access to the Navi contacts API.

+ +
+

Updating keys does not restart RECON. After updates, click Restart RECON below or restart manually from terminal.

+
+ +
Loading keys...
+ + + + + + + + + + + +
+ + +
+ + + {% endblock %} + +{% block scripts %} + +{% endblock %} From 63b68bfea7e42dd9d53d536bc05d3fe32d425040 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 03:36:16 +0000 Subject: [PATCH 22/72] feat: add has_contours feature flags for home and regional_pi profiles Adds has_contours, has_contours_test, and has_contours_test_10ft flags to support contour layer toggle in Navi frontend. minimal_pi profile intentionally excluded (no tile overlays in stripped-down deployment). --- config/profiles/home.yaml | 3 +++ config/profiles/regional_pi.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 91fcc93..7acc475 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -39,6 +39,9 @@ features: has_traffic_overlay: true has_landclass: true has_public_lands_layer: true + has_contours: true + has_contours_test: true + has_contours_test_10ft: true has_address_book_write: false has_overture_enrichment: true has_google_places_enrichment: true diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 2bd6c8a..8e70cd6 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -39,6 +39,9 @@ features: has_traffic_overlay: true has_landclass: true has_public_lands_layer: true + has_contours: true + has_contours_test: true + has_contours_test_10ft: true has_address_book_write: true has_overture_enrichment: false has_google_places_enrichment: false From f35af18320b047ed4862cd112dd90c1a09db9e7c Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 03:36:21 +0000 Subject: [PATCH 23/72] feat(place): gate Google Places API calls behind auth Guest users receive local and cached data only. New Google Places API calls are only triggered for authenticated users, protecting against cost exploitation on the public navi.echo6.co frontend. The pattern: cached Google data flows freely (already paid for by an authed lookup). New API calls require X-Authentik-Username via get_user_id() check. --- lib/place_detail.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/place_detail.py b/lib/place_detail.py index 8dc8e64..efa805c 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -218,6 +218,12 @@ def _enrich_with_google(result, osm_type, osm_id): if cached_pid is not None: return result + # Skip new Google API calls for guest users (cached data already returned above) + from .auth import get_user_id + if not get_user_id(): + logger.debug(f"google_places: skip API call for {osm_type}/{osm_id} — guest user") + return result + # Daily cap check if not google_places.check_daily_cap(): return result From 2ed9335f4e2a711b14918d94eecff99fa698aa99 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 04:03:44 +0000 Subject: [PATCH 24/72] feat(geocode): add viewport bias for location-aware search - Add lat/lon/zoom params to geocode() and _retrieve_photon_freetext() - Update nav_tools.py wrapper to pass through viewport params - Add /api/geocode handler support for lat/lon/zoom query params - Add _safe_float() helper for param validation - Cast zoom to int for Photon compatibility Allows the frontend to pass current map center/zoom to bias search results toward the visible area. Co-Authored-By: Claude Opus 4.5 --- lib/geocode.py | 17 ++++++++--------- lib/nav_tools.py | 4 ++-- lib/netsyms_api.py | 20 +++++++++++++++++++- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/lib/geocode.py b/lib/geocode.py index 22acdf9..312cca7 100644 --- a/lib/geocode.py +++ b/lib/geocode.py @@ -334,21 +334,20 @@ def _retrieve_photon_structured(parsed, limit=10): return _parse_photon_features(data.get('features', []), 'photon') -def _retrieve_photon_freetext(query, limit=10): +def _retrieve_photon_freetext(query, limit=10, lat=None, lon=None, zoom=None): """Query Photon /api for free-text search with location bias.""" try: params = { 'q': query, 'limit': limit, - 'lat': GEOCODE_BIAS_LAT, - 'lon': GEOCODE_BIAS_LON, - 'zoom': GEOCODE_BIAS_ZOOM, + 'lat': lat if lat is not None else GEOCODE_BIAS_LAT, + 'lon': lon if lon is not None else GEOCODE_BIAS_LON, + 'zoom': int(zoom) if zoom is not None else GEOCODE_BIAS_ZOOM, } resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) resp.raise_for_status() data = resp.json() except Exception as e: - logger.debug("Photon /api failed: %s", e) return [] return _parse_photon_features(data.get('features', []), 'photon') @@ -663,7 +662,7 @@ def _annotate_with_address_book(results): # PUBLIC API # ═══════════════════════════════════════════════════════════════════ -def geocode(query, limit=10): +def geocode(query, limit=10, lat=None, lon=None, zoom=None): """ Structured geocoding with multi-source retrieval and reranking. @@ -731,7 +730,7 @@ def geocode(query, limit=10): # Parallel: Netsyms (structured) + Photon (freetext with expanded query) netsyms_results = _retrieve_netsyms(parsed, limit=limit) photon_results = _retrieve_photon_freetext( - parsed.get('expanded_query', q), limit=limit + parsed.get('expanded_query', q), limit=limit, lat=lat, lon=lon, zoom=zoom ) # Also try Photon /structured for addresses photon_struct = _retrieve_photon_structured(parsed, limit=5) @@ -739,11 +738,11 @@ def geocode(query, limit=10): elif intent == 'POSTCODE': netsyms_results = _retrieve_netsyms(parsed, limit=limit) - photon_results = _retrieve_photon_freetext(q, limit=limit) + photon_results = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom) candidates = netsyms_results + photon_results elif intent in ('LOCALITY', 'POI', 'UNKNOWN'): - candidates = _retrieve_photon_freetext(q, limit=limit) + candidates = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom) # ── Deduplicate by (lat, lon) proximity ── deduped = [] diff --git a/lib/nav_tools.py b/lib/nav_tools.py index 2f91616..d4bb1f7 100644 --- a/lib/nav_tools.py +++ b/lib/nav_tools.py @@ -50,10 +50,10 @@ def _haversine_m(lat1, lon1, lat2, lon2): return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) -def geocode(query: str, limit: int = 10): +def geocode(query: str, limit: int = 10, lat=None, lon=None, zoom=None): """Delegate to the structured geocode module. See lib/geocode.py.""" from . import geocode as geocode_mod - return geocode_mod.geocode(query, limit=limit) + return geocode_mod.geocode(query, limit=limit, lat=lat, lon=lon, zoom=zoom) def _geocode(query: str): diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 92c8b6e..4a0847f 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -35,6 +35,19 @@ def api_netsyms_health(): return jsonify(netsyms.health()) + +def _safe_float(val, lo, hi): + """Parse val as float; return None if missing, non-numeric, or out of [lo, hi].""" + if val is None: + return None + try: + f = float(val) + if lo <= f <= hi: + return f + except (ValueError, TypeError): + pass + return None + @geocode_bp.route('/api/geocode') def api_geocode(): """ @@ -58,7 +71,12 @@ def api_geocode(): except (ValueError, TypeError): limit = 10 - result = nav_tools.geocode(q, limit=limit) + # Viewport bias parameters (optional) + lat = _safe_float(request.args.get("lat"), -90, 90) + lon = _safe_float(request.args.get("lon"), -180, 180) + zoom = _safe_float(request.args.get("zoom"), 0, 22) + + result = nav_tools.geocode(q, limit=limit, lat=lat, lon=lon, zoom=zoom) return jsonify(result) From 4f96d8f6fe34b6d5e9a61942fbaea48f0a6c46c1 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 04:58:01 +0000 Subject: [PATCH 25/72] docs: Navi directions UX redesign with radial map menu Design document covering: - Current state analysis and failure modes - New DirectionsPanel with visible From/To inputs - RadialMenu component for map right-click/long-press - Interaction flows for all directions scenarios - Mobile considerations (bottom sheet, long-press timing) - Implementation sequence (10 phases) - Open questions for Matt Implementation deferred to dedicated session. Co-Authored-By: Claude Opus 4.5 --- docs/NAVI-DIRECTIONS-REDESIGN.md | 579 +++++++++++++++++++++++++++++++ 1 file changed, 579 insertions(+) create mode 100644 docs/NAVI-DIRECTIONS-REDESIGN.md diff --git a/docs/NAVI-DIRECTIONS-REDESIGN.md b/docs/NAVI-DIRECTIONS-REDESIGN.md new file mode 100644 index 0000000..705201f --- /dev/null +++ b/docs/NAVI-DIRECTIONS-REDESIGN.md @@ -0,0 +1,579 @@ +# Navi Directions UX Redesign + +**Status:** Draft +**Author:** Claude + Matt +**Date:** 2026-04-26 +**Implementation:** Deferred to dedicated session + +--- + +## 1. Current State + +### Components + +| Component | File | Role | +|-----------|------|------| +| SearchBar | `SearchBar.jsx` | Overloaded: search, add stop, set origin (hidden modes) | +| StopList | `StopList.jsx` | Drag-drop reordering of stops | +| GpsOriginItem | `GpsOriginItem.jsx` | "Your location" row when GPS granted | +| StopItem | `StopItem.jsx` | Individual stop with delete button | +| ModeSelector | `ModeSelector.jsx` | auto/pedestrian/bicycle toggle | +| ManeuverList | `ManeuverList.jsx` | Turn-by-turn directions display | +| PlaceDetail | `PlaceDetail.jsx` | "Directions" button for selected place | + +### State Model + +```javascript +stops: [] // Array of {id, lat, lon, name, source, matchCode} +gpsOrigin: true // Use GPS as origin when available +pendingDestination: null // Place waiting for origin (GPS-denied flow) +route: null // Valhalla trip response +routeLoading: false +routeError: null +``` + +### Failure Modes + +1. **No visible from/to inputs** — Users cannot see or directly edit origin/destination +2. **SearchBar hidden mode-switching** — Three different behaviors based on invisible state: + - Normal: opens place detail + - With `pendingDestination`: first result becomes origin + - After adding stops: unclear which role next selection plays +3. **GPS-denied flow uses ephemeral toast** — "Set a starting point" disappears, no persistent UI guidance +4. **No swap button** — Cannot reverse route direction +5. **No map context menu** — Right-click/long-press does nothing +6. **No waypoint addition UI** — Only drag-drop reordering, no insert-between +7. **Place panel "Directions" silently sets up route** — Based on hidden state, no confirmation + +--- + +## 2. Design Principles + +1. **Direct manipulation over hidden modes** — Every action should have visible UI +2. **Two visible inputs always** — When in directions mode, From and To fields are always visible +3. **Spatial interactions over linear** — Radial menu for map interactions, not dropdowns +4. **Same gesture model everywhere** — Right-click (desktop) = long-press (mobile) +5. **Preserve existing state model** — `stops[]` array stays, just better UI on top + +--- + +## 3. Visual Mockup — Directions Panel + +``` +┌─────────────────────────────────────┐ +│ DIRECTIONS │ +├─────────────────────────────────────┤ +│ │ +│ From: [📍 Your location ][×] │ +│ ──────────────────────── │ +│ [⇅] │ ← Swap button +│ ──────────────────────── │ +│ To: [Coffee shop on Main St][×] │ +│ │ +│ [+ Add stop] │ +│ │ +├─────────────────────────────────────┤ +│ [🚗 Auto] [🚶 Walk] [🚲 Bike] │ +├─────────────────────────────────────┤ +│ ┌─────────────────────────────┐ │ +│ │ 12 min · 4.2 mi │ │ +│ │ via W Main St │ │ +│ └─────────────────────────────┘ │ +│ │ +│ ▼ Turn-by-turn (expandable) │ +│ → Head north on Oak Ave │ +│ ↱ Turn right onto Main St │ +│ ◉ Arrive at destination │ +│ │ +└─────────────────────────────────────┘ +``` + +### Input States + +**From field:** +- GPS granted: Shows "📍 Your location" pill with clear button +- GPS denied/cleared: Empty, placeholder "Starting point..." +- Filled: Shows place name with clear button + +**To field:** +- Empty: Placeholder "Destination..." +- Filled: Shows place name with clear button + +**Active input:** +- Blue border highlight +- Search dropdown appears on typing +- Map click populates this field + +--- + +## 4. Visual Mockup — Radial Map Menu + +``` + Drop pin + 🔴 + ╱ ╲ + ╱ ╲ + Directions ╱ ╲ Directions + from here 🟢──────────────🔵 to here + │ 43.6166 │ + │ -116.2008 │ + │ [loading…] │ ← Center disc with coords/label + Add as 🟡──────────────🟣 Save place + stop ╲ ╱ + ╲ ╱ + ╲ ╱ + 🟠 + What's here +``` + +### Wedge Layout (60° each) + +| Position | Action | Icon | Color | +|----------|--------|------|-------| +| Top | Drop pin | Pin | Red | +| Top-right | Directions to here | Arrow-in | Blue | +| Bottom-right | Save place | Star | Purple | +| Bottom | What's here | Info | Orange | +| Bottom-left | Add as stop | Plus | Yellow | +| Top-left | Directions from here | Arrow-out | Green | + +### Behavior + +- **Trigger:** Right-click (desktop) or long-press 400-500ms (mobile) +- **Center disc:** ~40px diameter, shows coordinates immediately, reverse-geocoded label async +- **Wedge highlight:** On hover (desktop) or drag-over (mobile) +- **Commit:** Release on wedge (mobile) or click wedge (desktop) +- **Cancel:** Release outside, Escape key, tap elsewhere + +--- + +## 5. Component Breakdown + +### DirectionsPanel + +Replaces current Panel directions mode. + +``` +Props: none (reads from store) +State: none (all in global store) +Children: + - LocationInput (from) + - SwapButton + - LocationInput (to) + - WaypointList (if stops.length > 2) + - AddStopButton + - ModeSelector + - RouteSummary + - ManeuverList (collapsible) +``` + +### LocationInput + +Reusable component for from, to, and waypoint inputs. + +``` +Props: + - slot: 'from' | 'to' | `waypoint:${index}` + - value: { lat, lon, name, source } | null + - placeholder: string + - showGpsPill: boolean + - onClear: () => void + +Features: + - Search-as-you-type (Photon geocoder) + - GPS pill state with clear button + - Active-input visual state (blue border) + - Reverse-geocoded labels for coord-only entries + - Dropdown for search results +``` + +### SwapButton + +Simple button between From and To inputs. + +``` +Props: none +Action: Swaps stops[0] and stops[stops.length - 1] +Visual: ⇅ icon, hover highlight +``` + +### WaypointList + +Refactored from existing StopList, preserves drag-drop. + +``` +Props: none (reads stops from store) +Features: + - Only renders stops[1..n-1] (middle waypoints) + - Drag-drop reordering via @dnd-kit + - Delete button per waypoint + - "Via" label prefix +``` + +### RadialMenu + +New general-purpose component. + +``` +Props: + - open: boolean + - x: number (screen X) + - y: number (screen Y) + - lat: number + - lon: number + - wedges: Array<{ id, icon, label, action: (lat, lon) => void }> + - onClose: () => void + +Features: + - Configurable wedge count and actions + - Async center label (reverse geocode) + - Keyboard dismissal (Escape) + - Touch-friendly sizing on mobile + - Fade in/out animations +``` + +--- + +## 6. State Model + +### Existing (unchanged) + +```javascript +stops: [] // Origin = stops[0], destination = stops[last], waypoints in between +gpsOrigin: boolean // Whether GPS should be used as origin +route: object | null // Valhalla trip response +routeLoading: boolean +routeError: string | null +``` + +### New + +```javascript +activeInputSlot: 'from' | 'to' | `waypoint:${N}` | null +// Which input is currently focused/active for map-click-to-fill + +radialMenuState: { + open: boolean, + x: number, // Screen coordinates + y: number, + lat: number, // Map coordinates + lon: number, + label: string | null // Reverse-geocoded, async populated +} +``` + +### Removed + +```javascript +pendingDestination: null // No longer needed — explicit inputs replace hidden state +``` + +--- + +## 7. Interaction Flows + +### Open directions tab fresh + +1. From field shows GPS pill if `geoPermission === 'granted'`, else empty +2. To field is empty, focused by default +3. No route calculated yet + +### Click "Directions" from place panel + +1. Directions panel opens (if not already) +2. To field auto-fills with selected place +3. From field: + - If GPS granted: shows GPS pill + - Else: empty, receives focus +4. Route calculates if both filled + +### Type in input + +1. Input receives focus, becomes `activeInputSlot` +2. Photon search fires on debounce (300ms) +3. Dropdown shows results +4. Select result → populates input, clears dropdown +5. Route recalculates + +### Right-click / long-press on map + +1. Radial menu appears centered on click point +2. Center disc shows coordinates immediately +3. Reverse geocode fires async, populates label +4. User hovers/drags to wedge: + +| Wedge | Action | +|-------|--------| +| **Directions from here** | Opens directions if closed, fills From with coords, focuses To | +| **Directions to here** | Opens directions if closed, fills To with coords, focuses From if empty | +| **Add as stop** | Inserts new stop before destination | +| **What's here** | Reverse geocode → opens place panel | +| **Drop pin** | Creates transient marker (session-only) | +| **Save place** | Opens save dialog (auth required) | + +5. Release outside or Escape → dismisses without action + +### Click map with active input + +When directions panel is open and an input is focused (`activeInputSlot !== null`): + +1. Single click on map +2. Clicked coordinates populate the active input +3. Reverse geocode fires to get display name +4. Input loses focus, `activeInputSlot = null` +5. Route recalculates + +### Swap button + +1. Click swap button +2. `stops[0]` and `stops[stops.length - 1]` swap positions +3. If GPS was origin, GPS pill moves to destination (unusual but allowed) +4. Route recalculates + +--- + +## 8. Place Panel "Directions" Handoff + +**Current behavior:** Calls `startDirections(place)` with complex conditional logic, may show toast. + +**New behavior:** + +```javascript +handleDirections = () => { + // Always open directions panel + setActiveTab('directions') + + // Fill destination + setStop(stops.length, { // Appends or replaces last + lat: place.lat, + lon: place.lon, + name: place.name, + source: place.source + }) + + // Handle origin + if (geoPermission === 'granted') { + setGpsOrigin(true) // GPS pill in From + } else if (stops.length === 0) { + setActiveInputSlot('from') // Focus From input + } + + // Close place panel + clearSelectedPlace() +} +``` + +**No toast needed** — UI is self-explanatory with visible From/To fields. + +--- + +## 9. Radial Menu Specifics + +### Trigger + +| Platform | Gesture | Duration | +|----------|---------|----------| +| Desktop | Right-click | Instant | +| Mobile | Long-press | 400-500ms | + +### Conflict Avoidance + +Long-press must NOT fire during active pan: +- Track touch start position +- If touch moves >5px before timer fires, cancel long-press +- Pan gesture takes priority + +### Geometry + +``` +Outer radius: ~80px from center +Inner radius: ~40px (center disc) +Wedge angle: 60° each (6 wedges) +Gap between wedges: 2px +``` + +### Visual States + +| Element | Default | Hover/Active | Selected | +|---------|---------|--------------|----------| +| Wedge background | `rgba(0,0,0,0.7)` | `rgba(0,0,0,0.85)` | Wedge accent color | +| Wedge icon | White, 50% opacity | White, 100% opacity | White | +| Wedge label | Hidden | Shown (tooltip) | Shown | +| Center disc | Dark, coords visible | — | — | + +### Animation + +- **Fade in:** <100ms ease-out +- **Fade out:** <150ms ease-in +- **Wedge hover:** Instant background change +- **Center label:** Fade in when reverse geocode completes + +--- + +## 10. Mobile Considerations + +### Panel Layout + +**Decision needed:** Bottom sheet vs side panel + +| Option | Pros | Cons | +|--------|------|------| +| Bottom sheet | Familiar (Google Maps), thumb-friendly | Complex sheet state management | +| Side panel | Consistent with desktop, more vertical space | Covers more map, less thumb-friendly | + +**Recommendation:** Bottom sheet with three states: collapsed (summary only), half (inputs + summary), full (inputs + maneuvers). + +### Long-press Timing + +**Decision needed:** Exact timing + +| Duration | Feel | +|----------|------| +| 400ms | Snappy, risk of accidental trigger | +| 450ms | Balanced | +| 500ms | Deliberate, slightly sluggish | + +**Recommendation:** Start with 450ms, tune based on testing. + +### Radial Sizing + +Mobile radial should be larger for finger touch: +- Outer radius: ~100px (vs 80px desktop) +- Center disc: ~50px (vs 40px desktop) +- Minimum wedge touch target: 48px + +### Compact Directions Mode + +When route is calculated and user is navigating: +1. Collapse From/To inputs to single-line summary +2. Show prominent next maneuver +3. Expand on tap to edit inputs +4. Maneuver list scrollable + +### Keyboard Awareness + +- Detect keyboard open via `visualViewport` API +- Shift panel content up to keep active input visible +- Don't let keyboard overlap input being typed in + +--- + +## 11. Place Panel Restructure + +**Out of scope for this document.** + +Separate session will address: +- Cleaner info card layout (Google Maps style) +- Better visual hierarchy +- Action button placement +- No new data sources, just CSS/JSX polish + +--- + +## 12. Out of Scope (Future Phases) + +| Feature | Notes | +|---------|-------| +| Saved routes | Auth required, dedicated work | +| Route alternatives | Valhalla supports, surface in v2 | +| Avoid tolls/highways | Valhalla supports via costing options | +| Real-time rerouting | Requires location tracking loop | +| Multi-modal | Drive + transit + walk hybrids | +| Traffic-aware routing | Requires traffic data source | +| Offline routing | Requires local Valhalla instance | + +--- + +## 13. Implementation Sequence + +| Phase | Task | Depends On | +|-------|------|------------| +| **a** | Build RadialMenu component (general-purpose, no actions wired) | — | +| **b** | Wire "What's here" action to validate trigger + reverse-geocode flow | a | +| **c** | Refactor SearchBar to single-mode (search-only, remove pending* logic) | — | +| **d** | Build LocationInput component (reusable) | c | +| **e** | Build DirectionsPanel layout with two LocationInputs | d | +| **f** | Wire remaining radial actions to directions flow | b, e | +| **g** | Wire place panel "Directions" handoff to new flow | e | +| **h** | Add SwapButton | e | +| **i** | Add map-click-to-fill-active-input | e | +| **j** | Mobile polish (long-press timing, bottom sheet, keyboard) | a-i | + +**Estimated phases:** 10 discrete tasks, can be done incrementally. + +--- + +## 14. Open Questions + +### For Matt to decide: + +1. **Bottom sheet vs side panel on mobile?** + - Bottom sheet recommended but adds complexity + +2. **Long-press timing exactly?** + - 400ms / 450ms / 500ms + - Recommend 450ms + +3. **Should "Save place" wedge be visible to guests or hidden?** + - Visible with login prompt = more discoverable + - Hidden = cleaner for guests + - Recommend: visible, shows "Sign in to save" toast + +4. **Inner ring of secondary actions in radial v2?** + - Could add less-common actions in inner ring + - Recommend: stay single-ring for v1, evaluate need later + +5. **What does "Drop pin" persistence look like?** + - Session only (lost on refresh) + - localStorage (persists locally) + - Auth-only saved (sync across devices) + - Recommend: session-only for v1, localStorage for v2 + +6. **Radial on map click during active input?** + - Option A: No radial, click fills input directly + - Option B: Radial appears, "Use this location" wedge fills input + - Recommend: Option A (direct fill) for simplicity + +--- + +## Appendix A: Current Code References + +| File | Lines | Relevance | +|------|-------|-----------| +| `store.js` | 72-86 | `startDirections()` logic to replace | +| `store.js` | 16-34 | `stops[]` management to preserve | +| `SearchBar.jsx` | 140-170 | `pendingDestination` logic to remove | +| `PlaceDetail.jsx` | 574-579 | `handleDirections()` to rewrite | +| `App.jsx` | 31-66 | Route fetch effect to preserve | +| `api.js` | 29-56 | `requestRoute()` unchanged | + +--- + +## Appendix B: Radial Menu SVG Structure + +```svg + + + + + + + + + + + 43.6166 + -116.2008 + Loading... + + + + + + + +``` + +--- + +*Document created 2026-04-26. Implementation to follow in dedicated session.* From 07e6d0460bb18ca94dea66dd4bd37c8c5bb28aa7 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 05:03:03 +0000 Subject: [PATCH 26/72] docs: remove design doc (relocated to matt/refactored-recon) NAVI-DIRECTIONS-REDESIGN.md belongs in the design-docs repo (matt/refactored-recon) alongside PROJECT-BIBLE.md, AUTH-PUBLIC-FRONTEND.md, and other design artifacts. Code repo holds code only. --- docs/NAVI-DIRECTIONS-REDESIGN.md | 579 ------------------------------- 1 file changed, 579 deletions(-) delete mode 100644 docs/NAVI-DIRECTIONS-REDESIGN.md diff --git a/docs/NAVI-DIRECTIONS-REDESIGN.md b/docs/NAVI-DIRECTIONS-REDESIGN.md deleted file mode 100644 index 705201f..0000000 --- a/docs/NAVI-DIRECTIONS-REDESIGN.md +++ /dev/null @@ -1,579 +0,0 @@ -# Navi Directions UX Redesign - -**Status:** Draft -**Author:** Claude + Matt -**Date:** 2026-04-26 -**Implementation:** Deferred to dedicated session - ---- - -## 1. Current State - -### Components - -| Component | File | Role | -|-----------|------|------| -| SearchBar | `SearchBar.jsx` | Overloaded: search, add stop, set origin (hidden modes) | -| StopList | `StopList.jsx` | Drag-drop reordering of stops | -| GpsOriginItem | `GpsOriginItem.jsx` | "Your location" row when GPS granted | -| StopItem | `StopItem.jsx` | Individual stop with delete button | -| ModeSelector | `ModeSelector.jsx` | auto/pedestrian/bicycle toggle | -| ManeuverList | `ManeuverList.jsx` | Turn-by-turn directions display | -| PlaceDetail | `PlaceDetail.jsx` | "Directions" button for selected place | - -### State Model - -```javascript -stops: [] // Array of {id, lat, lon, name, source, matchCode} -gpsOrigin: true // Use GPS as origin when available -pendingDestination: null // Place waiting for origin (GPS-denied flow) -route: null // Valhalla trip response -routeLoading: false -routeError: null -``` - -### Failure Modes - -1. **No visible from/to inputs** — Users cannot see or directly edit origin/destination -2. **SearchBar hidden mode-switching** — Three different behaviors based on invisible state: - - Normal: opens place detail - - With `pendingDestination`: first result becomes origin - - After adding stops: unclear which role next selection plays -3. **GPS-denied flow uses ephemeral toast** — "Set a starting point" disappears, no persistent UI guidance -4. **No swap button** — Cannot reverse route direction -5. **No map context menu** — Right-click/long-press does nothing -6. **No waypoint addition UI** — Only drag-drop reordering, no insert-between -7. **Place panel "Directions" silently sets up route** — Based on hidden state, no confirmation - ---- - -## 2. Design Principles - -1. **Direct manipulation over hidden modes** — Every action should have visible UI -2. **Two visible inputs always** — When in directions mode, From and To fields are always visible -3. **Spatial interactions over linear** — Radial menu for map interactions, not dropdowns -4. **Same gesture model everywhere** — Right-click (desktop) = long-press (mobile) -5. **Preserve existing state model** — `stops[]` array stays, just better UI on top - ---- - -## 3. Visual Mockup — Directions Panel - -``` -┌─────────────────────────────────────┐ -│ DIRECTIONS │ -├─────────────────────────────────────┤ -│ │ -│ From: [📍 Your location ][×] │ -│ ──────────────────────── │ -│ [⇅] │ ← Swap button -│ ──────────────────────── │ -│ To: [Coffee shop on Main St][×] │ -│ │ -│ [+ Add stop] │ -│ │ -├─────────────────────────────────────┤ -│ [🚗 Auto] [🚶 Walk] [🚲 Bike] │ -├─────────────────────────────────────┤ -│ ┌─────────────────────────────┐ │ -│ │ 12 min · 4.2 mi │ │ -│ │ via W Main St │ │ -│ └─────────────────────────────┘ │ -│ │ -│ ▼ Turn-by-turn (expandable) │ -│ → Head north on Oak Ave │ -│ ↱ Turn right onto Main St │ -│ ◉ Arrive at destination │ -│ │ -└─────────────────────────────────────┘ -``` - -### Input States - -**From field:** -- GPS granted: Shows "📍 Your location" pill with clear button -- GPS denied/cleared: Empty, placeholder "Starting point..." -- Filled: Shows place name with clear button - -**To field:** -- Empty: Placeholder "Destination..." -- Filled: Shows place name with clear button - -**Active input:** -- Blue border highlight -- Search dropdown appears on typing -- Map click populates this field - ---- - -## 4. Visual Mockup — Radial Map Menu - -``` - Drop pin - 🔴 - ╱ ╲ - ╱ ╲ - Directions ╱ ╲ Directions - from here 🟢──────────────🔵 to here - │ 43.6166 │ - │ -116.2008 │ - │ [loading…] │ ← Center disc with coords/label - Add as 🟡──────────────🟣 Save place - stop ╲ ╱ - ╲ ╱ - ╲ ╱ - 🟠 - What's here -``` - -### Wedge Layout (60° each) - -| Position | Action | Icon | Color | -|----------|--------|------|-------| -| Top | Drop pin | Pin | Red | -| Top-right | Directions to here | Arrow-in | Blue | -| Bottom-right | Save place | Star | Purple | -| Bottom | What's here | Info | Orange | -| Bottom-left | Add as stop | Plus | Yellow | -| Top-left | Directions from here | Arrow-out | Green | - -### Behavior - -- **Trigger:** Right-click (desktop) or long-press 400-500ms (mobile) -- **Center disc:** ~40px diameter, shows coordinates immediately, reverse-geocoded label async -- **Wedge highlight:** On hover (desktop) or drag-over (mobile) -- **Commit:** Release on wedge (mobile) or click wedge (desktop) -- **Cancel:** Release outside, Escape key, tap elsewhere - ---- - -## 5. Component Breakdown - -### DirectionsPanel - -Replaces current Panel directions mode. - -``` -Props: none (reads from store) -State: none (all in global store) -Children: - - LocationInput (from) - - SwapButton - - LocationInput (to) - - WaypointList (if stops.length > 2) - - AddStopButton - - ModeSelector - - RouteSummary - - ManeuverList (collapsible) -``` - -### LocationInput - -Reusable component for from, to, and waypoint inputs. - -``` -Props: - - slot: 'from' | 'to' | `waypoint:${index}` - - value: { lat, lon, name, source } | null - - placeholder: string - - showGpsPill: boolean - - onClear: () => void - -Features: - - Search-as-you-type (Photon geocoder) - - GPS pill state with clear button - - Active-input visual state (blue border) - - Reverse-geocoded labels for coord-only entries - - Dropdown for search results -``` - -### SwapButton - -Simple button between From and To inputs. - -``` -Props: none -Action: Swaps stops[0] and stops[stops.length - 1] -Visual: ⇅ icon, hover highlight -``` - -### WaypointList - -Refactored from existing StopList, preserves drag-drop. - -``` -Props: none (reads stops from store) -Features: - - Only renders stops[1..n-1] (middle waypoints) - - Drag-drop reordering via @dnd-kit - - Delete button per waypoint - - "Via" label prefix -``` - -### RadialMenu - -New general-purpose component. - -``` -Props: - - open: boolean - - x: number (screen X) - - y: number (screen Y) - - lat: number - - lon: number - - wedges: Array<{ id, icon, label, action: (lat, lon) => void }> - - onClose: () => void - -Features: - - Configurable wedge count and actions - - Async center label (reverse geocode) - - Keyboard dismissal (Escape) - - Touch-friendly sizing on mobile - - Fade in/out animations -``` - ---- - -## 6. State Model - -### Existing (unchanged) - -```javascript -stops: [] // Origin = stops[0], destination = stops[last], waypoints in between -gpsOrigin: boolean // Whether GPS should be used as origin -route: object | null // Valhalla trip response -routeLoading: boolean -routeError: string | null -``` - -### New - -```javascript -activeInputSlot: 'from' | 'to' | `waypoint:${N}` | null -// Which input is currently focused/active for map-click-to-fill - -radialMenuState: { - open: boolean, - x: number, // Screen coordinates - y: number, - lat: number, // Map coordinates - lon: number, - label: string | null // Reverse-geocoded, async populated -} -``` - -### Removed - -```javascript -pendingDestination: null // No longer needed — explicit inputs replace hidden state -``` - ---- - -## 7. Interaction Flows - -### Open directions tab fresh - -1. From field shows GPS pill if `geoPermission === 'granted'`, else empty -2. To field is empty, focused by default -3. No route calculated yet - -### Click "Directions" from place panel - -1. Directions panel opens (if not already) -2. To field auto-fills with selected place -3. From field: - - If GPS granted: shows GPS pill - - Else: empty, receives focus -4. Route calculates if both filled - -### Type in input - -1. Input receives focus, becomes `activeInputSlot` -2. Photon search fires on debounce (300ms) -3. Dropdown shows results -4. Select result → populates input, clears dropdown -5. Route recalculates - -### Right-click / long-press on map - -1. Radial menu appears centered on click point -2. Center disc shows coordinates immediately -3. Reverse geocode fires async, populates label -4. User hovers/drags to wedge: - -| Wedge | Action | -|-------|--------| -| **Directions from here** | Opens directions if closed, fills From with coords, focuses To | -| **Directions to here** | Opens directions if closed, fills To with coords, focuses From if empty | -| **Add as stop** | Inserts new stop before destination | -| **What's here** | Reverse geocode → opens place panel | -| **Drop pin** | Creates transient marker (session-only) | -| **Save place** | Opens save dialog (auth required) | - -5. Release outside or Escape → dismisses without action - -### Click map with active input - -When directions panel is open and an input is focused (`activeInputSlot !== null`): - -1. Single click on map -2. Clicked coordinates populate the active input -3. Reverse geocode fires to get display name -4. Input loses focus, `activeInputSlot = null` -5. Route recalculates - -### Swap button - -1. Click swap button -2. `stops[0]` and `stops[stops.length - 1]` swap positions -3. If GPS was origin, GPS pill moves to destination (unusual but allowed) -4. Route recalculates - ---- - -## 8. Place Panel "Directions" Handoff - -**Current behavior:** Calls `startDirections(place)` with complex conditional logic, may show toast. - -**New behavior:** - -```javascript -handleDirections = () => { - // Always open directions panel - setActiveTab('directions') - - // Fill destination - setStop(stops.length, { // Appends or replaces last - lat: place.lat, - lon: place.lon, - name: place.name, - source: place.source - }) - - // Handle origin - if (geoPermission === 'granted') { - setGpsOrigin(true) // GPS pill in From - } else if (stops.length === 0) { - setActiveInputSlot('from') // Focus From input - } - - // Close place panel - clearSelectedPlace() -} -``` - -**No toast needed** — UI is self-explanatory with visible From/To fields. - ---- - -## 9. Radial Menu Specifics - -### Trigger - -| Platform | Gesture | Duration | -|----------|---------|----------| -| Desktop | Right-click | Instant | -| Mobile | Long-press | 400-500ms | - -### Conflict Avoidance - -Long-press must NOT fire during active pan: -- Track touch start position -- If touch moves >5px before timer fires, cancel long-press -- Pan gesture takes priority - -### Geometry - -``` -Outer radius: ~80px from center -Inner radius: ~40px (center disc) -Wedge angle: 60° each (6 wedges) -Gap between wedges: 2px -``` - -### Visual States - -| Element | Default | Hover/Active | Selected | -|---------|---------|--------------|----------| -| Wedge background | `rgba(0,0,0,0.7)` | `rgba(0,0,0,0.85)` | Wedge accent color | -| Wedge icon | White, 50% opacity | White, 100% opacity | White | -| Wedge label | Hidden | Shown (tooltip) | Shown | -| Center disc | Dark, coords visible | — | — | - -### Animation - -- **Fade in:** <100ms ease-out -- **Fade out:** <150ms ease-in -- **Wedge hover:** Instant background change -- **Center label:** Fade in when reverse geocode completes - ---- - -## 10. Mobile Considerations - -### Panel Layout - -**Decision needed:** Bottom sheet vs side panel - -| Option | Pros | Cons | -|--------|------|------| -| Bottom sheet | Familiar (Google Maps), thumb-friendly | Complex sheet state management | -| Side panel | Consistent with desktop, more vertical space | Covers more map, less thumb-friendly | - -**Recommendation:** Bottom sheet with three states: collapsed (summary only), half (inputs + summary), full (inputs + maneuvers). - -### Long-press Timing - -**Decision needed:** Exact timing - -| Duration | Feel | -|----------|------| -| 400ms | Snappy, risk of accidental trigger | -| 450ms | Balanced | -| 500ms | Deliberate, slightly sluggish | - -**Recommendation:** Start with 450ms, tune based on testing. - -### Radial Sizing - -Mobile radial should be larger for finger touch: -- Outer radius: ~100px (vs 80px desktop) -- Center disc: ~50px (vs 40px desktop) -- Minimum wedge touch target: 48px - -### Compact Directions Mode - -When route is calculated and user is navigating: -1. Collapse From/To inputs to single-line summary -2. Show prominent next maneuver -3. Expand on tap to edit inputs -4. Maneuver list scrollable - -### Keyboard Awareness - -- Detect keyboard open via `visualViewport` API -- Shift panel content up to keep active input visible -- Don't let keyboard overlap input being typed in - ---- - -## 11. Place Panel Restructure - -**Out of scope for this document.** - -Separate session will address: -- Cleaner info card layout (Google Maps style) -- Better visual hierarchy -- Action button placement -- No new data sources, just CSS/JSX polish - ---- - -## 12. Out of Scope (Future Phases) - -| Feature | Notes | -|---------|-------| -| Saved routes | Auth required, dedicated work | -| Route alternatives | Valhalla supports, surface in v2 | -| Avoid tolls/highways | Valhalla supports via costing options | -| Real-time rerouting | Requires location tracking loop | -| Multi-modal | Drive + transit + walk hybrids | -| Traffic-aware routing | Requires traffic data source | -| Offline routing | Requires local Valhalla instance | - ---- - -## 13. Implementation Sequence - -| Phase | Task | Depends On | -|-------|------|------------| -| **a** | Build RadialMenu component (general-purpose, no actions wired) | — | -| **b** | Wire "What's here" action to validate trigger + reverse-geocode flow | a | -| **c** | Refactor SearchBar to single-mode (search-only, remove pending* logic) | — | -| **d** | Build LocationInput component (reusable) | c | -| **e** | Build DirectionsPanel layout with two LocationInputs | d | -| **f** | Wire remaining radial actions to directions flow | b, e | -| **g** | Wire place panel "Directions" handoff to new flow | e | -| **h** | Add SwapButton | e | -| **i** | Add map-click-to-fill-active-input | e | -| **j** | Mobile polish (long-press timing, bottom sheet, keyboard) | a-i | - -**Estimated phases:** 10 discrete tasks, can be done incrementally. - ---- - -## 14. Open Questions - -### For Matt to decide: - -1. **Bottom sheet vs side panel on mobile?** - - Bottom sheet recommended but adds complexity - -2. **Long-press timing exactly?** - - 400ms / 450ms / 500ms - - Recommend 450ms - -3. **Should "Save place" wedge be visible to guests or hidden?** - - Visible with login prompt = more discoverable - - Hidden = cleaner for guests - - Recommend: visible, shows "Sign in to save" toast - -4. **Inner ring of secondary actions in radial v2?** - - Could add less-common actions in inner ring - - Recommend: stay single-ring for v1, evaluate need later - -5. **What does "Drop pin" persistence look like?** - - Session only (lost on refresh) - - localStorage (persists locally) - - Auth-only saved (sync across devices) - - Recommend: session-only for v1, localStorage for v2 - -6. **Radial on map click during active input?** - - Option A: No radial, click fills input directly - - Option B: Radial appears, "Use this location" wedge fills input - - Recommend: Option A (direct fill) for simplicity - ---- - -## Appendix A: Current Code References - -| File | Lines | Relevance | -|------|-------|-----------| -| `store.js` | 72-86 | `startDirections()` logic to replace | -| `store.js` | 16-34 | `stops[]` management to preserve | -| `SearchBar.jsx` | 140-170 | `pendingDestination` logic to remove | -| `PlaceDetail.jsx` | 574-579 | `handleDirections()` to rewrite | -| `App.jsx` | 31-66 | Route fetch effect to preserve | -| `api.js` | 29-56 | `requestRoute()` unchanged | - ---- - -## Appendix B: Radial Menu SVG Structure - -```svg - - - - - - - - - - - 43.6166 - -116.2008 - Loading... - - - - - - - -``` - ---- - -*Document created 2026-04-26. Implementation to follow in dedicated session.* From e9c9cee4f3cb2a63acfaf8f04ffccfabe2472850 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 08:15:16 +0000 Subject: [PATCH 27/72] feat: Add wikidata lookup endpoint for place enrichment - Add get_place_by_wikidata() to place_detail.py - Queries Wikidata API for entity details (name, description, coords) - Extracts population, instance_of, OSM relation ID, Wikipedia link - Add /api/place/wikidata/ route to api.py Supports Navi basemap label enrichment when OSM details unavailable. Co-Authored-By: Claude Opus 4.5 --- lib/api.py | 9 +++- lib/place_detail.py | 128 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/lib/api.py b/lib/api.py index c9105ad..732b5de 100644 --- a/lib/api.py +++ b/lib/api.py @@ -25,7 +25,7 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB from .deployment_config import get_deployment_config -from .place_detail import get_place_detail +from .place_detail import get_place_detail, get_place_by_wikidata from .landclass import lookup_landclass, format_summary logger = setup_logging('recon.api') @@ -1235,6 +1235,13 @@ def api_place_detail(osm_type, osm_id): return jsonify(result), status +@app.route("/api/place/wikidata/") +def api_place_wikidata(wikidata_id): + """Fetch place details from Wikidata entity.""" + result, status = get_place_by_wikidata(wikidata_id) + return jsonify(result), status + + @app.route('/api/landclass') def api_landclass(): diff --git a/lib/place_detail.py b/lib/place_detail.py index efa805c..51931c5 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -661,3 +661,131 @@ def get_place_detail(osm_type, osm_id): # Not found in either source (no errors, just empty results) return {'error': f'{osm_type}/{osm_id} not found'}, 404 + + +# ── Wikidata lookup ───────────────────────────────────────────────────── + +WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php" + +def get_place_by_wikidata(wikidata_id): + """ + Fetch place details from Wikidata entity. + + Returns (dict, status_code): + - (data, 200) on success + - (error_dict, 404) if entity not found + - (error_dict, 400) if invalid ID format + - (error_dict, 502) on API error + """ + # Validate wikidata ID format (Q followed by digits) + wikidata_id = wikidata_id.upper().strip() + if not wikidata_id.startswith("Q") or not wikidata_id[1:].isdigit(): + return {"error": f"Invalid wikidata ID: {wikidata_id}. Must be Q followed by digits."}, 400 + + try: + resp = http_requests.get(WIKIDATA_API_URL, params={ + "action": "wbgetentities", + "ids": wikidata_id, + "format": "json", + "languages": "en", + "props": "labels|descriptions|claims|sitelinks", + }, timeout=10, headers={"User-Agent": "Navi/1.0 (forge.echo6.co/matt/recon)"}) + + if resp.status_code != 200: + logger.warning(f"Wikidata API error for {wikidata_id}: HTTP {resp.status_code}") + return {"error": "Wikidata API error"}, 502 + + data = resp.json() + entities = data.get("entities", {}) + entity = entities.get(wikidata_id) + + if not entity or entity.get("missing"): + return {"error": f"Wikidata entity {wikidata_id} not found"}, 404 + + # Extract basic info + labels = entity.get("labels", {}) + descriptions = entity.get("descriptions", {}) + claims = entity.get("claims", {}) + + name = labels.get("en", {}).get("value", wikidata_id) + description = descriptions.get("en", {}).get("value", "") + + # Extract coordinates from P625 (coordinate location) + lat, lon = None, None + if "P625" in claims: + coord_claim = claims["P625"] + if coord_claim and coord_claim[0].get("mainsnak", {}).get("datavalue"): + coord_val = coord_claim[0]["mainsnak"]["datavalue"]["value"] + lat = coord_val.get("latitude") + lon = coord_val.get("longitude") + + # Extract population from P1082 + population = None + if "P1082" in claims: + pop_claims = claims["P1082"] + if pop_claims: + # Get the most recent population value + for claim in pop_claims: + if claim.get("mainsnak", {}).get("datavalue"): + try: + population = int(claim["mainsnak"]["datavalue"]["value"]["amount"].lstrip("+")) + break + except (KeyError, ValueError): + pass + + # Extract country from P17 + country = None + if "P17" in claims: + country_claims = claims["P17"] + if country_claims and country_claims[0].get("mainsnak", {}).get("datavalue"): + country_id = country_claims[0]["mainsnak"]["datavalue"]["value"]["id"] + # Could resolve this to a name, but for now just store the ID + + # Extract instance of (P31) for type classification + instance_of = [] + if "P31" in claims: + for claim in claims["P31"]: + if claim.get("mainsnak", {}).get("datavalue"): + instance_of.append(claim["mainsnak"]["datavalue"]["value"]["id"]) + + # Extract OSM relation ID if available (P402) + osm_relation_id = None + if "P402" in claims: + osm_claims = claims["P402"] + if osm_claims and osm_claims[0].get("mainsnak", {}).get("datavalue"): + osm_relation_id = osm_claims[0]["mainsnak"]["datavalue"]["value"] + + # Extract Wikipedia sitelink + sitelinks = entity.get("sitelinks", {}) + wikipedia = None + if "enwiki" in sitelinks: + wiki_title = sitelinks["enwiki"].get("title", "") + if wiki_title: + wikipedia = f"en:{wiki_title}" + + result = { + "wikidata_id": wikidata_id, + "name": name, + "description": description, + "centroid": {"lat": lat, "lon": lon} if lat and lon else None, + "population": population, + "instance_of": instance_of, + "osm_relation_id": osm_relation_id, + "source": "wikidata", + "extratags": { + "wikidata": wikidata_id, + }, + } + + if wikipedia: + result["extratags"]["wikipedia"] = wikipedia + + # If we have an OSM relation ID, we could optionally fetch more details + # from get_place_detail, but that would add latency + + logger.debug(f"Wikidata hit: {wikidata_id} -> {name}") + return result, 200 + + except Exception as e: + logger.warning(f"Wikidata error for {wikidata_id}: {e}") + return {"error": "Wikidata lookup failed"}, 502 From 2387a96a1e0af55eaaa12c17da83ff4268ac6b7f Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 08:26:47 +0000 Subject: [PATCH 28/72] feat(place): add boundary polygon to place detail response Request polygon_geojson=1 from Nominatim to include admin boundary polygons in place detail responses. Also fetch boundary via OSM relation ID for wikidata lookups. --- lib/place_detail.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/lib/place_detail.py b/lib/place_detail.py index 51931c5..e85ee54 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -460,6 +460,12 @@ def _parse_nominatim(data): # Filter names: only include extra name tags, not the bare "name" extra_names = {k: v for k, v in names.items() if k != 'name'} if names else {} + # Boundary geometry (polygon/multipolygon from Nominatim) + boundary = None + geom = data.get('geometry') + if geom and geom.get('type') in ('Polygon', 'MultiPolygon'): + boundary = geom + return { 'osm_type': osm_type, 'osm_id': osm_id, @@ -472,6 +478,7 @@ def _parse_nominatim(data): 'extratags': extratags, 'names': extra_names if extra_names else None, 'source': 'nominatim_local', + 'boundary': boundary, } @@ -600,6 +607,7 @@ def get_place_detail(osm_type, osm_id): 'addressdetails': 1, 'hierarchy': 0, 'keywords': 0, + 'polygon_geojson': 1, }, timeout=5) if resp.status_code == 200: @@ -780,8 +788,26 @@ def get_place_by_wikidata(wikidata_id): if wikipedia: result["extratags"]["wikipedia"] = wikipedia - # If we have an OSM relation ID, we could optionally fetch more details - # from get_place_detail, but that would add latency + # Fetch boundary polygon from Nominatim if we have an OSM relation ID + boundary = None + if osm_relation_id: + try: + nom_resp = http_requests.get(NOMINATIM_URL, params={ + 'osmtype': 'R', + 'osmid': osm_relation_id, + 'format': 'json', + 'polygon_geojson': 1, + }, timeout=5) + if nom_resp.status_code == 200: + nom_data = nom_resp.json() + geom = nom_data.get('geometry') + if geom and geom.get('type') in ('Polygon', 'MultiPolygon'): + boundary = geom + logger.debug(f"Wikidata boundary hit for {wikidata_id}") + except Exception as e: + logger.debug(f"Wikidata boundary fetch failed: {e}") + + result["boundary"] = boundary logger.debug(f"Wikidata hit: {wikidata_id} -> {name}") return result, 200 From b5de9c6e39a3c6b1a63c059c7c7cafc5ac7b43ee Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 26 Apr 2026 20:59:17 +0000 Subject: [PATCH 29/72] fix(geocode): apply viewport bias to Netsyms address results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /api/geocode endpoint blended Photon and Netsyms results, but only Photon respected viewport bias from prior work. Address queries to Netsyms/AddressDB returned globally-sorted matches regardless of where the user was looking — searching '214 North St' from Idaho returned Illinois results. Now fetches up to 200 Netsyms results when viewport lat/lon provided, sorts by squared distance from viewport center, then returns top N. Falls back to default ordering when viewport absent. Photon path unchanged. --- lib/geocode.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/geocode.py b/lib/geocode.py index 312cca7..aabd37e 100644 --- a/lib/geocode.py +++ b/lib/geocode.py @@ -264,7 +264,7 @@ def _classify_and_parse(query): # STEP 2: RETRIEVAL # ═══════════════════════════════════════════════════════════════════ -def _retrieve_netsyms(parsed, limit=10): +def _retrieve_netsyms(parsed, limit=10, lat=None, lon=None): """Query Netsyms for structured address lookup. Returns list of candidate dicts.""" try: from . import netsyms @@ -278,12 +278,15 @@ def _retrieve_netsyms(parsed, limit=10): state = parsed.get('state', '') zipcode = parsed.get('zipcode', '') + # When viewport provided, fetch more results to sort from + fetch_limit = 200 if (lat is not None and lon is not None) else limit + if number and street: rows = netsyms.lookup_by_street( - number, street, city=city, state=state, zipcode=zipcode, limit=limit + number, street, city=city, state=state, zipcode=zipcode, limit=fetch_limit ) elif zipcode: - rows = netsyms.lookup_by_zipcode(zipcode, limit=limit) + rows = netsyms.lookup_by_zipcode(zipcode, limit=fetch_limit) else: return [] @@ -305,6 +308,10 @@ def _retrieve_netsyms(parsed, limit=10): '_city': row.get('city', ''), '_state': row.get('state', ''), }) + # Sort by viewport distance if lat/lon provided, then limit + if lat is not None and lon is not None and results: + results.sort(key=lambda r: (r["lat"] - lat)**2 + (r["lon"] - lon)**2) + results = results[:limit] return results @@ -728,7 +735,7 @@ def geocode(query, limit=10, lat=None, lon=None, zoom=None): if intent == 'ADDRESS': # Parallel: Netsyms (structured) + Photon (freetext with expanded query) - netsyms_results = _retrieve_netsyms(parsed, limit=limit) + netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon) photon_results = _retrieve_photon_freetext( parsed.get('expanded_query', q), limit=limit, lat=lat, lon=lon, zoom=zoom ) @@ -737,7 +744,7 @@ def geocode(query, limit=10, lat=None, lon=None, zoom=None): candidates = netsyms_results + photon_results + photon_struct elif intent == 'POSTCODE': - netsyms_results = _retrieve_netsyms(parsed, limit=limit) + netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon) photon_results = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom) candidates = netsyms_results + photon_results From 121eb45b44a7c588f23f72e21535e94e0f90346a Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 27 Apr 2026 01:26:44 +0000 Subject: [PATCH 30/72] feat: add /api/auth/whoami endpoint for frontend auth state Returns {authenticated: bool, username: string|null} based on X-Authentik-Username header presence. Used by Navi frontend to detect auth state without triggering SSO redirect. --- lib/api.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/api.py b/lib/api.py index 732b5de..8a1f383 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2704,3 +2704,21 @@ def api_metrics_history(): return jsonify({'type': metric_type, 'hours': hours, 'points': points}) except Exception as e: return jsonify({'type': metric_type, 'hours': hours, 'points': [], 'error': str(e)}) + + +# ── Auth state endpoint ───────────────────────────────────────────────────── +# Returns current auth state for frontend consumption. +# This endpoint must be behind Caddy forward_auth to receive X-Authentik-* headers. +@app.route('/api/auth/whoami') +def api_auth_whoami(): + """Return auth state for frontend. Behind forward_auth, so headers are present when authenticated.""" + username = request.headers.get('X-Authentik-Username') + if username: + return jsonify({ + 'authenticated': True, + 'username': username, + }) + return jsonify({ + 'authenticated': False, + 'username': None, + }) From 991826b4f13c7f3b11be79833de730262596c142 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 27 Apr 2026 02:51:14 +0000 Subject: [PATCH 31/72] config: disable 10ft contour test layer (causes green wall on flat terrain) --- config/profiles/home.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 7acc475..5269812 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -41,7 +41,7 @@ features: has_public_lands_layer: true has_contours: true has_contours_test: true - has_contours_test_10ft: true + has_contours_test_10ft: false has_address_book_write: false has_overture_enrichment: true has_google_places_enrichment: true From b741e217f6257cc7f5424af1ff7d1a662e39a197 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 7 May 2026 01:32:25 +0000 Subject: [PATCH 32/72] =?UTF-8?q?fix:=20ZIM=20table=20extraction=20?= =?UTF-8?q?=E2=80=94=20pipe-delimited=20cells=20instead=20of=20concatenati?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-processes HTML tree before lxml .text_content() to prevent element concatenation: - cells joined with ' | ' delimiter, rows with newlines -
tags produce newlines -
  • items get '- ' prefix and newline separation -
    /
    definition list items get newline separation Fixes ~868 mangled Qdrant points where table content was jammed together (e.g. 'Freq51Primary1A==' instead of 'Freq51 | Primary | 1A=='). Co-Authored-By: Claude Opus 4.6 --- lib/processors/zim_processor.py | 66 +++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/lib/processors/zim_processor.py b/lib/processors/zim_processor.py index b258408..6f5c887 100644 --- a/lib/processors/zim_processor.py +++ b/lib/processors/zim_processor.py @@ -77,10 +77,73 @@ def _text_hash(text): return hashlib.md5(text.encode('utf-8')).hexdigest() +def _flatten_table(table_el): + """Convert a
  • element to pipe-delimited text. + + Each becomes a row with cells joined by ' | '. + Returns the formatted table as a string with blank lines around it. + """ + rows = [] + for tr in table_el.iter('tr'): + cells = [] + for cell in tr: + if cell.tag in ('td', 'th'): + cell_text = (cell.text_content() or '').strip() + # Collapse internal whitespace in each cell + cell_text = re.sub(r'\s+', ' ', cell_text) + if cell_text: + cells.append(cell_text) + if cells: + rows.append(' | '.join(cells)) + if not rows: + return '' + return '\n'.join(rows) + + +def _preprocess_tree(doc): + """Pre-process HTML tree to add delimiters before text_content() flattens it. + + Handles:
    ,
    ,
  • ,
    ,
    -- elements that lxml's + text_content() would concatenate without any separators. + """ + from lxml import etree + + # 1. Replace
  • elements with their pipe-delimited text + for table in list(doc.iter('table')): + formatted = _flatten_table(table) + if formatted: + replacement = etree.Element('div') + replacement.text = '\n\n' + formatted + '\n\n' + parent = table.getparent() + if parent is not None: + parent.replace(table, replacement) + else: + table.drop_tree() + + # 2.
    -> inject newline + for br in list(doc.iter('br')): + br.tail = '\n' + (br.tail or '') + + # 3.
  • -> inject newline + "- " prefix + for li in list(doc.iter('li')): + li.text = '- ' + (li.text or '') + li.tail = '\n' + (li.tail or '') + + # 4.
    -> inject newline before + for dt in list(doc.iter('dt')): + dt.tail = '\n' + (dt.tail or '') + + # 5.
    -> inject newline + indent + for dd in list(doc.iter('dd')): + dd.text = ' ' + (dd.text or '') + dd.tail = '\n' + (dd.tail or '') + + def _html_to_text(html_bytes): """Convert HTML bytes to clean text via lxml. Strips nav, footer, script, style elements. Decodes entities. + Pre-processes tables, lists, and line breaks for proper delimiters. Normalizes whitespace. """ try: @@ -93,6 +156,9 @@ def _html_to_text(html_bytes): for el in doc.iter(tag): el.drop_tree() + # Pre-process tree: tables -> pipe-delimited, br -> newlines, li -> dashes + _preprocess_tree(doc) + # Extract text text = doc.text_content() From 83a21854c39feeb16425f35da38fc3f3c1bf3091 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 7 May 2026 01:36:23 +0000 Subject: [PATCH 33/72] =?UTF-8?q?fix:=20PDF=20extraction=20quality=20?= =?UTF-8?q?=E2=80=94=20word-boundary=20checks=20and=20layout=20mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds _text_quality_ok() gate that replaces the bare 50-char length check at each stage of the extraction fallback chain. Checks: - Word-boundary ratio (≥60% of tokens must be real words) - Concatenation ratio (lc→UC transitions must be <10% of word count) When PyPDF2 default extraction fails quality check, retries with space_width=100 for tighter word-boundary detection. This fixes Haynes/workshop manuals where tight kerning produces concatenated words like 'byMike' and 'oftheGuild'. Also adds -layout flag to pdftotext subprocess calls for better spatial awareness in the poppler fallback stage. Note: PyPDF2 3.0.1 does not support layout=True parameter. The space_width parameter serves the same purpose. Co-Authored-By: Claude Opus 4.6 --- lib/extractor.py | 71 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/lib/extractor.py b/lib/extractor.py index 13159c9..bc236ab 100644 --- a/lib/extractor.py +++ b/lib/extractor.py @@ -21,6 +21,7 @@ Config: processing.extract_workers, processing.max_pdf_size_mb, processing.extract_timeout, processing.page_timeout """ import base64 +import re import json import os import random @@ -99,6 +100,40 @@ def _is_transient(error_str): return any(sig in s for sig in transient_signals) +def _text_quality_ok(text, min_length=50): + """Check if extracted text meets quality thresholds. + + Beyond the basic length check, validates: + - Word-boundary ratio: at least 60% of tokens should be real words (2+ alpha chars) + - Concatenation ratio: lowercase-immediately-followed-by-uppercase shouldn't exceed 10% of word count + + Returns True if text passes all checks. + """ + text = text.strip() + if len(text) < min_length: + return False + + words = text.split() + if not words: + return False + + # Word-like ratio: tokens with 2+ alphabetic characters + word_like = sum(1 for w in words if len(re.findall(r'[a-zA-Z]', w)) >= 2) + word_ratio = word_like / len(words) + if word_ratio < 0.60: + return False + + # Concatenation detector: lowercase immediately followed by uppercase + # Filter out common camelCase patterns in code (short tokens) + concat_hits = len(re.findall(r'[a-z][A-Z]', text)) + concat_ratio = concat_hits / len(words) if words else 0 + if concat_ratio > 0.10: + return False + + return True + + + def _render_page_to_png(pdf_path, page_num_1indexed, dpi=200, timeout=30): """Render a single PDF page to PNG bytes using pdftoppm. @@ -224,7 +259,7 @@ def _extract_page_without_reader(pdf_path, page_num_0indexed, page_timeout=30): # Method 1: pdftotext (poppler) try: result = subprocess.run( - ['pdftotext', '-f', str(page_num_0indexed + 1), + ['pdftotext', '-layout', '-f', str(page_num_0indexed + 1), '-l', str(page_num_0indexed + 1), pdf_path, '-'], capture_output=True, text=True, timeout=page_timeout ) @@ -233,7 +268,7 @@ def _extract_page_without_reader(pdf_path, page_num_0indexed, page_timeout=30): except Exception: pass - if len(text.strip()) >= 50: + if _text_quality_ok(text): return text, 'pdftotext' # Method 2: pdftoppm + Tesseract OCR @@ -258,7 +293,7 @@ def _extract_page_without_reader(pdf_path, page_num_0indexed, page_timeout=30): except Exception: pass - if len(text.strip()) >= 50: + if _text_quality_ok(text): return text, 'tesseract' # Method 3: Gemini Vision (last resort) @@ -276,8 +311,26 @@ def _extract_page_without_reader(pdf_path, page_num_0indexed, page_timeout=30): # ── Core extraction functions ── def _pypdf2_extract(reader, page_num): - """Extract text from a PyPDF2 page object. Runs inside a thread for timeout.""" - return reader.pages[page_num].extract_text() or '' + """Extract text from a PyPDF2 page object. Runs inside a thread for timeout. + + Tries default extraction first (space_width=200). If quality check fails, + retries with space_width=100 which better detects word boundaries in + tightly-kerned PDFs (common in Haynes/workshop manuals). + + Note: PyPDF2 3.0.1 does not support layout=True. The space_width parameter + controls word-boundary detection tolerance. Lower values = more aggressive + space insertion between characters. + """ + text = reader.pages[page_num].extract_text() or '' + if _text_quality_ok(text): + return text + + # Retry with tighter word-boundary detection + text_tight = reader.pages[page_num].extract_text(space_width=100.0) or '' + if len(text_tight.strip()) >= len(text.strip()): + return text_tight + + return text def extract_text_from_page(reader, page_num, pdf_path, page_timeout=30): @@ -302,13 +355,13 @@ def extract_text_from_page(reader, page_num, pdf_path, page_timeout=30): except Exception: text = '' - if len(text.strip()) >= 50: + if _text_quality_ok(text): return text, 'pypdf2' # Method 2: pdftotext via subprocess (inherently timeout-safe) try: result = subprocess.run( - ['pdftotext', '-f', str(page_num + 1), '-l', str(page_num + 1), pdf_path, '-'], + ['pdftotext', '-layout', '-f', str(page_num + 1), '-l', str(page_num + 1), pdf_path, '-'], capture_output=True, text=True, timeout=page_timeout ) if result.returncode == 0 and len(result.stdout.strip()) > len(text.strip()): @@ -316,7 +369,7 @@ def extract_text_from_page(reader, page_num, pdf_path, page_timeout=30): except Exception: pass - if len(text.strip()) >= 50: + if _text_quality_ok(text): return text, 'pdftotext' # Method 3: pdftoppm + Tesseract OCR @@ -340,7 +393,7 @@ def extract_text_from_page(reader, page_num, pdf_path, page_timeout=30): except Exception: pass - if len(text.strip()) >= 50: + if _text_quality_ok(text): return text, 'tesseract' # Method 4: Gemini Vision (last resort — costs API calls but handles scanned docs) From f2a0f81580a82bcb33bf5b72420d208f9282ba63 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 7 May 2026 23:43:56 +0000 Subject: [PATCH 34/72] =?UTF-8?q?feat(offroute):=20Phase=20O1=20foundation?= =?UTF-8?q?=20=E2=80=94=20PMTiles=20decoder,=20Tobler=20cost,=20MCP=20path?= =?UTF-8?q?finder=20prototype?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dem.py: Terrarium-encoded PMTiles tile reader with LRU cache - Decodes WebP tiles from planet-dem.pmtiles - Stitches tiles into numpy elevation grids for arbitrary bboxes - Provides pixel-to-latlon coordinate conversion - cost.py: Tobler off-path hiking cost function - speed = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) km/h - Max slope cutoff: 40 degrees → impassable - Returns time-to-traverse (seconds/cell) as cost metric - prototype.py: Standalone validation on Idaho test bbox - 43km × 80km bbox (~17M cells at 14m resolution) - scikit-image MCP_Geometric Dijkstra pathfinder - Outputs GeoJSON LineString with path metadata - Validated: 61.6km path, 21.3 hours effort time Co-Authored-By: Claude Opus 4.5 --- lib/offroute/__init__.py | 1 + lib/offroute/cost.py | 94 +++++++++++++ lib/offroute/dem.py | 190 ++++++++++++++++++++++++++ lib/offroute/prototype.py | 274 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 559 insertions(+) create mode 100644 lib/offroute/__init__.py create mode 100644 lib/offroute/cost.py create mode 100644 lib/offroute/dem.py create mode 100755 lib/offroute/prototype.py diff --git a/lib/offroute/__init__.py b/lib/offroute/__init__.py new file mode 100644 index 0000000..b0536cd --- /dev/null +++ b/lib/offroute/__init__.py @@ -0,0 +1 @@ +"""OFFROUTE: Off-network effort-based routing module.""" diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py new file mode 100644 index 0000000..f460ab9 --- /dev/null +++ b/lib/offroute/cost.py @@ -0,0 +1,94 @@ +""" +Tobler off-path hiking cost function for OFFROUTE. + +Computes travel time cost based on terrain slope using Tobler's +hiking function with off-trail penalty. +""" +import math +import numpy as np +from typing import Tuple + +# Maximum passable slope in degrees +MAX_SLOPE_DEG = 40.0 + +# Tobler off-path parameters +TOBLER_BASE_SPEED = 6.0 +TOBLER_OFF_TRAIL_MULT = 0.6 + + +def tobler_speed(grade: float) -> float: + """ + Calculate hiking speed using Tobler's off-path function. + + speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) + + Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). + """ + return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) + + +def compute_cost_grid( + elevation: np.ndarray, + cell_size_m: float, + cell_size_lat_m: float = None, + cell_size_lon_m: float = None +) -> np.ndarray: + """ + Compute isotropic travel cost grid from elevation data. + + Each cell's cost represents the time (in seconds) to traverse that cell, + based on the average slope from neighboring cells. + """ + if cell_size_lat_m is None: + cell_size_lat_m = cell_size_m + if cell_size_lon_m is None: + cell_size_lon_m = cell_size_m + + rows, cols = elevation.shape + + # Compute gradients in both directions + dy = np.zeros_like(elevation) + dx = np.zeros_like(elevation) + + # Central differences for interior, forward/backward at edges + dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) + dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m + dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m + + dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) + dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m + dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m + + # Compute slope magnitude (grade = rise/run) + grade_magnitude = np.sqrt(dx**2 + dy**2) + + # Convert to slope angle in degrees + slope_deg = np.degrees(np.arctan(grade_magnitude)) + + # Compute speed for each cell using Tobler function + speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) + + # Convert speed to time cost (seconds to traverse one cell) + avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 + cost = avg_cell_size * 3.6 / speed_kmh + + # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity + cost[slope_deg > MAX_SLOPE_DEG] = np.inf + + # Handle NaN elevations (no data) + cost[np.isnan(elevation)] = np.inf + + return cost + + +if __name__ == "__main__": + print("Testing Tobler speed function:") + for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: + speed = tobler_speed(grade) + print(f" Grade {grade:+.2f}: {speed:.2f} km/h") + + print("\nTesting cost grid computation:") + elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 + cost = compute_cost_grid(elev, cell_size_m=30.0) + print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") + print(f" Cost range: {cost[~np.isinf(cost)].min():.1f} - {cost[~np.isinf(cost)].max():.1f} s") diff --git a/lib/offroute/dem.py b/lib/offroute/dem.py new file mode 100644 index 0000000..f715611 --- /dev/null +++ b/lib/offroute/dem.py @@ -0,0 +1,190 @@ +""" +DEM tile reader for OFFROUTE. + +Reads elevation tiles from planet-dem.pmtiles (Terrarium-encoded WebP), +decodes them into numpy arrays, and provides a stitched elevation grid +for a given bounding box. +""" +import math +from functools import lru_cache +from io import BytesIO +from pathlib import Path +from typing import Tuple, Optional + +import numpy as np +from PIL import Image +from pmtiles.reader import MmapSource, Reader as PMTilesReader + +# Default path to the planet DEM PMTiles file +DEFAULT_DEM_PATH = Path("/mnt/nas/nav/planet-dem.pmtiles") + +# Tile size in pixels (z12 tiles are 512x512 in this tileset) +TILE_SIZE = 512 + +# Zoom level to use for elevation data +ZOOM_LEVEL = 12 + + +def terrarium_decode(rgb_array: np.ndarray) -> np.ndarray: + """ + Decode Terrarium-encoded RGB values to elevation in meters. + + Formula: elevation = (R * 256 + G + B/256) - 32768 + """ + r = rgb_array[:, :, 0].astype(np.float32) + g = rgb_array[:, :, 1].astype(np.float32) + b = rgb_array[:, :, 2].astype(np.float32) + + elevation = (r * 256.0 + g + b / 256.0) - 32768.0 + return elevation + + +def lat_lon_to_tile(lat: float, lon: float, zoom: int) -> Tuple[int, int]: + """Convert lat/lon to tile coordinates at given zoom level.""" + n = 2 ** zoom + x = int((lon + 180.0) / 360.0 * n) + lat_rad = math.radians(lat) + y = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n) + return x, y + + +def tile_to_lat_lon(x: int, y: int, zoom: int) -> Tuple[float, float, float, float]: + """Convert tile coordinates to bounding box (north, south, west, east).""" + n = 2 ** zoom + lon_west = x / n * 360.0 - 180.0 + lon_east = (x + 1) / n * 360.0 - 180.0 + lat_north = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y / n)))) + lat_south = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * (y + 1) / n)))) + return lat_north, lat_south, lon_west, lon_east + + +class DEMReader: + """Reader for Terrarium-encoded DEM tiles from PMTiles.""" + + def __init__(self, pmtiles_path: Path = DEFAULT_DEM_PATH, tile_cache_size: int = 128): + self.pmtiles_path = pmtiles_path + self._source = MmapSource(open(pmtiles_path, "rb")) + self._reader = PMTilesReader(self._source) + self._header = self._reader.header() + self._decode_tile = lru_cache(maxsize=tile_cache_size)(self._decode_tile_impl) + + def _decode_tile_impl(self, z: int, x: int, y: int) -> Optional[np.ndarray]: + """Fetch and decode a single tile.""" + tile_data = self._reader.get(z, x, y) + if tile_data is None: + return None + + img = Image.open(BytesIO(tile_data)) + rgb_array = np.array(img) + + if rgb_array.shape[2] == 4: + rgb_array = rgb_array[:, :, :3] + + elevation = terrarium_decode(rgb_array) + return elevation + + def get_elevation_grid( + self, + south: float, + north: float, + west: float, + east: float, + zoom: int = ZOOM_LEVEL + ) -> Tuple[np.ndarray, dict]: + """Get a stitched elevation grid for the given bounding box.""" + x_min, y_max = lat_lon_to_tile(south, west, zoom) + x_max, y_min = lat_lon_to_tile(north, east, zoom) + + n = 2 ** zoom + x_min = max(0, x_min) + x_max = min(n - 1, x_max) + y_min = max(0, y_min) + y_max = min(n - 1, y_max) + + n_tiles_x = x_max - x_min + 1 + n_tiles_y = y_max - y_min + 1 + out_height = n_tiles_y * TILE_SIZE + out_width = n_tiles_x * TILE_SIZE + + elevation = np.full((out_height, out_width), np.nan, dtype=np.float32) + + for ty in range(y_min, y_max + 1): + for tx in range(x_min, x_max + 1): + tile_elev = self._decode_tile(zoom, tx, ty) + if tile_elev is not None: + out_y = (ty - y_min) * TILE_SIZE + out_x = (tx - x_min) * TILE_SIZE + elevation[out_y:out_y + TILE_SIZE, out_x:out_x + TILE_SIZE] = tile_elev + + grid_north, _, grid_west, _ = tile_to_lat_lon(x_min, y_min, zoom) + _, grid_south, _, grid_east = tile_to_lat_lon(x_max, y_max, zoom) + + pixel_size_lat = (grid_north - grid_south) / out_height + pixel_size_lon = (grid_east - grid_west) / out_width + + origin_lat = grid_north - pixel_size_lat / 2 + origin_lon = grid_west + pixel_size_lon / 2 + + center_lat = (south + north) / 2 + lat_m = 111320.0 + lon_m = 111320.0 * math.cos(math.radians(center_lat)) + cell_size_lat_m = abs(pixel_size_lat) * lat_m + cell_size_lon_m = abs(pixel_size_lon) * lon_m + cell_size_m = (cell_size_lat_m + cell_size_lon_m) / 2 + + row_start = int((grid_north - north) / abs(pixel_size_lat)) + row_end = int((grid_north - south) / abs(pixel_size_lat)) + col_start = int((west - grid_west) / pixel_size_lon) + col_end = int((east - grid_west) / pixel_size_lon) + + row_start = max(0, row_start) + row_end = min(out_height, row_end) + col_start = max(0, col_start) + col_end = min(out_width, col_end) + + elevation = elevation[row_start:row_end, col_start:col_end] + + origin_lat = grid_north - (row_start + 0.5) * abs(pixel_size_lat) + origin_lon = grid_west + (col_start + 0.5) * pixel_size_lon + + metadata = { + "bounds": (south, north, west, east), + "pixel_size_lat": -abs(pixel_size_lat), + "pixel_size_lon": pixel_size_lon, + "origin_lat": origin_lat, + "origin_lon": origin_lon, + "cell_size_m": cell_size_m, + "shape": elevation.shape, + } + + return elevation, metadata + + def pixel_to_latlon(self, row: int, col: int, metadata: dict) -> Tuple[float, float]: + """Convert pixel coordinates to lat/lon.""" + lat = metadata["origin_lat"] + row * metadata["pixel_size_lat"] + lon = metadata["origin_lon"] + col * metadata["pixel_size_lon"] + return lat, lon + + def latlon_to_pixel(self, lat: float, lon: float, metadata: dict) -> Tuple[int, int]: + """Convert lat/lon to pixel coordinates.""" + row = int((metadata["origin_lat"] - lat) / abs(metadata["pixel_size_lat"])) + col = int((lon - metadata["origin_lon"]) / metadata["pixel_size_lon"]) + return row, col + + def close(self): + """Close the PMTiles file.""" + pass # MmapSource handles cleanup + + +if __name__ == "__main__": + reader = DEMReader() + elevation, meta = reader.get_elevation_grid( + south=42.4, north=42.6, west=-114.5, east=-114.3 + ) + print(f"Elevation grid shape: {elevation.shape}") + print(f"Cell size: {meta['cell_size_m']:.1f} m") + print(f"Elevation range: {np.nanmin(elevation):.1f} - {np.nanmax(elevation):.1f} m") + center_row, center_col = elevation.shape[0] // 2, elevation.shape[1] // 2 + lat, lon = reader.pixel_to_latlon(center_row, center_col, meta) + print(f"Center pixel lat/lon: {lat:.4f}, {lon:.4f}") + reader.close() diff --git a/lib/offroute/prototype.py b/lib/offroute/prototype.py new file mode 100755 index 0000000..0790a32 --- /dev/null +++ b/lib/offroute/prototype.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +""" +OFFROUTE Phase O1 Prototype + +Validates the PMTiles decoder, Tobler cost function, and MCP pathfinder +on a real Idaho bounding box. + +Test bbox (four Idaho towns as corners): + SW: Rogerson, ID (~42.21, -114.60) + NW: Buhl, ID (~42.60, -114.76) + NE: Burley, ID (~42.54, -113.79) + SE: Oakley, ID (~42.24, -113.88) + Approximate bbox: south=42.21, north=42.60, west=-114.76, east=-113.79 +""" +import json +import time +import sys +from pathlib import Path + +import numpy as np +from skimage.graph import MCP_Geometric + +# Add parent to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from lib.offroute.dem import DEMReader +from lib.offroute.cost import compute_cost_grid + +# Test bounding box +BBOX = { + "south": 42.21, + "north": 42.60, + "west": -114.76, + "east": -113.79, +} + +# Start point: wilderness area south of Twin Falls +# (in the Sawtooth National Forest foothills) +START_LAT = 42.35 +START_LON = -114.50 + +# End point: near Burley, ID (on road network) +END_LAT = 42.52 +END_LON = -113.85 + +# Output file +OUTPUT_PATH = Path("/opt/recon/data/offroute-test.geojson") + +# Memory limit in GB +MEMORY_LIMIT_GB = 12 + + +def check_memory_usage(): + """Check current memory usage and abort if over limit.""" + try: + import psutil + process = psutil.Process() + mem_gb = process.memory_info().rss / (1024**3) + if mem_gb > MEMORY_LIMIT_GB: + print(f"ERROR: Memory usage {mem_gb:.1f}GB exceeds {MEMORY_LIMIT_GB}GB limit") + sys.exit(1) + return mem_gb + except ImportError: + return 0 + + +def main(): + print("=" * 60) + print("OFFROUTE Phase O1 Prototype") + print("=" * 60) + + t0 = time.time() + + # Step 1: Load elevation data + print(f"\n[1] Loading DEM for bbox: {BBOX}") + reader = DEMReader() + + t1 = time.time() + elevation, meta = reader.get_elevation_grid( + south=BBOX["south"], + north=BBOX["north"], + west=BBOX["west"], + east=BBOX["east"], + ) + t2 = time.time() + + print(f" Elevation grid shape: {elevation.shape}") + print(f" Cell count: {elevation.size:,}") + print(f" Cell size: {meta['cell_size_m']:.1f} m") + print(f" Elevation range: {np.nanmin(elevation):.0f} - {np.nanmax(elevation):.0f} m") + print(f" Load time: {t2 - t1:.1f}s") + + mem = check_memory_usage() + if mem > 0: + print(f" Memory usage: {mem:.1f} GB") + + # Step 2: Compute cost grid + print(f"\n[2] Computing Tobler cost grid...") + t3 = time.time() + cost = compute_cost_grid(elevation, cell_size_m=meta["cell_size_m"]) + t4 = time.time() + + finite_cost = cost[~np.isinf(cost)] + print(f" Cost range: {finite_cost.min():.1f} - {finite_cost.max():.1f} s/cell") + print(f" Impassable cells: {np.sum(np.isinf(cost)):,} ({100*np.sum(np.isinf(cost))/cost.size:.1f}%)") + print(f" Compute time: {t4 - t3:.1f}s") + + mem = check_memory_usage() + if mem > 0: + print(f" Memory usage: {mem:.1f} GB") + + # Step 3: Convert start/end to pixel coordinates + print(f"\n[3] Converting coordinates...") + start_row, start_col = reader.latlon_to_pixel(START_LAT, START_LON, meta) + end_row, end_col = reader.latlon_to_pixel(END_LAT, END_LON, meta) + + print(f" Start: ({START_LAT}, {START_LON}) -> pixel ({start_row}, {start_col})") + print(f" End: ({END_LAT}, {END_LON}) -> pixel ({end_row}, {end_col})") + + # Validate coordinates are within bounds + rows, cols = elevation.shape + if not (0 <= start_row < rows and 0 <= start_col < cols): + print(f"ERROR: Start point outside grid bounds") + sys.exit(1) + if not (0 <= end_row < rows and 0 <= end_col < cols): + print(f"ERROR: End point outside grid bounds") + sys.exit(1) + + start_elev = elevation[start_row, start_col] + end_elev = elevation[end_row, end_col] + print(f" Start elevation: {start_elev:.0f} m") + print(f" End elevation: {end_elev:.0f} m") + + # Step 4: Run MCP pathfinder + print(f"\n[4] Running MCP_Geometric pathfinder...") + t5 = time.time() + + # MCP_Geometric finds minimum cost path + # It uses Dijkstra's algorithm internally + mcp = MCP_Geometric(cost, fully_connected=True) + + # Find costs from start to all reachable cells + cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) + t6 = time.time() + + print(f" Dijkstra completed in {t6 - t5:.1f}s") + + # Get cost to reach end point + end_cost = cumulative_costs[end_row, end_col] + print(f" Total cost to endpoint: {end_cost:.0f} seconds ({end_cost/60:.1f} minutes)") + + if np.isinf(end_cost): + print("ERROR: No path found to endpoint (blocked by impassable terrain)") + sys.exit(1) + + # Trace back the path + t7 = time.time() + path_indices = mcp.traceback((end_row, end_col)) + t8 = time.time() + + print(f" Traceback completed in {t8 - t7:.2f}s") + print(f" Path length: {len(path_indices)} cells") + + mem = check_memory_usage() + if mem > 0: + print(f" Memory usage: {mem:.1f} GB") + + # Step 5: Convert path to coordinates and compute stats + print(f"\n[5] Converting path to GeoJSON...") + + coordinates = [] + elevations = [] + + for row, col in path_indices: + lat, lon = reader.pixel_to_latlon(row, col, meta) + elev = elevation[row, col] + coordinates.append([lon, lat]) # GeoJSON is [lon, lat] + elevations.append(elev) + + # Compute path distance + total_distance_m = 0 + for i in range(1, len(coordinates)): + lon1, lat1 = coordinates[i-1] + lon2, lat2 = coordinates[i] + # Haversine formula + R = 6371000 # Earth radius in meters + dlat = np.radians(lat2 - lat1) + dlon = np.radians(lon2 - lon1) + a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2 + c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a)) + total_distance_m += R * c + + # Compute elevation gain/loss + elev_arr = np.array(elevations) + elev_diff = np.diff(elev_arr) + elev_gain = np.sum(elev_diff[elev_diff > 0]) + elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) + + # Build GeoJSON + geojson = { + "type": "Feature", + "properties": { + "type": "offroute_prototype", + "start": {"lat": START_LAT, "lon": START_LON}, + "end": {"lat": END_LAT, "lon": END_LON}, + "total_time_seconds": float(end_cost), + "total_time_minutes": float(end_cost / 60), + "total_distance_m": float(total_distance_m), + "total_distance_km": float(total_distance_m / 1000), + "elevation_gain_m": float(elev_gain), + "elevation_loss_m": float(elev_loss), + "min_elevation_m": float(np.min(elev_arr)), + "max_elevation_m": float(np.max(elev_arr)), + "cell_count": len(path_indices), + "cell_size_m": meta["cell_size_m"], + }, + "geometry": { + "type": "LineString", + "coordinates": coordinates, + } + } + + # Write output + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + with open(OUTPUT_PATH, "w") as f: + json.dump(geojson, f, indent=2) + + t_end = time.time() + + # Final report + print(f"\n" + "=" * 60) + print("RESULTS") + print("=" * 60) + print(f"Start: ({START_LAT:.4f}, {START_LON:.4f})") + print(f"End: ({END_LAT:.4f}, {END_LON:.4f})") + print(f"Total effort: {end_cost/60:.1f} minutes ({end_cost/3600:.2f} hours)") + print(f"Distance: {total_distance_m/1000:.2f} km") + print(f"Elevation gain: {elev_gain:.0f} m") + print(f"Elevation loss: {elev_loss:.0f} m") + print(f"Elevation range: {np.min(elev_arr):.0f} - {np.max(elev_arr):.0f} m") + print(f"Path cells: {len(path_indices):,}") + print(f"Wall time: {t_end - t0:.1f}s") + print(f"\nOutput saved to: {OUTPUT_PATH}") + + # Validation checks + print(f"\n" + "-" * 60) + print("VALIDATION") + print("-" * 60) + + # Check coordinates are within bbox + lons = [c[0] for c in coordinates] + lats = [c[1] for c in coordinates] + lon_ok = BBOX["west"] <= min(lons) and max(lons) <= BBOX["east"] + lat_ok = BBOX["south"] <= min(lats) and max(lats) <= BBOX["north"] + print(f"Coordinates within bbox: {'PASS' if lon_ok and lat_ok else 'FAIL'}") + + # Check path is not trivial + is_nontrivial = len(path_indices) > 10 and total_distance_m > 1000 + print(f"Path is non-trivial: {'PASS' if is_nontrivial else 'FAIL'}") + + # Check it's not a straight line (measure sinuosity) + straight_line_dist = np.sqrt( + (coordinates[-1][0] - coordinates[0][0])**2 + + (coordinates[-1][1] - coordinates[0][1])**2 + ) * 111000 # rough degrees to meters + sinuosity = total_distance_m / max(straight_line_dist, 1) + print(f"Sinuosity: {sinuosity:.2f} (>1.0 means path curves around obstacles)") + + reader.close() + print("\nPrototype completed successfully.") + + +if __name__ == "__main__": + main() From 26d4bc74784c7586ce14318a1d2cbf8d00f13b8e Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 06:33:45 +0000 Subject: [PATCH 35/72] =?UTF-8?q?feat(offroute):=20Phase=20O2b=20=E2=80=94?= =?UTF-8?q?=20WorldCover=20friction=20integration,=20lake=20avoidance=20va?= =?UTF-8?q?lidated?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New friction.py: reads WorldCover friction VRT, resamples to match elevation grid, provides point sampling for validation - Modified cost.py: accepts optional friction array, multiplies Tobler time cost by friction multiplier, inf for water/nodata (255/0) - Modified prototype.py: loads friction layer, passes to cost function, validates path avoids water cells (friction=255) Validated on Idaho test bbox: - Path avoids Murtaugh Lake (no water cells on path) - Friction along path: min=10, max=20, mean=10.2 - Effort increased 3.4% vs Phase O1 due to friction multipliers Co-Authored-By: Claude Opus 4.5 --- lib/offroute/cost.py | 226 ++++++++++++++++++++++---------------- lib/offroute/friction.py | 137 +++++++++++++++++++++++ lib/offroute/prototype.py | 190 +++++++++++++++++++++++++------- 3 files changed, 420 insertions(+), 133 deletions(-) create mode 100644 lib/offroute/friction.py diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py index f460ab9..3607de6 100644 --- a/lib/offroute/cost.py +++ b/lib/offroute/cost.py @@ -1,94 +1,132 @@ -""" -Tobler off-path hiking cost function for OFFROUTE. - -Computes travel time cost based on terrain slope using Tobler's -hiking function with off-trail penalty. -""" -import math -import numpy as np -from typing import Tuple - -# Maximum passable slope in degrees -MAX_SLOPE_DEG = 40.0 - -# Tobler off-path parameters -TOBLER_BASE_SPEED = 6.0 -TOBLER_OFF_TRAIL_MULT = 0.6 - - -def tobler_speed(grade: float) -> float: - """ - Calculate hiking speed using Tobler's off-path function. - - speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) - - Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). - """ - return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) - - -def compute_cost_grid( - elevation: np.ndarray, - cell_size_m: float, - cell_size_lat_m: float = None, - cell_size_lon_m: float = None -) -> np.ndarray: - """ - Compute isotropic travel cost grid from elevation data. - - Each cell's cost represents the time (in seconds) to traverse that cell, - based on the average slope from neighboring cells. - """ - if cell_size_lat_m is None: - cell_size_lat_m = cell_size_m - if cell_size_lon_m is None: - cell_size_lon_m = cell_size_m - - rows, cols = elevation.shape - - # Compute gradients in both directions - dy = np.zeros_like(elevation) - dx = np.zeros_like(elevation) - - # Central differences for interior, forward/backward at edges - dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) - dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m - dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m - - dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) - dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m - dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m - - # Compute slope magnitude (grade = rise/run) - grade_magnitude = np.sqrt(dx**2 + dy**2) - - # Convert to slope angle in degrees - slope_deg = np.degrees(np.arctan(grade_magnitude)) - - # Compute speed for each cell using Tobler function - speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) - - # Convert speed to time cost (seconds to traverse one cell) - avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 - cost = avg_cell_size * 3.6 / speed_kmh - - # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity - cost[slope_deg > MAX_SLOPE_DEG] = np.inf - - # Handle NaN elevations (no data) - cost[np.isnan(elevation)] = np.inf - - return cost - - -if __name__ == "__main__": - print("Testing Tobler speed function:") - for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: - speed = tobler_speed(grade) - print(f" Grade {grade:+.2f}: {speed:.2f} km/h") - - print("\nTesting cost grid computation:") - elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 - cost = compute_cost_grid(elev, cell_size_m=30.0) - print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") - print(f" Cost range: {cost[~np.isinf(cost)].min():.1f} - {cost[~np.isinf(cost)].max():.1f} s") +""" +Tobler off-path hiking cost function for OFFROUTE. + +Computes travel time cost based on terrain slope using Tobler's +hiking function with off-trail penalty. Optionally applies friction +multipliers from land cover data. +""" +import math +import numpy as np +from typing import Optional + +# Maximum passable slope in degrees +MAX_SLOPE_DEG = 40.0 + +# Tobler off-path parameters +TOBLER_BASE_SPEED = 6.0 +TOBLER_OFF_TRAIL_MULT = 0.6 + + +def tobler_speed(grade: float) -> float: + """ + Calculate hiking speed using Tobler's off-path function. + + speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) + + Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). + """ + return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) + + +def compute_cost_grid( + elevation: np.ndarray, + cell_size_m: float, + cell_size_lat_m: float = None, + cell_size_lon_m: float = None, + friction: Optional[np.ndarray] = None +) -> np.ndarray: + """ + Compute isotropic travel cost grid from elevation data. + + Each cell's cost represents the time (in seconds) to traverse that cell, + based on the average slope from neighboring cells. + + Args: + elevation: 2D array of elevation values in meters + cell_size_m: Average cell size in meters + cell_size_lat_m: Cell size in latitude direction (optional) + cell_size_lon_m: Cell size in longitude direction (optional) + friction: Optional 2D array of friction multipliers. + Values should be float (1.0 = baseline, 2.0 = 2x slower). + np.inf marks impassable cells. + If None, no friction is applied (backward compatible). + + Returns: + 2D array of travel cost in seconds per cell. + np.inf for impassable cells. + """ + if cell_size_lat_m is None: + cell_size_lat_m = cell_size_m + if cell_size_lon_m is None: + cell_size_lon_m = cell_size_m + + rows, cols = elevation.shape + + # Compute gradients in both directions + dy = np.zeros_like(elevation) + dx = np.zeros_like(elevation) + + # Central differences for interior, forward/backward at edges + dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) + dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m + dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m + + dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) + dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m + dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m + + # Compute slope magnitude (grade = rise/run) + grade_magnitude = np.sqrt(dx**2 + dy**2) + + # Convert to slope angle in degrees + slope_deg = np.degrees(np.arctan(grade_magnitude)) + + # Compute speed for each cell using Tobler function + speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) + + # Convert speed to time cost (seconds to traverse one cell) + avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 + cost = avg_cell_size * 3.6 / speed_kmh + + # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity + cost[slope_deg > MAX_SLOPE_DEG] = np.inf + + # Handle NaN elevations (no data) + cost[np.isnan(elevation)] = np.inf + + # Apply friction multipliers if provided + if friction is not None: + if friction.shape != elevation.shape: + raise ValueError( + f"Friction shape {friction.shape} does not match elevation shape {elevation.shape}" + ) + # Multiply cost by friction (inf * anything = inf, which is correct) + cost = cost * friction + + return cost + + +if __name__ == "__main__": + print("Testing Tobler speed function:") + for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: + speed = tobler_speed(grade) + print(f" Grade {grade:+.2f}: {speed:.2f} km/h") + + print("\nTesting cost grid computation (no friction):") + elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 + cost = compute_cost_grid(elev, cell_size_m=30.0) + print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") + finite = cost[~np.isinf(cost)] + if len(finite) > 0: + print(f" Cost range: {finite.min():.1f} - {finite.max():.1f} s") + else: + print(f" All cells impassable (test data too steep)") + + print("\nTesting cost grid with friction:") + elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain + friction = np.ones((10, 10), dtype=np.float32) * 1.5 # 1.5x friction + friction[5, 5] = np.inf # one impassable cell + cost = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) + print(f" Base cost (flat, 30m cell): {30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)):.1f} s") + print(f" With 1.5x friction: {cost[0, 0]:.1f} s") + print(f" Impassable cells: {np.sum(np.isinf(cost))}") diff --git a/lib/offroute/friction.py b/lib/offroute/friction.py new file mode 100644 index 0000000..32df0c0 --- /dev/null +++ b/lib/offroute/friction.py @@ -0,0 +1,137 @@ +""" +Friction layer reader for OFFROUTE. + +Reads friction values from the WorldCover friction VRT and resamples +to match the elevation grid dimensions. +""" +import numpy as np +from pathlib import Path +from typing import Tuple, Optional + +try: + import rasterio + from rasterio.windows import from_bounds + from rasterio.enums import Resampling +except ImportError: + raise ImportError("rasterio is required for friction layer support") + +# Default path to the friction VRT +DEFAULT_FRICTION_PATH = Path("/mnt/nav/worldcover/friction/friction_conus.vrt") + + +class FrictionReader: + """Reader for WorldCover friction raster.""" + + def __init__(self, friction_path: Path = DEFAULT_FRICTION_PATH): + self.friction_path = friction_path + self._dataset = None + + def _open(self): + """Lazy open the dataset.""" + if self._dataset is None: + self._dataset = rasterio.open(self.friction_path) + return self._dataset + + def get_friction_grid( + self, + south: float, + north: float, + west: float, + east: float, + target_shape: Tuple[int, int] + ) -> np.ndarray: + """ + Get friction values for a bounding box, resampled to target shape. + + Args: + south, north, west, east: Bounding box coordinates + target_shape: (rows, cols) to resample to (matches elevation grid) + + Returns: + np.ndarray of uint8 friction values, same shape as target_shape. + Values: 10-40 = friction multiplier (divide by 10) + 255 = impassable + 0 = nodata (treat as impassable) + """ + ds = self._open() + + # Create a window from the bounding box + window = from_bounds(west, south, east, north, ds.transform) + + # Read with resampling to target shape + # Use nearest neighbor for categorical data + friction = ds.read( + 1, + window=window, + out_shape=target_shape, + resampling=Resampling.nearest + ) + + return friction + + def sample_point(self, lat: float, lon: float) -> int: + """Sample friction value at a single point.""" + ds = self._open() + + # Get pixel coordinates + row, col = ds.index(lon, lat) + + # Check bounds + if row < 0 or row >= ds.height or col < 0 or col >= ds.width: + return 0 # Out of bounds = nodata + + # Read single pixel + window = rasterio.windows.Window(col, row, 1, 1) + value = ds.read(1, window=window) + return int(value[0, 0]) + + def close(self): + """Close the dataset.""" + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + +def friction_to_multiplier(friction: np.ndarray) -> np.ndarray: + """ + Convert friction values to cost multipliers. + + Args: + friction: uint8 array of friction values + + Returns: + float32 array of multipliers. + Values 10-40 become 1.0-4.0 (divide by 10). + Values 0 or 255 become np.inf (impassable). + """ + multiplier = friction.astype(np.float32) / 10.0 + + # Mark impassable cells + multiplier[friction == 0] = np.inf # nodata + multiplier[friction == 255] = np.inf # water/impassable + + return multiplier + + +if __name__ == "__main__": + print("Testing FrictionReader...") + + reader = FrictionReader() + + # Test point sampling - Murtaugh Lake (should be water = 255) + lake_lat, lake_lon = 42.47, -114.15 + lake_friction = reader.sample_point(lake_lat, lake_lon) + print(f"Murtaugh Lake ({lake_lat}, {lake_lon}): friction = {lake_friction}") + print(f" Expected: 255 (water/impassable)") + + # Test grid read for small bbox + friction = reader.get_friction_grid( + south=42.4, north=42.5, west=-114.2, east=-114.1, + target_shape=(100, 100) + ) + print(f"\nGrid test shape: {friction.shape}") + print(f"Unique values: {np.unique(friction)}") + print(f"Water cells (255): {np.sum(friction == 255)}") + + reader.close() + print("\nFrictionReader test complete.") diff --git a/lib/offroute/prototype.py b/lib/offroute/prototype.py index 0790a32..9822021 100755 --- a/lib/offroute/prototype.py +++ b/lib/offroute/prototype.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 """ -OFFROUTE Phase O1 Prototype +OFFROUTE Phase O2b Prototype -Validates the PMTiles decoder, Tobler cost function, and MCP pathfinder -on a real Idaho bounding box. +Validates the PMTiles decoder, Tobler cost function, WorldCover friction +integration, and MCP pathfinder on a real Idaho bounding box. + +Now includes friction layer to avoid water bodies like Murtaugh Lake. Test bbox (four Idaho towns as corners): SW: Rogerson, ID (~42.21, -114.60) @@ -25,6 +27,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from lib.offroute.dem import DEMReader from lib.offroute.cost import compute_cost_grid +from lib.offroute.friction import FrictionReader, friction_to_multiplier # Test bounding box BBOX = { @@ -43,8 +46,18 @@ START_LON = -114.50 END_LAT = 42.52 END_LON = -113.85 -# Output file -OUTPUT_PATH = Path("/opt/recon/data/offroute-test.geojson") +# Murtaugh Lake - actual water extent from WorldCover +LAKE_BOUNDS = { + "south": 42.44, + "north": 42.50, + "west": -114.20, + "east": -114.10, +} +LAKE_CENTER = (42.465, -114.155) # Verified water in WorldCover + +# Output files +OUTPUT_PATH_O1 = Path("/opt/recon/data/offroute-test.geojson") +OUTPUT_PATH_FRICTION = Path("/opt/recon/data/offroute-test-friction.geojson") # Memory limit in GB MEMORY_LIMIT_GB = 12 @@ -64,19 +77,28 @@ def check_memory_usage(): return 0 +def path_crosses_lake(coordinates, lake_bounds): + """Check if any path coordinates fall within the lake bounding box.""" + for lon, lat in coordinates: + if (lake_bounds["south"] <= lat <= lake_bounds["north"] and + lake_bounds["west"] <= lon <= lake_bounds["east"]): + return True, (lat, lon) + return False, None + + def main(): print("=" * 60) - print("OFFROUTE Phase O1 Prototype") + print("OFFROUTE Phase O2b Prototype (with Friction)") print("=" * 60) t0 = time.time() # Step 1: Load elevation data print(f"\n[1] Loading DEM for bbox: {BBOX}") - reader = DEMReader() + dem_reader = DEMReader() t1 = time.time() - elevation, meta = reader.get_elevation_grid( + elevation, meta = dem_reader.get_elevation_grid( south=BBOX["south"], north=BBOX["north"], west=BBOX["west"], @@ -94,25 +116,67 @@ def main(): if mem > 0: print(f" Memory usage: {mem:.1f} GB") - # Step 2: Compute cost grid - print(f"\n[2] Computing Tobler cost grid...") + # Step 2: Load friction data + print(f"\n[2] Loading WorldCover friction layer...") + t2a = time.time() + + friction_reader = FrictionReader() + + # Validate lake is marked as impassable + lake_friction = friction_reader.sample_point(LAKE_CENTER[0], LAKE_CENTER[1]) + print(f" Murtaugh Lake center ({LAKE_CENTER[0]}, {LAKE_CENTER[1]}): friction = {lake_friction}") + if lake_friction != 255: + print(f" WARNING: Lake not marked as water (expected 255, got {lake_friction})") + else: + print(f" Lake correctly marked as impassable (255)") + + # Load friction grid matching elevation shape + friction_raw = friction_reader.get_friction_grid( + south=BBOX["south"], + north=BBOX["north"], + west=BBOX["west"], + east=BBOX["east"], + target_shape=elevation.shape + ) + t2b = time.time() + + # Convert to multipliers + friction_mult = friction_to_multiplier(friction_raw) + + impassable_count = np.sum(np.isinf(friction_mult)) + print(f" Friction grid shape: {friction_raw.shape}") + print(f" Unique friction values: {np.unique(friction_raw[friction_raw > 0])}") + print(f" Impassable cells (water/nodata): {impassable_count:,} ({100*impassable_count/friction_raw.size:.1f}%)") + print(f" Load time: {t2b - t2a:.1f}s") + + mem = check_memory_usage() + if mem > 0: + print(f" Memory usage: {mem:.1f} GB") + + # Step 3: Compute cost grid with friction + print(f"\n[3] Computing Tobler cost grid with friction...") t3 = time.time() - cost = compute_cost_grid(elevation, cell_size_m=meta["cell_size_m"]) + cost = compute_cost_grid( + elevation, + cell_size_m=meta["cell_size_m"], + friction=friction_mult + ) t4 = time.time() finite_cost = cost[~np.isinf(cost)] + total_impassable = np.sum(np.isinf(cost)) print(f" Cost range: {finite_cost.min():.1f} - {finite_cost.max():.1f} s/cell") - print(f" Impassable cells: {np.sum(np.isinf(cost)):,} ({100*np.sum(np.isinf(cost))/cost.size:.1f}%)") + print(f" Total impassable cells: {total_impassable:,} ({100*total_impassable/cost.size:.1f}%)") print(f" Compute time: {t4 - t3:.1f}s") mem = check_memory_usage() if mem > 0: print(f" Memory usage: {mem:.1f} GB") - # Step 3: Convert start/end to pixel coordinates - print(f"\n[3] Converting coordinates...") - start_row, start_col = reader.latlon_to_pixel(START_LAT, START_LON, meta) - end_row, end_col = reader.latlon_to_pixel(END_LAT, END_LON, meta) + # Step 4: Convert start/end to pixel coordinates + print(f"\n[4] Converting coordinates...") + start_row, start_col = dem_reader.latlon_to_pixel(START_LAT, START_LON, meta) + end_row, end_col = dem_reader.latlon_to_pixel(END_LAT, END_LON, meta) print(f" Start: ({START_LAT}, {START_LON}) -> pixel ({start_row}, {start_col})") print(f" End: ({END_LAT}, {END_LON}) -> pixel ({end_row}, {end_col})") @@ -131,21 +195,16 @@ def main(): print(f" Start elevation: {start_elev:.0f} m") print(f" End elevation: {end_elev:.0f} m") - # Step 4: Run MCP pathfinder - print(f"\n[4] Running MCP_Geometric pathfinder...") + # Step 5: Run MCP pathfinder + print(f"\n[5] Running MCP_Geometric pathfinder...") t5 = time.time() - # MCP_Geometric finds minimum cost path - # It uses Dijkstra's algorithm internally mcp = MCP_Geometric(cost, fully_connected=True) - - # Find costs from start to all reachable cells cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) t6 = time.time() print(f" Dijkstra completed in {t6 - t5:.1f}s") - # Get cost to reach end point end_cost = cumulative_costs[end_row, end_col] print(f" Total cost to endpoint: {end_cost:.0f} seconds ({end_cost/60:.1f} minutes)") @@ -153,7 +212,6 @@ def main(): print("ERROR: No path found to endpoint (blocked by impassable terrain)") sys.exit(1) - # Trace back the path t7 = time.time() path_indices = mcp.traceback((end_row, end_col)) t8 = time.time() @@ -165,25 +223,27 @@ def main(): if mem > 0: print(f" Memory usage: {mem:.1f} GB") - # Step 5: Convert path to coordinates and compute stats - print(f"\n[5] Converting path to GeoJSON...") + # Step 6: Convert path to coordinates and compute stats + print(f"\n[6] Converting path to GeoJSON...") coordinates = [] elevations = [] + friction_values = [] for row, col in path_indices: - lat, lon = reader.pixel_to_latlon(row, col, meta) + lat, lon = dem_reader.pixel_to_latlon(row, col, meta) elev = elevation[row, col] - coordinates.append([lon, lat]) # GeoJSON is [lon, lat] + fric = friction_raw[row, col] + coordinates.append([lon, lat]) elevations.append(elev) + friction_values.append(fric) # Compute path distance total_distance_m = 0 for i in range(1, len(coordinates)): lon1, lat1 = coordinates[i-1] lon2, lat2 = coordinates[i] - # Haversine formula - R = 6371000 # Earth radius in meters + R = 6371000 dlat = np.radians(lat2 - lat1) dlon = np.radians(lon2 - lon1) a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2 @@ -196,11 +256,16 @@ def main(): elev_gain = np.sum(elev_diff[elev_diff > 0]) elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) + # Friction stats along path + fric_arr = np.array(friction_values) + valid_fric = fric_arr[(fric_arr > 0) & (fric_arr < 255)] + # Build GeoJSON geojson = { "type": "Feature", "properties": { - "type": "offroute_prototype", + "type": "offroute_prototype_friction", + "phase": "O2b", "start": {"lat": START_LAT, "lon": START_LON}, "end": {"lat": END_LAT, "lon": END_LON}, "total_time_seconds": float(end_cost), @@ -211,6 +276,9 @@ def main(): "elevation_loss_m": float(elev_loss), "min_elevation_m": float(np.min(elev_arr)), "max_elevation_m": float(np.max(elev_arr)), + "friction_min": int(valid_fric.min()) if len(valid_fric) > 0 else 0, + "friction_max": int(valid_fric.max()) if len(valid_fric) > 0 else 0, + "friction_mean": float(valid_fric.mean()) if len(valid_fric) > 0 else 0, "cell_count": len(path_indices), "cell_size_m": meta["cell_size_m"], }, @@ -221,15 +289,15 @@ def main(): } # Write output - OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) - with open(OUTPUT_PATH, "w") as f: + OUTPUT_PATH_FRICTION.parent.mkdir(parents=True, exist_ok=True) + with open(OUTPUT_PATH_FRICTION, "w") as f: json.dump(geojson, f, indent=2) t_end = time.time() # Final report print(f"\n" + "=" * 60) - print("RESULTS") + print("RESULTS (Phase O2b with Friction)") print("=" * 60) print(f"Start: ({START_LAT:.4f}, {START_LON:.4f})") print(f"End: ({END_LAT:.4f}, {END_LON:.4f})") @@ -238,11 +306,13 @@ def main(): print(f"Elevation gain: {elev_gain:.0f} m") print(f"Elevation loss: {elev_loss:.0f} m") print(f"Elevation range: {np.min(elev_arr):.0f} - {np.max(elev_arr):.0f} m") + if len(valid_fric) > 0: + print(f"Friction (path): min={valid_fric.min()}, max={valid_fric.max()}, mean={valid_fric.mean():.1f}") print(f"Path cells: {len(path_indices):,}") print(f"Wall time: {t_end - t0:.1f}s") - print(f"\nOutput saved to: {OUTPUT_PATH}") + print(f"\nOutput saved to: {OUTPUT_PATH_FRICTION}") - # Validation checks + # Validation print(f"\n" + "-" * 60) print("VALIDATION") print("-" * 60) @@ -258,15 +328,57 @@ def main(): is_nontrivial = len(path_indices) > 10 and total_distance_m > 1000 print(f"Path is non-trivial: {'PASS' if is_nontrivial else 'FAIL'}") - # Check it's not a straight line (measure sinuosity) + # Check sinuosity straight_line_dist = np.sqrt( (coordinates[-1][0] - coordinates[0][0])**2 + (coordinates[-1][1] - coordinates[0][1])**2 - ) * 111000 # rough degrees to meters + ) * 111000 sinuosity = total_distance_m / max(straight_line_dist, 1) print(f"Sinuosity: {sinuosity:.2f} (>1.0 means path curves around obstacles)") - reader.close() + # CRITICAL: Check no water cells (friction=255) on path + # This is the authoritative test - friction layer prevents water crossings + print(f"\n--- Water Avoidance Check ---") + water_on_path = np.sum(fric_arr == 255) + if water_on_path > 0: + print(f"FAIL: Path crosses {water_on_path} water cells (friction=255)") + sys.exit(1) + else: + print(f"PASS: No water cells (friction=255) on path") + + # Informational: Check if path goes through lake bounding box + # Path may go through land cells within the bbox, which is fine + print(f"\n--- Lake Bounding Box Check (informational) ---") + print(f"Murtaugh Lake bounds: {LAKE_BOUNDS}") + crosses_lake, crossing_point = path_crosses_lake(coordinates, LAKE_BOUNDS) + if crosses_lake: + print(f"INFO: Path passes through lake bbox at {crossing_point}") + print(f" (This is OK if friction check passed - path uses land cells)") + else: + print(f"PASS: Path does not enter lake bounding box") + + # Compare with Phase O1 if available + print(f"\n" + "-" * 60) + print("COMPARISON: Phase O1 vs O2b") + print("-" * 60) + + if OUTPUT_PATH_O1.exists(): + with open(OUTPUT_PATH_O1) as f: + o1_data = json.load(f) + o1_props = o1_data["properties"] + + print(f"{'Metric':<20} {'O1 (no friction)':<20} {'O2b (with friction)':<20}") + print("-" * 60) + print(f"{'Distance (km)':<20} {o1_props['total_distance_km']:<20.2f} {total_distance_m/1000:<20.2f}") + print(f"{'Effort (min)':<20} {o1_props['total_time_minutes']:<20.1f} {end_cost/60:<20.1f}") + print(f"{'Cell count':<20} {o1_props['cell_count']:<20} {len(path_indices):<20}") + print(f"{'Elev gain (m)':<20} {o1_props['elevation_gain_m']:<20.0f} {elev_gain:<20.0f}") + else: + print(f"Phase O1 output not found at {OUTPUT_PATH_O1}") + print(f"Run the O1 prototype first to enable comparison.") + + dem_reader.close() + friction_reader.close() print("\nPrototype completed successfully.") From e0eedcedfde5672fa3a61efe286ceb0082a6a838 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 06:53:11 +0000 Subject: [PATCH 36/72] =?UTF-8?q?feat(offroute):=20Phase=20O2c=20=E2=80=94?= =?UTF-8?q?=20PAD-US=20barriers=20with=20three-mode=20boundary=20respect?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add barriers.py: PAD-US raster reader + build_barriers_raster() function - Rasterize PAD-US Pub_Access=XA (Closed) polygons to CONUS GeoTIFF - Modify cost.py: boundary_mode parameter (strict/pragmatic/emergency) - strict: private land = impassable (np.inf) - pragmatic: private land = 5x friction penalty (default) - emergency: private land barriers ignored - Modify prototype.py: three-way comparison output - Output: padus_barriers.tif at /mnt/nav/worldcover/ (144MB, ~33m resolution) Co-Authored-By: Claude Opus 4.5 --- lib/offroute/barriers.py | 266 ++++++++++++++++++++ lib/offroute/cost.py | 310 ++++++++++++++---------- lib/offroute/prototype.py | 494 +++++++++++++++++++------------------- 3 files changed, 694 insertions(+), 376 deletions(-) create mode 100644 lib/offroute/barriers.py diff --git a/lib/offroute/barriers.py b/lib/offroute/barriers.py new file mode 100644 index 0000000..7fcad75 --- /dev/null +++ b/lib/offroute/barriers.py @@ -0,0 +1,266 @@ +""" +PAD-US barrier layer for OFFROUTE. + +Provides access to the PAD-US land ownership raster for routing decisions. +Cells with value 255 represent closed/restricted areas (Pub_Access = XA). + +Build function rasterizes PAD-US geodatabase to aligned GeoTIFF. +Runtime functions read the raster and resample to match elevation grids. +""" +import numpy as np +from pathlib import Path +from typing import Tuple, Optional +import subprocess +import tempfile +import os + +try: + import rasterio + from rasterio.windows import from_bounds + from rasterio.enums import Resampling +except ImportError: + raise ImportError("rasterio is required for barriers layer support") + +# Paths +DEFAULT_BARRIERS_PATH = Path("/mnt/nav/worldcover/padus_barriers.tif") +PADUS_GDB_PATH = Path("/mnt/nav/padus/PADUS4_0_Geodatabase.gdb") +PADUS_LAYER = "PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement" + +# CONUS bounding box in WGS84 +CONUS_BOUNDS = { + "west": -125.0, + "east": -66.0, + "south": 24.0, + "north": 50.0, +} + +# Resolution in degrees (~30m at mid-latitudes) +PIXEL_SIZE = 0.0003 # ~33m + + +class BarrierReader: + """Reader for PAD-US barrier raster.""" + + def __init__(self, barrier_path: Path = DEFAULT_BARRIERS_PATH): + self.barrier_path = barrier_path + self._dataset = None + + def _open(self): + """Lazy open the dataset.""" + if self._dataset is None: + if not self.barrier_path.exists(): + raise FileNotFoundError( + f"Barrier raster not found at {self.barrier_path}. " + f"Run build_barriers_raster() first." + ) + self._dataset = rasterio.open(self.barrier_path) + return self._dataset + + def get_barrier_grid( + self, + south: float, + north: float, + west: float, + east: float, + target_shape: Tuple[int, int] + ) -> np.ndarray: + """ + Get barrier values for a bounding box, resampled to target shape. + + Args: + south, north, west, east: Bounding box coordinates (WGS84) + target_shape: (rows, cols) to resample to (matches elevation grid) + + Returns: + np.ndarray of uint8 barrier values: + 255 = closed/restricted (impassable when respect_boundaries=True) + 0 = public/accessible + """ + ds = self._open() + + # Create a window from the bounding box + window = from_bounds(west, south, east, north, ds.transform) + + # Read with resampling to target shape + barriers = ds.read( + 1, + window=window, + out_shape=target_shape, + resampling=Resampling.nearest + ) + + return barriers + + def sample_point(self, lat: float, lon: float) -> int: + """Sample barrier value at a single point.""" + ds = self._open() + + # Get pixel coordinates + row, col = ds.index(lon, lat) + + # Check bounds + if row < 0 or row >= ds.height or col < 0 or col >= ds.width: + return 0 # Out of bounds = accessible + + # Read single pixel + window = rasterio.windows.Window(col, row, 1, 1) + value = ds.read(1, window=window) + return int(value[0, 0]) + + def close(self): + """Close the dataset.""" + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + +def build_barriers_raster( + output_path: Path = DEFAULT_BARRIERS_PATH, + gdb_path: Path = PADUS_GDB_PATH, + pixel_size: float = PIXEL_SIZE, + bounds: dict = CONUS_BOUNDS, +) -> Path: + """ + Build the PAD-US barriers raster from the source geodatabase. + + Extracts polygons where Pub_Access = 'XA' (Closed) and rasterizes them. + + Args: + output_path: Output GeoTIFF path + gdb_path: Path to PAD-US geodatabase + pixel_size: Pixel size in degrees + bounds: CONUS bounding box + + Returns: + Path to the created raster + """ + import shutil + + if not gdb_path.exists(): + raise FileNotFoundError(f"PAD-US geodatabase not found at {gdb_path}") + + # Check for required tools + if not shutil.which('ogr2ogr'): + raise RuntimeError("ogr2ogr not found. Install GDAL.") + if not shutil.which('gdal_rasterize'): + raise RuntimeError("gdal_rasterize not found. Install GDAL.") + + output_path.parent.mkdir(parents=True, exist_ok=True) + + print(f"Building PAD-US barriers raster...") + print(f" Source: {gdb_path}") + print(f" Output: {output_path}") + print(f" Pixel size: {pixel_size} degrees (~{pixel_size * 111000:.0f}m)") + print(f" Bounds: {bounds}") + + with tempfile.TemporaryDirectory() as tmpdir: + # Step 1: Extract closed areas and reproject to WGS84 + closed_gpkg = Path(tmpdir) / "closed_areas.gpkg" + + print(f"\n[1/3] Extracting closed areas (Pub_Access = 'XA')...") + + ogr_cmd = [ + "ogr2ogr", + "-f", "GPKG", + str(closed_gpkg), + str(gdb_path), + PADUS_LAYER, + "-where", "Pub_Access = 'XA'", + "-t_srs", "EPSG:4326", + "-nlt", "MULTIPOLYGON", + "-nln", "closed_areas", + ] + + result = subprocess.run(ogr_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"STDERR: {result.stderr}") + raise RuntimeError(f"ogr2ogr failed: {result.stderr}") + + # Check feature count + info_cmd = ["ogrinfo", "-so", str(closed_gpkg), "closed_areas"] + info_result = subprocess.run(info_cmd, capture_output=True, text=True) + print(f" Extraction result:\n{info_result.stdout}") + + # Step 2: Create empty raster + print(f"\n[2/3] Creating raster grid...") + + width = int((bounds['east'] - bounds['west']) / pixel_size) + height = int((bounds['north'] - bounds['south']) / pixel_size) + + print(f" Grid size: {width} x {height} pixels") + print(f" Memory estimate: {width * height / 1e6:.1f} MB") + + # Step 3: Rasterize + print(f"\n[3/3] Rasterizing closed areas...") + + rasterize_cmd = [ + "gdal_rasterize", + "-burn", "255", + "-init", "0", + "-a_nodata", "0", # No nodata - 0 means accessible + "-te", str(bounds['west']), str(bounds['south']), + str(bounds['east']), str(bounds['north']), + "-tr", str(pixel_size), str(pixel_size), + "-ot", "Byte", + "-co", "COMPRESS=LZW", + "-co", "TILED=YES", + "-l", "closed_areas", + str(closed_gpkg), + str(output_path), + ] + + result = subprocess.run(rasterize_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"STDERR: {result.stderr}") + raise RuntimeError(f"gdal_rasterize failed: {result.stderr}") + + # Verify output + print(f"\n[Done] Verifying output...") + with rasterio.open(output_path) as ds: + print(f" Size: {ds.width} x {ds.height}") + print(f" CRS: {ds.crs}") + print(f" Bounds: {ds.bounds}") + + # Sample a few tiles to check + sample = ds.read(1, window=rasterio.windows.Window(0, 0, 1000, 1000)) + closed_count = np.sum(sample == 255) + print(f" Sample (1000x1000): {closed_count} closed cells") + + file_size = output_path.stat().st_size / (1024**2) + print(f" File size: {file_size:.1f} MB") + + return output_path + + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "build": + # Build the raster + print("="*60) + print("PAD-US Barriers Raster Build") + print("="*60) + build_barriers_raster() + else: + # Test the reader + print("Testing BarrierReader...") + + if not DEFAULT_BARRIERS_PATH.exists(): + print(f"Barrier raster not found at {DEFAULT_BARRIERS_PATH}") + print(f"Run: python barriers.py build") + sys.exit(1) + + reader = BarrierReader() + + # Test grid read for Idaho area + barriers = reader.get_barrier_grid( + south=42.2, north=42.6, west=-114.8, east=-113.8, + target_shape=(400, 1000) + ) + print(f"\nGrid test shape: {barriers.shape}") + print(f"Unique values: {np.unique(barriers)}") + closed_cells = np.sum(barriers == 255) + print(f"Closed cells: {closed_cells} ({100*closed_cells/barriers.size:.2f}%)") + + reader.close() + print("\nBarrierReader test complete.") diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py index 3607de6..f31b8f5 100644 --- a/lib/offroute/cost.py +++ b/lib/offroute/cost.py @@ -1,132 +1,178 @@ -""" -Tobler off-path hiking cost function for OFFROUTE. - -Computes travel time cost based on terrain slope using Tobler's -hiking function with off-trail penalty. Optionally applies friction -multipliers from land cover data. -""" -import math -import numpy as np -from typing import Optional - -# Maximum passable slope in degrees -MAX_SLOPE_DEG = 40.0 - -# Tobler off-path parameters -TOBLER_BASE_SPEED = 6.0 -TOBLER_OFF_TRAIL_MULT = 0.6 - - -def tobler_speed(grade: float) -> float: - """ - Calculate hiking speed using Tobler's off-path function. - - speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) - - Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). - """ - return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) - - -def compute_cost_grid( - elevation: np.ndarray, - cell_size_m: float, - cell_size_lat_m: float = None, - cell_size_lon_m: float = None, - friction: Optional[np.ndarray] = None -) -> np.ndarray: - """ - Compute isotropic travel cost grid from elevation data. - - Each cell's cost represents the time (in seconds) to traverse that cell, - based on the average slope from neighboring cells. - - Args: - elevation: 2D array of elevation values in meters - cell_size_m: Average cell size in meters - cell_size_lat_m: Cell size in latitude direction (optional) - cell_size_lon_m: Cell size in longitude direction (optional) - friction: Optional 2D array of friction multipliers. - Values should be float (1.0 = baseline, 2.0 = 2x slower). - np.inf marks impassable cells. - If None, no friction is applied (backward compatible). - - Returns: - 2D array of travel cost in seconds per cell. - np.inf for impassable cells. - """ - if cell_size_lat_m is None: - cell_size_lat_m = cell_size_m - if cell_size_lon_m is None: - cell_size_lon_m = cell_size_m - - rows, cols = elevation.shape - - # Compute gradients in both directions - dy = np.zeros_like(elevation) - dx = np.zeros_like(elevation) - - # Central differences for interior, forward/backward at edges - dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) - dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m - dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m - - dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) - dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m - dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m - - # Compute slope magnitude (grade = rise/run) - grade_magnitude = np.sqrt(dx**2 + dy**2) - - # Convert to slope angle in degrees - slope_deg = np.degrees(np.arctan(grade_magnitude)) - - # Compute speed for each cell using Tobler function - speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) - - # Convert speed to time cost (seconds to traverse one cell) - avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 - cost = avg_cell_size * 3.6 / speed_kmh - - # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity - cost[slope_deg > MAX_SLOPE_DEG] = np.inf - - # Handle NaN elevations (no data) - cost[np.isnan(elevation)] = np.inf - - # Apply friction multipliers if provided - if friction is not None: - if friction.shape != elevation.shape: - raise ValueError( - f"Friction shape {friction.shape} does not match elevation shape {elevation.shape}" - ) - # Multiply cost by friction (inf * anything = inf, which is correct) - cost = cost * friction - - return cost - - -if __name__ == "__main__": - print("Testing Tobler speed function:") - for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: - speed = tobler_speed(grade) - print(f" Grade {grade:+.2f}: {speed:.2f} km/h") - - print("\nTesting cost grid computation (no friction):") - elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 - cost = compute_cost_grid(elev, cell_size_m=30.0) - print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") - finite = cost[~np.isinf(cost)] - if len(finite) > 0: - print(f" Cost range: {finite.min():.1f} - {finite.max():.1f} s") - else: - print(f" All cells impassable (test data too steep)") - - print("\nTesting cost grid with friction:") - elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain - friction = np.ones((10, 10), dtype=np.float32) * 1.5 # 1.5x friction - friction[5, 5] = np.inf # one impassable cell - cost = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) - print(f" Base cost (flat, 30m cell): {30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)):.1f} s") - print(f" With 1.5x friction: {cost[0, 0]:.1f} s") - print(f" Impassable cells: {np.sum(np.isinf(cost))}") +""" +Tobler off-path hiking cost function for OFFROUTE. + +Computes travel time cost based on terrain slope using Tobler's +hiking function with off-trail penalty. Optionally applies friction +multipliers from land cover data and barrier grids from PAD-US. +""" +import math +import numpy as np +from typing import Optional, Literal + +# Maximum passable slope in degrees +MAX_SLOPE_DEG = 40.0 + +# Tobler off-path parameters +TOBLER_BASE_SPEED = 6.0 +TOBLER_OFF_TRAIL_MULT = 0.6 + +# Pragmatic mode friction multiplier for private land +PRAGMATIC_BARRIER_MULTIPLIER = 5.0 + + +def tobler_speed(grade: float) -> float: + """ + Calculate hiking speed using Tobler's off-path function. + + speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) + + Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). + """ + return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) + + +def compute_cost_grid( + elevation: np.ndarray, + cell_size_m: float, + cell_size_lat_m: float = None, + cell_size_lon_m: float = None, + friction: Optional[np.ndarray] = None, + barriers: Optional[np.ndarray] = None, + boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" +) -> np.ndarray: + """ + Compute isotropic travel cost grid from elevation data. + + Each cell's cost represents the time (in seconds) to traverse that cell, + based on the average slope from neighboring cells. + + Args: + elevation: 2D array of elevation values in meters + cell_size_m: Average cell size in meters + cell_size_lat_m: Cell size in latitude direction (optional) + cell_size_lon_m: Cell size in longitude direction (optional) + friction: Optional 2D array of friction multipliers. + Values should be float (1.0 = baseline, 2.0 = 2x slower). + np.inf marks impassable cells. + If None, no friction is applied (backward compatible). + barriers: Optional 2D array of barrier values (uint8). + 255 = closed/restricted area (from PAD-US Pub_Access = XA). + 0 = accessible. + If None, no barriers are applied. + boundary_mode: How to handle private/restricted land barriers: + "strict" - cells with barrier=255 become impassable (np.inf) + "pragmatic" - cells with barrier=255 get 5.0x friction penalty + "emergency" - barriers are ignored entirely + Default: "pragmatic" + + Returns: + 2D array of travel cost in seconds per cell. + np.inf for impassable cells. + """ + if boundary_mode not in ("strict", "pragmatic", "emergency"): + raise ValueError(f"boundary_mode must be 'strict', 'pragmatic', or 'emergency', got '{boundary_mode}'") + + if cell_size_lat_m is None: + cell_size_lat_m = cell_size_m + if cell_size_lon_m is None: + cell_size_lon_m = cell_size_m + + rows, cols = elevation.shape + + # Compute gradients in both directions + dy = np.zeros_like(elevation) + dx = np.zeros_like(elevation) + + # Central differences for interior, forward/backward at edges + dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) + dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m + dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m + + dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) + dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m + dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m + + # Compute slope magnitude (grade = rise/run) + grade_magnitude = np.sqrt(dx**2 + dy**2) + + # Convert to slope angle in degrees + slope_deg = np.degrees(np.arctan(grade_magnitude)) + + # Compute speed for each cell using Tobler function + speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) + + # Convert speed to time cost (seconds to traverse one cell) + avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 + cost = avg_cell_size * 3.6 / speed_kmh + + # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity + cost[slope_deg > MAX_SLOPE_DEG] = np.inf + + # Handle NaN elevations (no data) + cost[np.isnan(elevation)] = np.inf + + # Apply friction multipliers if provided + if friction is not None: + if friction.shape != elevation.shape: + raise ValueError( + f"Friction shape {friction.shape} does not match elevation shape {elevation.shape}" + ) + # Multiply cost by friction (inf * anything = inf, which is correct) + cost = cost * friction + + # Apply barriers based on boundary_mode + if barriers is not None and boundary_mode != "emergency": + if barriers.shape != elevation.shape: + raise ValueError( + f"Barriers shape {barriers.shape} does not match elevation shape {elevation.shape}" + ) + + barrier_mask = barriers == 255 + + if boundary_mode == "strict": + # Mark closed/restricted areas as impassable + cost[barrier_mask] = np.inf + elif boundary_mode == "pragmatic": + # Apply friction penalty to closed/restricted areas + cost[barrier_mask] = cost[barrier_mask] * PRAGMATIC_BARRIER_MULTIPLIER + + return cost + + +if __name__ == "__main__": + print("Testing Tobler speed function:") + for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: + speed = tobler_speed(grade) + print(f" Grade {grade:+.2f}: {speed:.2f} km/h") + + print("\nTesting cost grid computation (no friction, no barriers):") + elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 + cost = compute_cost_grid(elev, cell_size_m=30.0) + print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") + finite = cost[~np.isinf(cost)] + if len(finite) > 0: + print(f" Cost range: {finite.min():.1f} - {finite.max():.1f} s") + else: + print(f" All cells impassable (test data too steep)") + + print("\nTesting cost grid with friction:") + elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain + friction = np.ones((10, 10), dtype=np.float32) * 1.5 # 1.5x friction + friction[5, 5] = np.inf # one impassable cell + cost = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) + print(f" Base cost (flat, 30m cell): {30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)):.1f} s") + print(f" With 1.5x friction: {cost[0, 0]:.1f} s") + print(f" Impassable cells: {np.sum(np.isinf(cost))}") + + print("\nTesting cost grid with barriers (three modes):") + elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain + barriers = np.zeros((10, 10), dtype=np.uint8) + barriers[3:7, 3:7] = 255 # 4x4 closed area in center + + base_cost = 30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)) + + for mode in ["strict", "pragmatic", "emergency"]: + cost = compute_cost_grid(elev, cell_size_m=30.0, barriers=barriers, boundary_mode=mode) + impassable = np.sum(np.isinf(cost)) + barrier_cost = cost[5, 5] if not np.isinf(cost[5, 5]) else "inf" + print(f" {mode:10s}: {impassable} impassable, barrier cell cost = {barrier_cost}") diff --git a/lib/offroute/prototype.py b/lib/offroute/prototype.py index 9822021..b5caf86 100755 --- a/lib/offroute/prototype.py +++ b/lib/offroute/prototype.py @@ -1,18 +1,16 @@ #!/usr/bin/env python3 """ -OFFROUTE Phase O2b Prototype +OFFROUTE Phase O2c Prototype -Validates the PMTiles decoder, Tobler cost function, WorldCover friction -integration, and MCP pathfinder on a real Idaho bounding box. +Validates the PMTiles decoder, Tobler cost function, WorldCover friction, +PAD-US barriers integration, and MCP pathfinder on a real Idaho bounding box. -Now includes friction layer to avoid water bodies like Murtaugh Lake. +Runs THREE pathfinding passes with different boundary modes: + 1. boundary_mode="strict" - private land is impassable + 2. boundary_mode="pragmatic" - private land has 5x friction penalty + 3. boundary_mode="emergency" - private land barriers ignored -Test bbox (four Idaho towns as corners): - SW: Rogerson, ID (~42.21, -114.60) - NW: Buhl, ID (~42.60, -114.76) - NE: Burley, ID (~42.54, -113.79) - SE: Oakley, ID (~42.24, -113.88) - Approximate bbox: south=42.21, north=42.60, west=-114.76, east=-113.79 +Outputs comparison showing impact of boundary mode on routing. """ import json import time @@ -28,8 +26,9 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from lib.offroute.dem import DEMReader from lib.offroute.cost import compute_cost_grid from lib.offroute.friction import FrictionReader, friction_to_multiplier +from lib.offroute.barriers import BarrierReader, DEFAULT_BARRIERS_PATH -# Test bounding box +# Test bounding box - Idaho area known to have mixed public/private land BBOX = { "south": 42.21, "north": 42.60, @@ -38,26 +37,25 @@ BBOX = { } # Start point: wilderness area south of Twin Falls -# (in the Sawtooth National Forest foothills) -START_LAT = 42.35 -START_LON = -114.50 +START_LAT = 42.36 +START_LON = -114.55 # End point: near Burley, ID (on road network) -END_LAT = 42.52 -END_LON = -113.85 - -# Murtaugh Lake - actual water extent from WorldCover -LAKE_BOUNDS = { - "south": 42.44, - "north": 42.50, - "west": -114.20, - "east": -114.10, -} -LAKE_CENTER = (42.465, -114.155) # Verified water in WorldCover +END_LAT = 42.55 +END_LON = -114.25 # Output files -OUTPUT_PATH_O1 = Path("/opt/recon/data/offroute-test.geojson") -OUTPUT_PATH_FRICTION = Path("/opt/recon/data/offroute-test-friction.geojson") +OUTPUT_PATHS = { + "strict": Path("/opt/recon/data/offroute-test-strict.geojson"), + "pragmatic": Path("/opt/recon/data/offroute-test-pragmatic.geojson"), + "emergency": Path("/opt/recon/data/offroute-test-emergency.geojson"), +} + +# Old files to delete +OLD_FILES = [ + Path("/opt/recon/data/offroute-test-barriers-on.geojson"), + Path("/opt/recon/data/offroute-test-barriers-off.geojson"), +] # Memory limit in GB MEMORY_LIMIT_GB = 12 @@ -77,40 +75,139 @@ def check_memory_usage(): return 0 -def path_crosses_lake(coordinates, lake_bounds): - """Check if any path coordinates fall within the lake bounding box.""" - for lon, lat in coordinates: - if (lake_bounds["south"] <= lat <= lake_bounds["north"] and - lake_bounds["west"] <= lon <= lake_bounds["east"]): - return True, (lat, lon) - return False, None +def run_pathfinder( + elevation: np.ndarray, + meta: dict, + friction_mult: np.ndarray, + barriers: np.ndarray, + boundary_mode: str, + start_row: int, + start_col: int, + end_row: int, + end_col: int, + dem_reader: DEMReader, +) -> dict: + """ + Run the MCP pathfinder with given parameters. + + Returns dict with path info and stats. + """ + # Compute cost grid + cost = compute_cost_grid( + elevation, + cell_size_m=meta["cell_size_m"], + friction=friction_mult, + barriers=barriers, + boundary_mode=boundary_mode, + ) + + # Count impassable cells + impassable_count = np.sum(np.isinf(cost)) + barrier_count = np.sum(barriers == 255) if barriers is not None else 0 + + # Run MCP + mcp = MCP_Geometric(cost, fully_connected=True) + cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) + + end_cost = cumulative_costs[end_row, end_col] + + if np.isinf(end_cost): + return { + "success": False, + "reason": "No path found (blocked by impassable terrain)", + "impassable_cells": int(impassable_count), + "barrier_cells": int(barrier_count), + } + + # Traceback path + path_indices = mcp.traceback((end_row, end_col)) + + # Convert to coordinates + coordinates = [] + elevations = [] + barrier_values = [] + + for row, col in path_indices: + lat, lon = dem_reader.pixel_to_latlon(row, col, meta) + elev = elevation[row, col] + barr = barriers[row, col] if barriers is not None else 0 + coordinates.append([lon, lat]) + elevations.append(elev) + barrier_values.append(barr) + + # Compute distance + total_distance_m = 0 + for i in range(1, len(coordinates)): + lon1, lat1 = coordinates[i-1] + lon2, lat2 = coordinates[i] + R = 6371000 + dlat = np.radians(lat2 - lat1) + dlon = np.radians(lon2 - lon1) + a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2 + c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a)) + total_distance_m += R * c + + # Elevation stats + elev_arr = np.array(elevations) + elev_diff = np.diff(elev_arr) + elev_gain = np.sum(elev_diff[elev_diff > 0]) + elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) + + # Barrier crossings on path + barr_arr = np.array(barrier_values) + barrier_crossings = np.sum(barr_arr == 255) + + return { + "success": True, + "coordinates": coordinates, + "total_time_seconds": float(end_cost), + "total_time_minutes": float(end_cost / 60), + "total_distance_m": float(total_distance_m), + "total_distance_km": float(total_distance_m / 1000), + "elevation_gain_m": float(elev_gain), + "elevation_loss_m": float(elev_loss), + "min_elevation_m": float(np.min(elev_arr)), + "max_elevation_m": float(np.max(elev_arr)), + "cell_count": len(path_indices), + "impassable_cells": int(impassable_count), + "barrier_cells": int(barrier_count), + "barrier_crossings": int(barrier_crossings), + } def main(): - print("=" * 60) - print("OFFROUTE Phase O2b Prototype (with Friction)") - print("=" * 60) + print("=" * 80) + print("OFFROUTE Phase O2c Prototype (Three-Mode Boundary Respect)") + print("=" * 80) t0 = time.time() + # Delete old output files + for old_file in OLD_FILES: + if old_file.exists(): + old_file.unlink() + print(f"Deleted old file: {old_file}") + + # Check if barrier raster exists + if not DEFAULT_BARRIERS_PATH.exists(): + print(f"\nERROR: Barrier raster not found at {DEFAULT_BARRIERS_PATH}") + print(f"Run first: python /opt/recon/lib/offroute/barriers.py build") + sys.exit(1) + # Step 1: Load elevation data print(f"\n[1] Loading DEM for bbox: {BBOX}") dem_reader = DEMReader() - t1 = time.time() elevation, meta = dem_reader.get_elevation_grid( south=BBOX["south"], north=BBOX["north"], west=BBOX["west"], east=BBOX["east"], ) - t2 = time.time() print(f" Elevation grid shape: {elevation.shape}") print(f" Cell count: {elevation.size:,}") print(f" Cell size: {meta['cell_size_m']:.1f} m") - print(f" Elevation range: {np.nanmin(elevation):.0f} - {np.nanmax(elevation):.0f} m") - print(f" Load time: {t2 - t1:.1f}s") mem = check_memory_usage() if mem > 0: @@ -118,19 +215,8 @@ def main(): # Step 2: Load friction data print(f"\n[2] Loading WorldCover friction layer...") - t2a = time.time() - friction_reader = FrictionReader() - # Validate lake is marked as impassable - lake_friction = friction_reader.sample_point(LAKE_CENTER[0], LAKE_CENTER[1]) - print(f" Murtaugh Lake center ({LAKE_CENTER[0]}, {LAKE_CENTER[1]}): friction = {lake_friction}") - if lake_friction != 255: - print(f" WARNING: Lake not marked as water (expected 255, got {lake_friction})") - else: - print(f" Lake correctly marked as impassable (255)") - - # Load friction grid matching elevation shape friction_raw = friction_reader.get_friction_grid( south=BBOX["south"], north=BBOX["north"], @@ -138,36 +224,30 @@ def main(): east=BBOX["east"], target_shape=elevation.shape ) - t2b = time.time() - - # Convert to multipliers friction_mult = friction_to_multiplier(friction_raw) - impassable_count = np.sum(np.isinf(friction_mult)) print(f" Friction grid shape: {friction_raw.shape}") - print(f" Unique friction values: {np.unique(friction_raw[friction_raw > 0])}") - print(f" Impassable cells (water/nodata): {impassable_count:,} ({100*impassable_count/friction_raw.size:.1f}%)") - print(f" Load time: {t2b - t2a:.1f}s") + print(f" Water/impassable cells: {np.sum(np.isinf(friction_mult)):,}") - mem = check_memory_usage() - if mem > 0: - print(f" Memory usage: {mem:.1f} GB") + # Step 3: Load barrier data + print(f"\n[3] Loading PAD-US barrier layer...") + barrier_reader = BarrierReader() - # Step 3: Compute cost grid with friction - print(f"\n[3] Computing Tobler cost grid with friction...") - t3 = time.time() - cost = compute_cost_grid( - elevation, - cell_size_m=meta["cell_size_m"], - friction=friction_mult + barriers = barrier_reader.get_barrier_grid( + south=BBOX["south"], + north=BBOX["north"], + west=BBOX["west"], + east=BBOX["east"], + target_shape=elevation.shape ) - t4 = time.time() - finite_cost = cost[~np.isinf(cost)] - total_impassable = np.sum(np.isinf(cost)) - print(f" Cost range: {finite_cost.min():.1f} - {finite_cost.max():.1f} s/cell") - print(f" Total impassable cells: {total_impassable:,} ({100*total_impassable/cost.size:.1f}%)") - print(f" Compute time: {t4 - t3:.1f}s") + closed_cells = np.sum(barriers == 255) + print(f" Barrier grid shape: {barriers.shape}") + print(f" Closed/restricted cells: {closed_cells:,} ({100*closed_cells/barriers.size:.2f}%)") + + if closed_cells == 0: + print("\n WARNING: No closed/restricted areas in this bbox.") + print(" The test may not show meaningful differences between modes.") mem = check_memory_usage() if mem > 0: @@ -190,196 +270,122 @@ def main(): print(f"ERROR: End point outside grid bounds") sys.exit(1) - start_elev = elevation[start_row, start_col] - end_elev = elevation[end_row, end_col] - print(f" Start elevation: {start_elev:.0f} m") - print(f" End elevation: {end_elev:.0f} m") + # Step 5: Run pathfinder THREE times + results = {} + modes = ["strict", "pragmatic", "emergency"] - # Step 5: Run MCP pathfinder - print(f"\n[5] Running MCP_Geometric pathfinder...") - t5 = time.time() + for i, mode in enumerate(modes, start=5): + print(f"\n[{i}] Running pathfinder (boundary_mode=\"{mode}\")...") + t_start = time.time() + results[mode] = run_pathfinder( + elevation, meta, friction_mult, barriers, + boundary_mode=mode, + start_row=start_row, start_col=start_col, + end_row=end_row, end_col=end_col, + dem_reader=dem_reader, + ) + t_end = time.time() + print(f" Completed in {t_end - t_start:.1f}s") - mcp = MCP_Geometric(cost, fully_connected=True) - cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) - t6 = time.time() + # Step 6: Save GeoJSON outputs + print(f"\n[8] Saving GeoJSON outputs...") - print(f" Dijkstra completed in {t6 - t5:.1f}s") + OUTPUT_PATHS["strict"].parent.mkdir(parents=True, exist_ok=True) - end_cost = cumulative_costs[end_row, end_col] - print(f" Total cost to endpoint: {end_cost:.0f} seconds ({end_cost/60:.1f} minutes)") + for mode, result in results.items(): + output_path = OUTPUT_PATHS[mode] + if result["success"]: + geojson = { + "type": "Feature", + "properties": { + "type": f"offroute_{mode}", + "phase": "O2c", + "boundary_mode": mode, + "start": {"lat": START_LAT, "lon": START_LON}, + "end": {"lat": END_LAT, "lon": END_LON}, + **{k: v for k, v in result.items() if k not in ["success", "coordinates"]}, + }, + "geometry": { + "type": "LineString", + "coordinates": result["coordinates"], + } + } + with open(output_path, "w") as f: + json.dump(geojson, f, indent=2) + print(f" Saved: {output_path}") + else: + print(f" SKIPPED ({mode}): {result['reason']}") - if np.isinf(end_cost): - print("ERROR: No path found to endpoint (blocked by impassable terrain)") - sys.exit(1) + t_total = time.time() - t7 = time.time() - path_indices = mcp.traceback((end_row, end_col)) - t8 = time.time() + # Final report - three-way comparison + print(f"\n" + "=" * 80) + print("THREE-WAY COMPARISON") + print("=" * 80) - print(f" Traceback completed in {t8 - t7:.2f}s") - print(f" Path length: {len(path_indices)} cells") + # Check how many succeeded + success_count = sum(1 for r in results.values() if r["success"]) - mem = check_memory_usage() - if mem > 0: - print(f" Memory usage: {mem:.1f} GB") + if success_count == 3: + print(f"{'Metric':<22} {'STRICT':<18} {'PRAGMATIC':<18} {'EMERGENCY':<18}") + print("-" * 80) - # Step 6: Convert path to coordinates and compute stats - print(f"\n[6] Converting path to GeoJSON...") + metrics = [ + ("Distance (km)", "total_distance_km", ".2f"), + ("Effort time (min)", "total_time_minutes", ".1f"), + ("Cell count", "cell_count", "d"), + ("Elevation gain (m)", "elevation_gain_m", ".0f"), + ("Elevation loss (m)", "elevation_loss_m", ".0f"), + ("Barrier crossings", "barrier_crossings", "d"), + ("Impassable cells", "impassable_cells", ",d"), + ] - coordinates = [] - elevations = [] - friction_values = [] + for label, key, fmt in metrics: + vals = [results[m][key] for m in modes] + print(f"{label:<22} {vals[0]:<18{fmt}} {vals[1]:<18{fmt}} {vals[2]:<18{fmt}}") - for row, col in path_indices: - lat, lon = dem_reader.pixel_to_latlon(row, col, meta) - elev = elevation[row, col] - fric = friction_raw[row, col] - coordinates.append([lon, lat]) - elevations.append(elev) - friction_values.append(fric) + # Analysis + print(f"\n" + "-" * 80) + print("ANALYSIS") + print("-" * 80) - # Compute path distance - total_distance_m = 0 - for i in range(1, len(coordinates)): - lon1, lat1 = coordinates[i-1] - lon2, lat2 = coordinates[i] - R = 6371000 - dlat = np.radians(lat2 - lat1) - dlon = np.radians(lon2 - lon1) - a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2 - c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a)) - total_distance_m += R * c + strict_crossings = results["strict"]["barrier_crossings"] + pragmatic_crossings = results["pragmatic"]["barrier_crossings"] + emergency_crossings = results["emergency"]["barrier_crossings"] - # Compute elevation gain/loss - elev_arr = np.array(elevations) - elev_diff = np.diff(elev_arr) - elev_gain = np.sum(elev_diff[elev_diff > 0]) - elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) + print(f"Barrier crossings: strict={strict_crossings}, pragmatic={pragmatic_crossings}, emergency={emergency_crossings}") - # Friction stats along path - fric_arr = np.array(friction_values) - valid_fric = fric_arr[(fric_arr > 0) & (fric_arr < 255)] + if strict_crossings == 0 and pragmatic_crossings == 0 and emergency_crossings == 0: + print("No path crosses private land - terrain naturally avoids barriers.") + else: + if emergency_crossings > pragmatic_crossings: + print(f"Pragmatic mode reduces barrier crossings vs emergency: {emergency_crossings} -> {pragmatic_crossings}") + if pragmatic_crossings > 0 and strict_crossings == 0: + print(f"Strict mode completely avoids private land (pragmatic crosses {pragmatic_crossings} cells)") - # Build GeoJSON - geojson = { - "type": "Feature", - "properties": { - "type": "offroute_prototype_friction", - "phase": "O2b", - "start": {"lat": START_LAT, "lon": START_LON}, - "end": {"lat": END_LAT, "lon": END_LON}, - "total_time_seconds": float(end_cost), - "total_time_minutes": float(end_cost / 60), - "total_distance_m": float(total_distance_m), - "total_distance_km": float(total_distance_m / 1000), - "elevation_gain_m": float(elev_gain), - "elevation_loss_m": float(elev_loss), - "min_elevation_m": float(np.min(elev_arr)), - "max_elevation_m": float(np.max(elev_arr)), - "friction_min": int(valid_fric.min()) if len(valid_fric) > 0 else 0, - "friction_max": int(valid_fric.max()) if len(valid_fric) > 0 else 0, - "friction_mean": float(valid_fric.mean()) if len(valid_fric) > 0 else 0, - "cell_count": len(path_indices), - "cell_size_m": meta["cell_size_m"], - }, - "geometry": { - "type": "LineString", - "coordinates": coordinates, - } - } + # Time/distance comparison + if results["strict"]["total_time_minutes"] > results["emergency"]["total_time_minutes"]: + time_penalty = results["strict"]["total_time_minutes"] - results["emergency"]["total_time_minutes"] + print(f"Time cost of strict boundary respect: +{time_penalty:.1f} min") - # Write output - OUTPUT_PATH_FRICTION.parent.mkdir(parents=True, exist_ok=True) - with open(OUTPUT_PATH_FRICTION, "w") as f: - json.dump(geojson, f, indent=2) - - t_end = time.time() - - # Final report - print(f"\n" + "=" * 60) - print("RESULTS (Phase O2b with Friction)") - print("=" * 60) - print(f"Start: ({START_LAT:.4f}, {START_LON:.4f})") - print(f"End: ({END_LAT:.4f}, {END_LON:.4f})") - print(f"Total effort: {end_cost/60:.1f} minutes ({end_cost/3600:.2f} hours)") - print(f"Distance: {total_distance_m/1000:.2f} km") - print(f"Elevation gain: {elev_gain:.0f} m") - print(f"Elevation loss: {elev_loss:.0f} m") - print(f"Elevation range: {np.min(elev_arr):.0f} - {np.max(elev_arr):.0f} m") - if len(valid_fric) > 0: - print(f"Friction (path): min={valid_fric.min()}, max={valid_fric.max()}, mean={valid_fric.mean():.1f}") - print(f"Path cells: {len(path_indices):,}") - print(f"Wall time: {t_end - t0:.1f}s") - print(f"\nOutput saved to: {OUTPUT_PATH_FRICTION}") - - # Validation - print(f"\n" + "-" * 60) - print("VALIDATION") - print("-" * 60) - - # Check coordinates are within bbox - lons = [c[0] for c in coordinates] - lats = [c[1] for c in coordinates] - lon_ok = BBOX["west"] <= min(lons) and max(lons) <= BBOX["east"] - lat_ok = BBOX["south"] <= min(lats) and max(lats) <= BBOX["north"] - print(f"Coordinates within bbox: {'PASS' if lon_ok and lat_ok else 'FAIL'}") - - # Check path is not trivial - is_nontrivial = len(path_indices) > 10 and total_distance_m > 1000 - print(f"Path is non-trivial: {'PASS' if is_nontrivial else 'FAIL'}") - - # Check sinuosity - straight_line_dist = np.sqrt( - (coordinates[-1][0] - coordinates[0][0])**2 + - (coordinates[-1][1] - coordinates[0][1])**2 - ) * 111000 - sinuosity = total_distance_m / max(straight_line_dist, 1) - print(f"Sinuosity: {sinuosity:.2f} (>1.0 means path curves around obstacles)") - - # CRITICAL: Check no water cells (friction=255) on path - # This is the authoritative test - friction layer prevents water crossings - print(f"\n--- Water Avoidance Check ---") - water_on_path = np.sum(fric_arr == 255) - if water_on_path > 0: - print(f"FAIL: Path crosses {water_on_path} water cells (friction=255)") - sys.exit(1) else: - print(f"PASS: No water cells (friction=255) on path") + print(f"Only {success_count}/3 modes found a path:") + for mode, result in results.items(): + if result["success"]: + print(f" {mode}: {result['total_distance_km']:.2f} km, {result['total_time_minutes']:.1f} min") + else: + print(f" {mode}: FAILED - {result.get('reason', 'unknown')}") - # Informational: Check if path goes through lake bounding box - # Path may go through land cells within the bbox, which is fine - print(f"\n--- Lake Bounding Box Check (informational) ---") - print(f"Murtaugh Lake bounds: {LAKE_BOUNDS}") - crosses_lake, crossing_point = path_crosses_lake(coordinates, LAKE_BOUNDS) - if crosses_lake: - print(f"INFO: Path passes through lake bbox at {crossing_point}") - print(f" (This is OK if friction check passed - path uses land cells)") - else: - print(f"PASS: Path does not enter lake bounding box") - - # Compare with Phase O1 if available - print(f"\n" + "-" * 60) - print("COMPARISON: Phase O1 vs O2b") - print("-" * 60) - - if OUTPUT_PATH_O1.exists(): - with open(OUTPUT_PATH_O1) as f: - o1_data = json.load(f) - o1_props = o1_data["properties"] - - print(f"{'Metric':<20} {'O1 (no friction)':<20} {'O2b (with friction)':<20}") - print("-" * 60) - print(f"{'Distance (km)':<20} {o1_props['total_distance_km']:<20.2f} {total_distance_m/1000:<20.2f}") - print(f"{'Effort (min)':<20} {o1_props['total_time_minutes']:<20.1f} {end_cost/60:<20.1f}") - print(f"{'Cell count':<20} {o1_props['cell_count']:<20} {len(path_indices):<20}") - print(f"{'Elev gain (m)':<20} {o1_props['elevation_gain_m']:<20.0f} {elev_gain:<20.0f}") - else: - print(f"Phase O1 output not found at {OUTPUT_PATH_O1}") - print(f"Run the O1 prototype first to enable comparison.") + print(f"\n" + "-" * 80) + print(f"Total wall time: {t_total - t0:.1f}s") + print(f"Closed cells in bbox: {closed_cells:,}") + # Cleanup dem_reader.close() friction_reader.close() - print("\nPrototype completed successfully.") + barrier_reader.close() + + print("\nPrototype completed.") if __name__ == "__main__": From 3293cb4238a1ed489b9d6eb2259a7a4094b5331c Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 07:26:25 +0000 Subject: [PATCH 37/72] =?UTF-8?q?feat(offroute):=20Phase=20O3a=20=E2=80=94?= =?UTF-8?q?=20trail=20burn-in,=20pathfinder=20seeks=20trail=20corridors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trail friction REPLACES land cover friction where trails exist: - Road (value 5): 0.1× friction - Track (value 15): 0.3× friction - Foot trail (value 25): 0.5× friction TrailReader loads /mnt/nav/worldcover/trails.tif rasterized from OSM highways. Validation shows trail-seeking behavior: - On-trail travel: 17.3% → 98.7% - Effort time: 1047 min → 155 min (-85.2%) - Path travels farther but stays on roads for speed Co-Authored-By: Claude Opus 4.5 --- lib/offroute/cost.py | 70 ++++++--- lib/offroute/prototype.py | 302 ++++++++++++++++++++------------------ lib/offroute/trails.py | 174 ++++++++++++++++++++++ 3 files changed, 389 insertions(+), 157 deletions(-) create mode 100644 lib/offroute/trails.py diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py index f31b8f5..5f3618c 100644 --- a/lib/offroute/cost.py +++ b/lib/offroute/cost.py @@ -3,7 +3,7 @@ Tobler off-path hiking cost function for OFFROUTE. Computes travel time cost based on terrain slope using Tobler's hiking function with off-trail penalty. Optionally applies friction -multipliers from land cover data and barrier grids from PAD-US. +multipliers from land cover data, trail corridors, and barrier grids. """ import math import numpy as np @@ -19,6 +19,14 @@ TOBLER_OFF_TRAIL_MULT = 0.6 # Pragmatic mode friction multiplier for private land PRAGMATIC_BARRIER_MULTIPLIER = 5.0 +# Trail value to friction multiplier mapping +# Trail friction REPLACES land cover friction (a road through forest is still easy) +TRAIL_FRICTION_MAP = { + 5: 0.1, # road + 15: 0.3, # track + 25: 0.5, # foot trail +} + def tobler_speed(grade: float) -> float: """ @@ -37,6 +45,7 @@ def compute_cost_grid( cell_size_lat_m: float = None, cell_size_lon_m: float = None, friction: Optional[np.ndarray] = None, + trails: Optional[np.ndarray] = None, barriers: Optional[np.ndarray] = None, boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" ) -> np.ndarray: @@ -51,10 +60,17 @@ def compute_cost_grid( cell_size_m: Average cell size in meters cell_size_lat_m: Cell size in latitude direction (optional) cell_size_lon_m: Cell size in longitude direction (optional) - friction: Optional 2D array of friction multipliers. + friction: Optional 2D array of friction multipliers (WorldCover). Values should be float (1.0 = baseline, 2.0 = 2x slower). np.inf marks impassable cells. If None, no friction is applied (backward compatible). + trails: Optional 2D array of trail values (uint8). + 0 = no trail (use friction) + 5 = road (0.1× friction, replaces WorldCover) + 15 = track (0.3× friction, replaces WorldCover) + 25 = foot trail (0.5× friction, replaces WorldCover) + Trail friction REPLACES land cover friction where trails exist. + If None, no trail burn-in is applied. barriers: Optional 2D array of barrier values (uint8). 255 = closed/restricted area (from PAD-US Pub_Access = XA). 0 = accessible. @@ -111,14 +127,30 @@ def compute_cost_grid( # Handle NaN elevations (no data) cost[np.isnan(elevation)] = np.inf - # Apply friction multipliers if provided + # Build effective friction array + # Start with WorldCover friction if provided, else 1.0 if friction is not None: if friction.shape != elevation.shape: raise ValueError( f"Friction shape {friction.shape} does not match elevation shape {elevation.shape}" ) - # Multiply cost by friction (inf * anything = inf, which is correct) - cost = cost * friction + effective_friction = friction.copy() + else: + effective_friction = np.ones(elevation.shape, dtype=np.float32) + + # Apply trail burn-in: trails REPLACE land cover friction + if trails is not None: + if trails.shape != elevation.shape: + raise ValueError( + f"Trails shape {trails.shape} does not match elevation shape {elevation.shape}" + ) + # Replace friction where trails exist + for trail_value, trail_friction in TRAIL_FRICTION_MAP.items(): + trail_mask = trails == trail_value + effective_friction[trail_mask] = trail_friction + + # Apply friction to cost + cost = cost * effective_friction # Apply barriers based on boundary_mode if barriers is not None and boundary_mode != "emergency": @@ -145,7 +177,7 @@ if __name__ == "__main__": speed = tobler_speed(grade) print(f" Grade {grade:+.2f}: {speed:.2f} km/h") - print("\nTesting cost grid computation (no friction, no barriers):") + print("\nTesting cost grid computation (no friction, no trails):") elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 cost = compute_cost_grid(elev, cell_size_m=30.0) print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") @@ -155,21 +187,25 @@ if __name__ == "__main__": else: print(f" All cells impassable (test data too steep)") - print("\nTesting cost grid with friction:") + print("\nTesting cost grid with friction and trails:") elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain - friction = np.ones((10, 10), dtype=np.float32) * 1.5 # 1.5x friction - friction[5, 5] = np.inf # one impassable cell - cost = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) - print(f" Base cost (flat, 30m cell): {30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)):.1f} s") - print(f" With 1.5x friction: {cost[0, 0]:.1f} s") - print(f" Impassable cells: {np.sum(np.isinf(cost))}") + friction = np.ones((10, 10), dtype=np.float32) * 2.0 # 2.0x friction (forest) + trails = np.zeros((10, 10), dtype=np.uint8) + trails[5, :] = 5 # road across middle row - print("\nTesting cost grid with barriers (three modes):") - elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain - barriers = np.zeros((10, 10), dtype=np.uint8) - barriers[3:7, 3:7] = 255 # 4x4 closed area in center + cost_no_trail = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) + cost_with_trail = compute_cost_grid(elev, cell_size_m=30.0, friction=friction, trails=trails) base_cost = 30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)) + print(f" Base cost (flat, 30m cell): {base_cost:.1f} s") + print(f" Forest cell (2.0x friction): {cost_no_trail[0, 0]:.1f} s") + print(f" Road cell (0.1x friction, replaces forest): {cost_with_trail[5, 0]:.1f} s") + print(f" Road friction advantage: {cost_no_trail[0, 0] / cost_with_trail[5, 0]:.1f}x faster") + + print("\nTesting cost grid with barriers (three modes):") + elev = np.ones((10, 10), dtype=np.float32) * 1000 + barriers = np.zeros((10, 10), dtype=np.uint8) + barriers[3:7, 3:7] = 255 for mode in ["strict", "pragmatic", "emergency"]: cost = compute_cost_grid(elev, cell_size_m=30.0, barriers=barriers, boundary_mode=mode) diff --git a/lib/offroute/prototype.py b/lib/offroute/prototype.py index b5caf86..c9b78f0 100755 --- a/lib/offroute/prototype.py +++ b/lib/offroute/prototype.py @@ -1,16 +1,12 @@ #!/usr/bin/env python3 """ -OFFROUTE Phase O2c Prototype +OFFROUTE Phase O3a Prototype -Validates the PMTiles decoder, Tobler cost function, WorldCover friction, -PAD-US barriers integration, and MCP pathfinder on a real Idaho bounding box. +Validates trail burn-in integration with the MCP pathfinder. +The path should actively seek out trails and roads when nearby. -Runs THREE pathfinding passes with different boundary modes: - 1. boundary_mode="strict" - private land is impassable - 2. boundary_mode="pragmatic" - private land has 5x friction penalty - 3. boundary_mode="emergency" - private land barriers ignored - -Outputs comparison showing impact of boundary mode on routing. +Compares paths with and without trail burn-in to show the benefit +of trail-seeking behavior. """ import json import time @@ -27,8 +23,9 @@ from lib.offroute.dem import DEMReader from lib.offroute.cost import compute_cost_grid from lib.offroute.friction import FrictionReader, friction_to_multiplier from lib.offroute.barriers import BarrierReader, DEFAULT_BARRIERS_PATH +from lib.offroute.trails import TrailReader, DEFAULT_TRAILS_PATH -# Test bounding box - Idaho area known to have mixed public/private land +# Test bounding box - Idaho area BBOX = { "south": 42.21, "north": 42.60, @@ -36,26 +33,17 @@ BBOX = { "east": -113.79, } -# Start point: wilderness area south of Twin Falls -START_LAT = 42.36 -START_LON = -114.55 +# Start point: wilderness area away from roads +START_LAT = 42.35 +START_LON = -114.60 -# End point: near Burley, ID (on road network) +# End point: near Twin Falls (has roads/trails) END_LAT = 42.55 -END_LON = -114.25 +END_LON = -114.20 # Output files -OUTPUT_PATHS = { - "strict": Path("/opt/recon/data/offroute-test-strict.geojson"), - "pragmatic": Path("/opt/recon/data/offroute-test-pragmatic.geojson"), - "emergency": Path("/opt/recon/data/offroute-test-emergency.geojson"), -} - -# Old files to delete -OLD_FILES = [ - Path("/opt/recon/data/offroute-test-barriers-on.geojson"), - Path("/opt/recon/data/offroute-test-barriers-off.geojson"), -] +OUTPUT_PATH_WITH_TRAILS = Path("/opt/recon/data/offroute-test-trails.geojson") +OUTPUT_PATH_NO_TRAILS = Path("/opt/recon/data/offroute-test-no-trails.geojson") # Memory limit in GB MEMORY_LIMIT_GB = 12 @@ -79,32 +67,26 @@ def run_pathfinder( elevation: np.ndarray, meta: dict, friction_mult: np.ndarray, + trails: np.ndarray, barriers: np.ndarray, - boundary_mode: str, + use_trails: bool, start_row: int, start_col: int, end_row: int, end_col: int, dem_reader: DEMReader, ) -> dict: - """ - Run the MCP pathfinder with given parameters. - - Returns dict with path info and stats. - """ + """Run the MCP pathfinder with given parameters.""" # Compute cost grid cost = compute_cost_grid( elevation, cell_size_m=meta["cell_size_m"], friction=friction_mult, + trails=trails if use_trails else None, barriers=barriers, - boundary_mode=boundary_mode, + boundary_mode="pragmatic", ) - # Count impassable cells - impassable_count = np.sum(np.isinf(cost)) - barrier_count = np.sum(barriers == 255) if barriers is not None else 0 - # Run MCP mcp = MCP_Geometric(cost, fully_connected=True) cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) @@ -115,25 +97,23 @@ def run_pathfinder( return { "success": False, "reason": "No path found (blocked by impassable terrain)", - "impassable_cells": int(impassable_count), - "barrier_cells": int(barrier_count), } # Traceback path path_indices = mcp.traceback((end_row, end_col)) - # Convert to coordinates + # Convert to coordinates and collect stats coordinates = [] elevations = [] - barrier_values = [] + trail_values = [] for row, col in path_indices: lat, lon = dem_reader.pixel_to_latlon(row, col, meta) elev = elevation[row, col] - barr = barriers[row, col] if barriers is not None else 0 + trail_val = trails[row, col] if trails is not None else 0 coordinates.append([lon, lat]) elevations.append(elev) - barrier_values.append(barr) + trail_values.append(trail_val) # Compute distance total_distance_m = 0 @@ -153,9 +133,14 @@ def run_pathfinder( elev_gain = np.sum(elev_diff[elev_diff > 0]) elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) - # Barrier crossings on path - barr_arr = np.array(barrier_values) - barrier_crossings = np.sum(barr_arr == 255) + # Trail stats + trail_arr = np.array(trail_values) + road_cells = np.sum(trail_arr == 5) + track_cells = np.sum(trail_arr == 15) + trail_cells = np.sum(trail_arr == 25) + off_trail_cells = np.sum(trail_arr == 0) + on_trail_cells = road_cells + track_cells + trail_cells + total_cells = len(trail_arr) return { "success": True, @@ -168,30 +153,28 @@ def run_pathfinder( "elevation_loss_m": float(elev_loss), "min_elevation_m": float(np.min(elev_arr)), "max_elevation_m": float(np.max(elev_arr)), - "cell_count": len(path_indices), - "impassable_cells": int(impassable_count), - "barrier_cells": int(barrier_count), - "barrier_crossings": int(barrier_crossings), + "cell_count": total_cells, + "road_cells": int(road_cells), + "track_cells": int(track_cells), + "trail_cells": int(trail_cells), + "off_trail_cells": int(off_trail_cells), + "on_trail_pct": float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0, } def main(): print("=" * 80) - print("OFFROUTE Phase O2c Prototype (Three-Mode Boundary Respect)") + print("OFFROUTE Phase O3a Prototype (Trail Burn-In)") print("=" * 80) t0 = time.time() - # Delete old output files - for old_file in OLD_FILES: - if old_file.exists(): - old_file.unlink() - print(f"Deleted old file: {old_file}") - - # Check if barrier raster exists + # Check for required rasters if not DEFAULT_BARRIERS_PATH.exists(): print(f"\nERROR: Barrier raster not found at {DEFAULT_BARRIERS_PATH}") - print(f"Run first: python /opt/recon/lib/offroute/barriers.py build") + sys.exit(1) + if not DEFAULT_TRAILS_PATH.exists(): + print(f"\nERROR: Trails raster not found at {DEFAULT_TRAILS_PATH}") sys.exit(1) # Step 1: Load elevation data @@ -243,25 +226,42 @@ def main(): closed_cells = np.sum(barriers == 255) print(f" Barrier grid shape: {barriers.shape}") - print(f" Closed/restricted cells: {closed_cells:,} ({100*closed_cells/barriers.size:.2f}%)") + print(f" Closed/restricted cells: {closed_cells:,}") - if closed_cells == 0: - print("\n WARNING: No closed/restricted areas in this bbox.") - print(" The test may not show meaningful differences between modes.") + # Step 4: Load trails data + print(f"\n[4] Loading OSM trails layer...") + trail_reader = TrailReader() + + trails = trail_reader.get_trails_grid( + south=BBOX["south"], + north=BBOX["north"], + west=BBOX["west"], + east=BBOX["east"], + target_shape=elevation.shape + ) + + road_cells = np.sum(trails == 5) + track_cells = np.sum(trails == 15) + trail_cells = np.sum(trails == 25) + print(f" Trails grid shape: {trails.shape}") + print(f" Road cells: {road_cells:,}") + print(f" Track cells: {track_cells:,}") + print(f" Trail cells: {trail_cells:,}") + print(f" Total trail coverage: {100*(road_cells+track_cells+trail_cells)/trails.size:.2f}%") mem = check_memory_usage() if mem > 0: print(f" Memory usage: {mem:.1f} GB") - # Step 4: Convert start/end to pixel coordinates - print(f"\n[4] Converting coordinates...") + # Step 5: Convert start/end to pixel coordinates + print(f"\n[5] Converting coordinates...") start_row, start_col = dem_reader.latlon_to_pixel(START_LAT, START_LON, meta) end_row, end_col = dem_reader.latlon_to_pixel(END_LAT, END_LON, meta) print(f" Start: ({START_LAT}, {START_LON}) -> pixel ({start_row}, {start_col})") print(f" End: ({END_LAT}, {END_LON}) -> pixel ({end_row}, {end_col})") - # Validate coordinates are within bounds + # Validate coordinates rows, cols = elevation.shape if not (0 <= start_row < rows and 0 <= start_col < cols): print(f"ERROR: Start point outside grid bounds") @@ -270,64 +270,86 @@ def main(): print(f"ERROR: End point outside grid bounds") sys.exit(1) - # Step 5: Run pathfinder THREE times - results = {} - modes = ["strict", "pragmatic", "emergency"] + # Step 6: Run pathfinder WITH trails + print(f"\n[6] Running pathfinder WITH trail burn-in...") + t6a = time.time() + result_trails = run_pathfinder( + elevation, meta, friction_mult, trails, barriers, + use_trails=True, + start_row=start_row, start_col=start_col, + end_row=end_row, end_col=end_col, + dem_reader=dem_reader, + ) + t6b = time.time() + print(f" Completed in {t6b - t6a:.1f}s") - for i, mode in enumerate(modes, start=5): - print(f"\n[{i}] Running pathfinder (boundary_mode=\"{mode}\")...") - t_start = time.time() - results[mode] = run_pathfinder( - elevation, meta, friction_mult, barriers, - boundary_mode=mode, - start_row=start_row, start_col=start_col, - end_row=end_row, end_col=end_col, - dem_reader=dem_reader, - ) - t_end = time.time() - print(f" Completed in {t_end - t_start:.1f}s") + # Step 7: Run pathfinder WITHOUT trails + print(f"\n[7] Running pathfinder WITHOUT trail burn-in...") + t7a = time.time() + result_no_trails = run_pathfinder( + elevation, meta, friction_mult, trails, barriers, + use_trails=False, + start_row=start_row, start_col=start_col, + end_row=end_row, end_col=end_col, + dem_reader=dem_reader, + ) + t7b = time.time() + print(f" Completed in {t7b - t7a:.1f}s") - # Step 6: Save GeoJSON outputs + # Step 8: Save GeoJSON outputs print(f"\n[8] Saving GeoJSON outputs...") - OUTPUT_PATHS["strict"].parent.mkdir(parents=True, exist_ok=True) + OUTPUT_PATH_WITH_TRAILS.parent.mkdir(parents=True, exist_ok=True) - for mode, result in results.items(): - output_path = OUTPUT_PATHS[mode] - if result["success"]: - geojson = { - "type": "Feature", - "properties": { - "type": f"offroute_{mode}", - "phase": "O2c", - "boundary_mode": mode, - "start": {"lat": START_LAT, "lon": START_LON}, - "end": {"lat": END_LAT, "lon": END_LON}, - **{k: v for k, v in result.items() if k not in ["success", "coordinates"]}, - }, - "geometry": { - "type": "LineString", - "coordinates": result["coordinates"], - } + if result_trails["success"]: + geojson = { + "type": "Feature", + "properties": { + "type": "offroute_with_trails", + "phase": "O3a", + "trail_burn_in": True, + "start": {"lat": START_LAT, "lon": START_LON}, + "end": {"lat": END_LAT, "lon": END_LON}, + **{k: v for k, v in result_trails.items() if k not in ["success", "coordinates"]}, + }, + "geometry": { + "type": "LineString", + "coordinates": result_trails["coordinates"], } - with open(output_path, "w") as f: - json.dump(geojson, f, indent=2) - print(f" Saved: {output_path}") - else: - print(f" SKIPPED ({mode}): {result['reason']}") + } + with open(OUTPUT_PATH_WITH_TRAILS, "w") as f: + json.dump(geojson, f, indent=2) + print(f" Saved: {OUTPUT_PATH_WITH_TRAILS}") + + if result_no_trails["success"]: + geojson = { + "type": "Feature", + "properties": { + "type": "offroute_no_trails", + "phase": "O3a", + "trail_burn_in": False, + "start": {"lat": START_LAT, "lon": START_LON}, + "end": {"lat": END_LAT, "lon": END_LON}, + **{k: v for k, v in result_no_trails.items() if k not in ["success", "coordinates"]}, + }, + "geometry": { + "type": "LineString", + "coordinates": result_no_trails["coordinates"], + } + } + with open(OUTPUT_PATH_NO_TRAILS, "w") as f: + json.dump(geojson, f, indent=2) + print(f" Saved: {OUTPUT_PATH_NO_TRAILS}") t_total = time.time() - # Final report - three-way comparison + # Final report print(f"\n" + "=" * 80) - print("THREE-WAY COMPARISON") + print("SIDE-BY-SIDE COMPARISON: Trail Burn-In Effect") print("=" * 80) - # Check how many succeeded - success_count = sum(1 for r in results.values() if r["success"]) - - if success_count == 3: - print(f"{'Metric':<22} {'STRICT':<18} {'PRAGMATIC':<18} {'EMERGENCY':<18}") + if result_trails["success"] and result_no_trails["success"]: + print(f"{'Metric':<25} {'WITH TRAILS':<20} {'WITHOUT TRAILS':<20} {'Delta':<15}") print("-" * 80) metrics = [ @@ -335,55 +357,55 @@ def main(): ("Effort time (min)", "total_time_minutes", ".1f"), ("Cell count", "cell_count", "d"), ("Elevation gain (m)", "elevation_gain_m", ".0f"), - ("Elevation loss (m)", "elevation_loss_m", ".0f"), - ("Barrier crossings", "barrier_crossings", "d"), - ("Impassable cells", "impassable_cells", ",d"), + ("On-trail %", "on_trail_pct", ".1f"), + ("Road cells", "road_cells", "d"), + ("Track cells", "track_cells", "d"), + ("Trail cells", "trail_cells", "d"), ] for label, key, fmt in metrics: - vals = [results[m][key] for m in modes] - print(f"{label:<22} {vals[0]:<18{fmt}} {vals[1]:<18{fmt}} {vals[2]:<18{fmt}}") + val_with = result_trails[key] + val_without = result_no_trails[key] + if isinstance(val_with, int): + delta = val_with - val_without + delta_str = f"{delta:+d}" + else: + delta = val_with - val_without + delta_str = f"{delta:+.2f}" + print(f"{label:<25} {val_with:<20{fmt}} {val_without:<20{fmt}} {delta_str:<15}") # Analysis print(f"\n" + "-" * 80) print("ANALYSIS") print("-" * 80) - strict_crossings = results["strict"]["barrier_crossings"] - pragmatic_crossings = results["pragmatic"]["barrier_crossings"] - emergency_crossings = results["emergency"]["barrier_crossings"] + time_saved = result_no_trails["total_time_minutes"] - result_trails["total_time_minutes"] + if time_saved > 0: + print(f"Trail burn-in saves {time_saved:.1f} minutes ({100*time_saved/result_no_trails['total_time_minutes']:.1f}% faster)") + elif time_saved < 0: + print(f"Trail burn-in adds {-time_saved:.1f} minutes (path seeks trails even if longer)") - print(f"Barrier crossings: strict={strict_crossings}, pragmatic={pragmatic_crossings}, emergency={emergency_crossings}") - - if strict_crossings == 0 and pragmatic_crossings == 0 and emergency_crossings == 0: - print("No path crosses private land - terrain naturally avoids barriers.") + on_trail_with = result_trails["on_trail_pct"] + on_trail_without = result_no_trails["on_trail_pct"] + if on_trail_with > on_trail_without: + print(f"Trail burn-in increases on-trail travel: {on_trail_without:.1f}% → {on_trail_with:.1f}%") else: - if emergency_crossings > pragmatic_crossings: - print(f"Pragmatic mode reduces barrier crossings vs emergency: {emergency_crossings} -> {pragmatic_crossings}") - if pragmatic_crossings > 0 and strict_crossings == 0: - print(f"Strict mode completely avoids private land (pragmatic crosses {pragmatic_crossings} cells)") - - # Time/distance comparison - if results["strict"]["total_time_minutes"] > results["emergency"]["total_time_minutes"]: - time_penalty = results["strict"]["total_time_minutes"] - results["emergency"]["total_time_minutes"] - print(f"Time cost of strict boundary respect: +{time_penalty:.1f} min") + print(f"Both paths have similar on-trail percentage") else: - print(f"Only {success_count}/3 modes found a path:") - for mode, result in results.items(): - if result["success"]: - print(f" {mode}: {result['total_distance_km']:.2f} km, {result['total_time_minutes']:.1f} min") - else: - print(f" {mode}: FAILED - {result.get('reason', 'unknown')}") + if not result_trails["success"]: + print(f"WITH TRAILS: FAILED - {result_trails.get('reason', 'unknown')}") + if not result_no_trails["success"]: + print(f"WITHOUT TRAILS: FAILED - {result_no_trails.get('reason', 'unknown')}") print(f"\n" + "-" * 80) print(f"Total wall time: {t_total - t0:.1f}s") - print(f"Closed cells in bbox: {closed_cells:,}") # Cleanup dem_reader.close() friction_reader.close() barrier_reader.close() + trail_reader.close() print("\nPrototype completed.") diff --git a/lib/offroute/trails.py b/lib/offroute/trails.py new file mode 100644 index 0000000..9d9185e --- /dev/null +++ b/lib/offroute/trails.py @@ -0,0 +1,174 @@ +""" +Trail corridor reader for OFFROUTE. + +Provides access to the OSM-derived trail raster for pathfinding. +Trail values replace WorldCover friction where trails exist. + +Raster values: + 0 = no trail (use WorldCover friction) + 5 = road (0.1× friction) + 15 = track (0.3× friction) + 25 = foot trail (0.5× friction) +""" +import numpy as np +from pathlib import Path +from typing import Tuple, Optional + +try: + import rasterio + from rasterio.windows import from_bounds + from rasterio.enums import Resampling +except ImportError: + raise ImportError("rasterio is required for trails layer support") + +# Default path to the trails raster +DEFAULT_TRAILS_PATH = Path("/mnt/nav/worldcover/trails.tif") + +# Trail value to friction multiplier mapping +TRAIL_FRICTION_MAP = { + 5: 0.1, # road + 15: 0.3, # track + 25: 0.5, # foot trail +} + + +class TrailReader: + """Reader for OSM-derived trail corridor raster.""" + + def __init__(self, trails_path: Path = DEFAULT_TRAILS_PATH): + self.trails_path = trails_path + self._dataset = None + + def _open(self): + """Lazy open the dataset.""" + if self._dataset is None: + if not self.trails_path.exists(): + raise FileNotFoundError( + f"Trails raster not found at {self.trails_path}. " + f"Run the Phase B rasterization script first." + ) + self._dataset = rasterio.open(self.trails_path) + return self._dataset + + def get_trails_grid( + self, + south: float, + north: float, + west: float, + east: float, + target_shape: Tuple[int, int] + ) -> np.ndarray: + """ + Get trail values for a bounding box, resampled to target shape. + + Args: + south, north, west, east: Bounding box coordinates (WGS84) + target_shape: (rows, cols) to resample to (matches elevation grid) + + Returns: + np.ndarray of uint8 trail values: + 0 = no trail + 5 = road (0.1× friction) + 15 = track (0.3× friction) + 25 = foot trail (0.5× friction) + """ + ds = self._open() + + # Create a window from the bounding box + window = from_bounds(west, south, east, north, ds.transform) + + # Read with resampling to target shape + # Use nearest neighbor to preserve discrete values + trails = ds.read( + 1, + window=window, + out_shape=target_shape, + resampling=Resampling.nearest + ) + + return trails + + def sample_point(self, lat: float, lon: float) -> int: + """Sample trail value at a single point.""" + ds = self._open() + + # Get pixel coordinates + row, col = ds.index(lon, lat) + + # Check bounds + if row < 0 or row >= ds.height or col < 0 or col >= ds.width: + return 0 # Out of bounds = no trail + + # Read single pixel + window = rasterio.windows.Window(col, row, 1, 1) + value = ds.read(1, window=window) + return int(value[0, 0]) + + def close(self): + """Close the dataset.""" + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + +def trails_to_friction(trails: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """ + Convert trail values to friction multipliers. + + Args: + trails: uint8 array of trail values (0, 5, 15, or 25) + + Returns: + Tuple of: + - friction: float32 array of friction multipliers + - has_trail: bool array indicating where trails exist + """ + friction = np.ones_like(trails, dtype=np.float32) + has_trail = trails > 0 + + # Apply friction values where trails exist + friction[trails == 5] = 0.1 # road + friction[trails == 15] = 0.3 # track + friction[trails == 25] = 0.5 # foot trail + + return friction, has_trail + + +if __name__ == "__main__": + print("Testing TrailReader...") + + if not DEFAULT_TRAILS_PATH.exists(): + print(f"Trails raster not found at {DEFAULT_TRAILS_PATH}") + print("Run Phase B rasterization first.") + exit(1) + + reader = TrailReader() + + # Test point sampling - Twin Falls downtown (should have roads) + test_lat, test_lon = 42.563, -114.461 + trail_value = reader.sample_point(test_lat, test_lon) + print(f"\nTwin Falls ({test_lat}, {test_lon}): trail value = {trail_value}") + label = {0: "no trail", 5: "road", 15: "track", 25: "trail"}.get(trail_value, "unknown") + print(f" Type: {label}") + + # Test grid read for test bbox + trails = reader.get_trails_grid( + south=42.21, north=42.60, west=-114.76, east=-113.79, + target_shape=(400, 1000) + ) + print(f"\nGrid test shape: {trails.shape}") + + unique, counts = np.unique(trails, return_counts=True) + print("Value distribution:") + for v, c in zip(unique, counts): + pct = 100 * c / trails.size + label = {0: "no trail", 5: "road", 15: "track", 25: "trail"}.get(v, f"unknown({v})") + print(f" {label}: {c:,} pixels ({pct:.2f}%)") + + # Test conversion to friction + friction, has_trail = trails_to_friction(trails) + print(f"\nTrail coverage: {100 * np.sum(has_trail) / trails.size:.2f}%") + print(f"Friction range (on trails): {friction[has_trail].min():.1f} - {friction[has_trail].max():.1f}") + + reader.close() + print("\nTrailReader test complete.") From 1a9dfc8f8d78fb5c7a9798b58fa94cd6d648f6a8 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 13:44:34 +0000 Subject: [PATCH 38/72] =?UTF-8?q?feat(offroute):=20Phase=20O3b=20=E2=80=94?= =?UTF-8?q?=20trail=20entry=20index,=20Valhalla=20stitching,=20/api/offrou?= =?UTF-8?q?te=20endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A: Trail Entry Point Index - Extract highway endpoints from idaho-latest.osm.pbf using osmium + ogr2ogr - Store 740,430 entry points in /mnt/nav/navi.db (SQLite with spatial index) - Entry points by class: service (271k), footway (152k), residential (146k), track (111k), path (26k), unclassified (16k), tertiary (9k), secondary (4k), primary (4k), bridleway (15) Phase B: Pathfinder → Valhalla Stitching (router.py) - OffrouteRouter orchestrates wilderness pathfinding + Valhalla on-network routing - Queries entry points within 50km (expanding to 100km if needed) - MCP pathfinder routes to nearest reachable entry point - Calls Valhalla pedestrian/bicycle/auto costing for on-network segment - Returns GeoJSON FeatureCollection with wilderness + network + combined segments Phase C: Flask Endpoint - POST /api/offroute with start/end coordinates, mode, boundary_mode - Returns GeoJSON route with per-segment metadata and turn-by-turn maneuvers Validated: 42.35,-114.30 → Twin Falls downtown - Wilderness: 0.5km, 9min | Network: 36km, 413min | Total: ~421min - 21 turn-by-turn instructions, segments connect at entry point Co-Authored-By: Claude Opus 4.5 --- lib/api.py | 77 +++++ lib/offroute/router.py | 752 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 829 insertions(+) create mode 100644 lib/offroute/router.py diff --git a/lib/api.py b/lib/api.py index 8a1f383..cd32e33 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2722,3 +2722,80 @@ def api_auth_whoami(): 'authenticated': False, 'username': None, }) + + +# ── OFFROUTE API ── + +@app.route("/api/offroute", methods=["POST"]) +def api_offroute(): + """ + Off-network routing from wilderness to destination. + + Request body: + { + "start": [lat, lon], + "end": [lat, lon], + "mode": "foot" | "mtb" | "atv", (default: "foot") + "boundary_mode": "strict" | "pragmatic" | "emergency" (default: "pragmatic") + } + + Response: + { + "status": "ok", + "route": { GeoJSON FeatureCollection with wilderness + network segments }, + "summary": { total_distance_km, total_effort_minutes, ... } + } + """ + try: + data = request.get_json() + if not data: + return jsonify({"status": "error", "message": "No JSON body provided"}), 400 + + # Parse coordinates + start = data.get("start") + end = data.get("end") + + if not start or not end: + return jsonify({"status": "error", "message": "Missing start or end coordinates"}), 400 + + if not isinstance(start, (list, tuple)) or len(start) != 2: + return jsonify({"status": "error", "message": "start must be [lat, lon]"}), 400 + if not isinstance(end, (list, tuple)) or len(end) != 2: + return jsonify({"status": "error", "message": "end must be [lat, lon]"}), 400 + + start_lat, start_lon = float(start[0]), float(start[1]) + end_lat, end_lon = float(end[0]), float(end[1]) + + # Parse options + mode = data.get("mode", "foot") + if mode not in ("foot", "mtb", "atv"): + return jsonify({"status": "error", "message": "mode must be foot, mtb, or atv"}), 400 + + boundary_mode = data.get("boundary_mode", "pragmatic") + if boundary_mode not in ("strict", "pragmatic", "emergency"): + return jsonify({"status": "error", "message": "boundary_mode must be strict, pragmatic, or emergency"}), 400 + + # Import and run router + from .offroute.router import OffrouteRouter + + router = OffrouteRouter() + try: + result = router.route( + start_lat=start_lat, + start_lon=start_lon, + end_lat=end_lat, + end_lon=end_lon, + mode=mode, + boundary_mode=boundary_mode + ) + finally: + router.close() + + if result.get("status") == "error": + return jsonify(result), 400 + + return jsonify(result) + + except Exception as e: + logger.exception("Offroute error") + return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/lib/offroute/router.py b/lib/offroute/router.py new file mode 100644 index 0000000..57d6ce5 --- /dev/null +++ b/lib/offroute/router.py @@ -0,0 +1,752 @@ +""" +OFFROUTE Router — Wilderness to network path orchestration. + +Connects the raster pathfinder (wilderness segment) to Valhalla (on-network segment). + +Entry points are extracted from OSM highways and stored in /mnt/nav/navi.db. +The pathfinder routes from a wilderness start to the nearest entry point, +then Valhalla completes the route to the destination. +""" +import json +import math +import sqlite3 +import subprocess +import tempfile +import time +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Literal + +import numpy as np +import requests +from skimage.graph import MCP_Geometric + +from .dem import DEMReader +from .cost import compute_cost_grid +from .friction import FrictionReader, friction_to_multiplier +from .barriers import BarrierReader +from .trails import TrailReader + +# Paths +NAVI_DB_PATH = Path("/mnt/nav/navi.db") +OSM_PBF_PATH = Path("/mnt/nav/sources/idaho-latest.osm.pbf") + +# Valhalla endpoint +VALHALLA_URL = "http://localhost:8002" + +# Search radius for entry points (km) +DEFAULT_SEARCH_RADIUS_KM = 50 +EXPANDED_SEARCH_RADIUS_KM = 100 + +# Memory limit +MEMORY_LIMIT_GB = 12 + +# Mode to Valhalla costing mapping +MODE_TO_COSTING = { + "foot": "pedestrian", + "mtb": "bicycle", + "atv": "auto", +} + + +def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """Calculate distance between two points in meters.""" + R = 6371000 + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2 + c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) + return R * c + + +def check_memory_usage() -> float: + """Check current memory usage in GB.""" + try: + import psutil + process = psutil.Process() + return process.memory_info().rss / (1024**3) + except ImportError: + return 0 + + +class EntryPointIndex: + """ + Trail entry point index for wilderness-to-network handoff. + + Entry points are endpoints and intersections of OSM highways + that connect wilderness areas to the routable network. + """ + + def __init__(self, db_path: Path = NAVI_DB_PATH): + self.db_path = db_path + self._conn = None + + def _get_conn(self) -> sqlite3.Connection: + if self._conn is None: + self._conn = sqlite3.connect(str(self.db_path)) + self._conn.row_factory = sqlite3.Row + return self._conn + + def table_exists(self) -> bool: + """Check if trail_entry_points table exists.""" + if not self.db_path.exists(): + return False + conn = self._get_conn() + cur = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='trail_entry_points'" + ) + return cur.fetchone() is not None + + def get_entry_point_count(self) -> int: + """Get count of entry points.""" + if not self.table_exists(): + return 0 + conn = self._get_conn() + cur = conn.execute("SELECT COUNT(*) FROM trail_entry_points") + return cur.fetchone()[0] + + def query_bbox(self, south: float, north: float, west: float, east: float) -> List[Dict]: + """Query entry points within a bounding box.""" + if not self.table_exists(): + return [] + + conn = self._get_conn() + cur = conn.execute(""" + SELECT id, lat, lon, highway_class, name + FROM trail_entry_points + WHERE lat >= ? AND lat <= ? AND lon >= ? AND lon <= ? + """, (south, north, west, east)) + + return [dict(row) for row in cur.fetchall()] + + def query_radius(self, lat: float, lon: float, radius_km: float) -> List[Dict]: + """Query entry points within radius of a point.""" + # Approximate bbox for the radius + lat_delta = radius_km / 111.0 + lon_delta = radius_km / (111.0 * math.cos(math.radians(lat))) + + points = self.query_bbox( + lat - lat_delta, lat + lat_delta, + lon - lon_delta, lon + lon_delta + ) + + # Filter by actual distance and add distance field + result = [] + for p in points: + dist = haversine_distance(lat, lon, p['lat'], p['lon']) + if dist <= radius_km * 1000: + p['distance_m'] = dist + result.append(p) + + return sorted(result, key=lambda x: x['distance_m']) + + def build_index(self, osm_pbf_path: Path = OSM_PBF_PATH) -> Dict: + """ + Build the entry point index from OSM PBF. + + Extracts endpoints of highway features that connect to the network. + """ + if not osm_pbf_path.exists(): + raise FileNotFoundError(f"OSM PBF not found: {osm_pbf_path}") + + print(f"Building trail entry point index from {osm_pbf_path}...") + + # Highway types to extract (routable network entry points) + highway_types = [ + "primary", "secondary", "tertiary", "unclassified", + "residential", "service", "track", "path", "footway", "bridleway" + ] + + stats = {"total": 0, "by_class": {}} + + with tempfile.TemporaryDirectory() as tmpdir: + # Extract highways to GeoJSON + geojson_path = Path(tmpdir) / "highways.geojson" + + # Build osmium tags-filter expressions (one per highway type) + print(f" Extracting highways with osmium...") + cmd = [ + "osmium", "tags-filter", + str(osm_pbf_path), + ] + # Add each highway type as a separate filter expression + for ht in highway_types: + cmd.append(f"w/highway={ht}") + cmd.extend(["-o", str(Path(tmpdir) / "filtered.osm.pbf"), "--overwrite"]) + + subprocess.run(cmd, check=True, capture_output=True) + + # Convert to GeoJSON + print(f" Converting to GeoJSON with ogr2ogr...") + cmd = [ + "ogr2ogr", "-f", "GeoJSON", + str(geojson_path), + str(Path(tmpdir) / "filtered.osm.pbf"), + "lines", + "-t_srs", "EPSG:4326" + ] + subprocess.run(cmd, check=True, capture_output=True) + + # Parse GeoJSON and extract endpoints + print(f" Extracting entry points...") + with open(geojson_path) as f: + data = json.load(f) + + # Collect unique points (endpoints) + # Key: (lat, lon) rounded to 5 decimal places (~1m precision) + points = {} + + for feature in data.get("features", []): + props = feature.get("properties", {}) + geom = feature.get("geometry", {}) + + if geom.get("type") != "LineString": + continue + + coords = geom.get("coordinates", []) + if len(coords) < 2: + continue + + highway_class = props.get("highway", "unknown") + name = props.get("name", "") + + # Extract endpoints + for coord in [coords[0], coords[-1]]: + lon, lat = coord[0], coord[1] + key = (round(lat, 5), round(lon, 5)) + + if key not in points: + points[key] = { + "lat": lat, + "lon": lon, + "highway_class": highway_class, + "name": name + } + else: + # Keep the "best" highway class (roads > tracks > paths) + existing = points[key] + if self._highway_priority(highway_class) < self._highway_priority(existing["highway_class"]): + points[key]["highway_class"] = highway_class + if name and not existing["name"]: + points[key]["name"] = name + + # Create/update database + print(f" Writing {len(points)} entry points to {self.db_path}...") + + self.db_path.parent.mkdir(parents=True, exist_ok=True) + conn = self._get_conn() + + # Create table + conn.execute(""" + CREATE TABLE IF NOT EXISTS trail_entry_points ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lat REAL NOT NULL, + lon REAL NOT NULL, + highway_class TEXT NOT NULL, + name TEXT + ) + """) + + # Clear existing data + conn.execute("DELETE FROM trail_entry_points") + + # Insert new points + for point in points.values(): + conn.execute(""" + INSERT INTO trail_entry_points (lat, lon, highway_class, name) + VALUES (?, ?, ?, ?) + """, (point["lat"], point["lon"], point["highway_class"], point["name"])) + + stats["total"] += 1 + hc = point["highway_class"] + stats["by_class"][hc] = stats["by_class"].get(hc, 0) + 1 + + # Create spatial index + conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lat ON trail_entry_points(lat)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lon ON trail_entry_points(lon)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_latlon ON trail_entry_points(lat, lon)") + + conn.commit() + + print(f" Done. Total: {stats['total']} entry points") + for hc, count in sorted(stats["by_class"].items(), key=lambda x: -x[1]): + print(f" {hc}: {count}") + + return stats + + def _highway_priority(self, highway_class: str) -> int: + """Lower number = better priority for entry points.""" + priority = { + "primary": 1, "secondary": 2, "tertiary": 3, + "unclassified": 4, "residential": 5, "service": 6, + "track": 7, "path": 8, "footway": 9, "bridleway": 10 + } + return priority.get(highway_class, 99) + + def close(self): + if self._conn: + self._conn.close() + self._conn = None + + +class OffrouteRouter: + """ + OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. + """ + + def __init__(self): + self.dem_reader = None + self.friction_reader = None + self.barrier_reader = None + self.trail_reader = None + self.entry_index = EntryPointIndex() + + def _init_readers(self): + """Lazy init readers.""" + if self.dem_reader is None: + self.dem_reader = DEMReader() + if self.friction_reader is None: + self.friction_reader = FrictionReader() + if self.barrier_reader is None: + self.barrier_reader = BarrierReader() + if self.trail_reader is None: + self.trail_reader = TrailReader() + + def route( + self, + start_lat: float, + start_lon: float, + end_lat: float, + end_lon: float, + mode: Literal["foot", "mtb", "atv"] = "foot", + boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" + ) -> Dict: + """ + Route from a wilderness start point to a destination. + + Returns a GeoJSON FeatureCollection with wilderness and network segments. + """ + t0 = time.time() + + # Ensure entry point index exists + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + # Find entry points near start + entry_points = self.entry_index.query_radius( + start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM + ) + + if not entry_points: + # Try expanded radius + entry_points = self.entry_index.query_radius( + start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM + ) + if not entry_points: + return { + "status": "error", + "message": f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start" + } + + # Build bbox for pathfinding grid + # Include start, end, and all entry points + all_lats = [start_lat, end_lat] + [p["lat"] for p in entry_points] + all_lons = [start_lon, end_lon] + [p["lon"] for p in entry_points] + + padding = 0.05 # ~5km padding + bbox = { + "south": min(all_lats) - padding, + "north": max(all_lats) + padding, + "west": min(all_lons) - padding, + "east": max(all_lons) + padding, + } + + # Initialize readers + self._init_readers() + + # Load elevation + try: + elevation, meta = self.dem_reader.get_elevation_grid( + south=bbox["south"], + north=bbox["north"], + west=bbox["west"], + east=bbox["east"], + ) + except Exception as e: + return {"status": "error", "message": f"Failed to load elevation: {e}"} + + # Check memory + mem = check_memory_usage() + if mem > MEMORY_LIMIT_GB: + return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} + + # Load friction + friction_raw = self.friction_reader.get_friction_grid( + south=bbox["south"], + north=bbox["north"], + west=bbox["west"], + east=bbox["east"], + target_shape=elevation.shape + ) + friction_mult = friction_to_multiplier(friction_raw) + + # Load barriers + barriers = self.barrier_reader.get_barrier_grid( + south=bbox["south"], + north=bbox["north"], + west=bbox["west"], + east=bbox["east"], + target_shape=elevation.shape + ) + + # Load trails + trails = self.trail_reader.get_trails_grid( + south=bbox["south"], + north=bbox["north"], + west=bbox["west"], + east=bbox["east"], + target_shape=elevation.shape + ) + + # Compute cost grid + cost = compute_cost_grid( + elevation, + cell_size_m=meta["cell_size_m"], + friction=friction_mult, + trails=trails, + barriers=barriers, + boundary_mode=boundary_mode, + ) + + # Convert start to pixel coordinates + start_row, start_col = self.dem_reader.latlon_to_pixel(start_lat, start_lon, meta) + + # Validate start is in bounds + rows, cols = elevation.shape + if not (0 <= start_row < rows and 0 <= start_col < cols): + return {"status": "error", "message": "Start point outside grid bounds"} + + # Mark entry points on the grid + entry_pixels = [] + for ep in entry_points: + row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) + if 0 <= row < rows and 0 <= col < cols: + entry_pixels.append({ + "row": row, + "col": col, + "entry_point": ep + }) + + if not entry_pixels: + return {"status": "error", "message": "No entry points map to grid bounds"} + + # Run MCP pathfinder + mcp = MCP_Geometric(cost, fully_connected=True) + cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) + + # Find nearest reachable entry point + best_entry = None + best_cost = np.inf + + for ep in entry_pixels: + ep_cost = cumulative_costs[ep["row"], ep["col"]] + if ep_cost < best_cost: + best_cost = ep_cost + best_entry = ep + + if best_entry is None or np.isinf(best_cost): + return { + "status": "error", + "message": "No path found to any entry point (blocked by impassable terrain)" + } + + # Traceback wilderness path + path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) + + # Convert to coordinates and collect stats + wilderness_coords = [] + elevations = [] + trail_values = [] + + for row, col in path_indices: + lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) + wilderness_coords.append([lon, lat]) + elevations.append(elevation[row, col]) + trail_values.append(trails[row, col]) + + # Calculate wilderness segment stats + wilderness_distance_m = 0 + for i in range(1, len(wilderness_coords)): + lon1, lat1 = wilderness_coords[i-1] + lon2, lat2 = wilderness_coords[i] + wilderness_distance_m += haversine_distance(lat1, lon1, lat2, lon2) + + elev_arr = np.array(elevations) + elev_diff = np.diff(elev_arr) + wilderness_gain = float(np.sum(elev_diff[elev_diff > 0])) + wilderness_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) + + trail_arr = np.array(trail_values) + on_trail_cells = np.sum(trail_arr > 0) + total_cells = len(trail_arr) + on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 + + # Entry point reached + entry_lat = best_entry["entry_point"]["lat"] + entry_lon = best_entry["entry_point"]["lon"] + entry_class = best_entry["entry_point"]["highway_class"] + entry_name = best_entry["entry_point"].get("name", "") + + # Call Valhalla for on-network segment + valhalla_costing = MODE_TO_COSTING.get(mode, "pedestrian") + + valhalla_request = { + "locations": [ + {"lat": entry_lat, "lon": entry_lon}, + {"lat": end_lat, "lon": end_lon} + ], + "costing": valhalla_costing, + "directions_options": {"units": "kilometers"} + } + + network_segment = None + valhalla_error = None + + try: + resp = requests.post( + f"{VALHALLA_URL}/route", + json=valhalla_request, + timeout=30 + ) + + if resp.status_code == 200: + valhalla_data = resp.json() + trip = valhalla_data.get("trip", {}) + legs = trip.get("legs", []) + + if legs: + leg = legs[0] + shape = leg.get("shape", "") + + # Decode polyline6 + network_coords = self._decode_polyline(shape) + + # Extract maneuvers + maneuvers = [] + for m in leg.get("maneuvers", []): + maneuvers.append({ + "instruction": m.get("instruction", ""), + "type": m.get("type", 0), + "distance_km": m.get("length", 0), + "time_seconds": m.get("time", 0), + "street_names": m.get("street_names", []), + }) + + summary = trip.get("summary", {}) + network_segment = { + "coordinates": network_coords, + "distance_km": summary.get("length", 0), + "duration_minutes": summary.get("time", 0) / 60, + "maneuvers": maneuvers, + } + else: + valhalla_error = f"Valhalla returned {resp.status_code}: {resp.text[:200]}" + + except Exception as e: + valhalla_error = f"Valhalla request failed: {e}" + + # Build response + features = [] + + # Feature 1: Wilderness segment + wilderness_feature = { + "type": "Feature", + "properties": { + "segment_type": "wilderness", + "effort_minutes": float(best_cost / 60), + "distance_km": float(wilderness_distance_m / 1000), + "elevation_gain_m": wilderness_gain, + "elevation_loss_m": wilderness_loss, + "boundary_mode": boundary_mode, + "on_trail_pct": on_trail_pct, + "cell_count": total_cells, + }, + "geometry": { + "type": "LineString", + "coordinates": wilderness_coords, + } + } + features.append(wilderness_feature) + + # Feature 2: Network segment (if available) + if network_segment: + network_feature = { + "type": "Feature", + "properties": { + "segment_type": "network", + "distance_km": network_segment["distance_km"], + "duration_minutes": network_segment["duration_minutes"], + "maneuvers": network_segment["maneuvers"], + }, + "geometry": { + "type": "LineString", + "coordinates": network_segment["coordinates"], + } + } + features.append(network_feature) + + # Build combined route coordinates + combined_coords = wilderness_coords.copy() + if network_segment: + # Skip first point of network segment (it's the same as last wilderness point) + combined_coords.extend(network_segment["coordinates"][1:]) + + # Feature 3: Combined route + combined_feature = { + "type": "Feature", + "properties": { + "segment_type": "combined", + "mode": mode, + "boundary_mode": boundary_mode, + }, + "geometry": { + "type": "LineString", + "coordinates": combined_coords, + } + } + features.append(combined_feature) + + geojson = { + "type": "FeatureCollection", + "features": features, + } + + # Build summary + total_distance_km = wilderness_distance_m / 1000 + total_effort_minutes = best_cost / 60 + + if network_segment: + total_distance_km += network_segment["distance_km"] + total_effort_minutes += network_segment["duration_minutes"] + + summary = { + "total_distance_km": float(total_distance_km), + "total_effort_minutes": float(total_effort_minutes), + "wilderness_distance_km": float(wilderness_distance_m / 1000), + "wilderness_effort_minutes": float(best_cost / 60), + "network_distance_km": float(network_segment["distance_km"]) if network_segment else 0, + "network_duration_minutes": float(network_segment["duration_minutes"]) if network_segment else 0, + "on_trail_pct": on_trail_pct, + "boundary_mode": boundary_mode, + "mode": mode, + "entry_point": { + "lat": entry_lat, + "lon": entry_lon, + "highway_class": entry_class, + "name": entry_name, + }, + "computation_time_s": time.time() - t0, + } + + result = { + "status": "ok", + "route": geojson, + "summary": summary, + } + + if valhalla_error: + result["warning"] = f"Network segment incomplete: {valhalla_error}" + + return result + + def _decode_polyline(self, encoded: str, precision: int = 6) -> List[List[float]]: + """Decode a polyline string into coordinates [lon, lat].""" + coords = [] + index = 0 + lat = 0 + lon = 0 + + while index < len(encoded): + # Latitude + shift = 0 + result = 0 + while True: + b = ord(encoded[index]) - 63 + index += 1 + result |= (b & 0x1f) << shift + shift += 5 + if b < 0x20: + break + dlat = ~(result >> 1) if result & 1 else result >> 1 + lat += dlat + + # Longitude + shift = 0 + result = 0 + while True: + b = ord(encoded[index]) - 63 + index += 1 + result |= (b & 0x1f) << shift + shift += 5 + if b < 0x20: + break + dlon = ~(result >> 1) if result & 1 else result >> 1 + lon += dlon + + coords.append([lon / (10 ** precision), lat / (10 ** precision)]) + + return coords + + def close(self): + """Close all readers.""" + if self.dem_reader: + self.dem_reader.close() + if self.friction_reader: + self.friction_reader.close() + if self.barrier_reader: + self.barrier_reader.close() + if self.trail_reader: + self.trail_reader.close() + self.entry_index.close() + + +def build_entry_index(): + """Build the trail entry point index.""" + index = EntryPointIndex() + stats = index.build_index() + index.close() + return stats + + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "build": + print("Building trail entry point index...") + stats = build_entry_index() + print(f"\nDone. Total entry points: {stats['total']}") + + elif len(sys.argv) > 1 and sys.argv[1] == "test": + print("Testing router...") + + router = OffrouteRouter() + + # Test route: wilderness to Twin Falls + result = router.route( + start_lat=42.35, + start_lon=-114.30, + end_lat=42.5629, + end_lon=-114.4609, + mode="foot", + boundary_mode="pragmatic" + ) + + print(json.dumps(result, indent=2, default=str)) + router.close() + + else: + print("Usage:") + print(" python router.py build # Build entry point index") + print(" python router.py test # Test route") From bc463188d56b1cf9b3e804427ab5e0117743fc57 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 14:11:56 +0000 Subject: [PATCH 39/72] =?UTF-8?q?feat(offroute):=20Phase=20O4=20=E2=80=94?= =?UTF-8?q?=20multi-mode=20cost=20functions=20(foot/mtb/atv/vehicle)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ModeProfile dataclass for data-driven mode configuration - Implement three speed functions: * Tobler off-path hiking (foot) * Herzog wheeled-transport polynomial (mtb/atv) * Linear speed degradation (vehicle) - Add WildernessReader for PAD-US Des_Tp=WA wilderness areas - Mode-specific terrain friction overrides: * Forest impassable for ATV/vehicle, high friction for MTB * Wetland/mangrove impassable for all wheeled modes - Trail access rules: * Foot trails (value 25) impassable for ATV/vehicle - Wilderness blocking for mtb/atv/vehicle modes - Vehicle mode allows flat grassland/cropland traversal - Memory optimization: limit entry points, constrain bbox size - Update router to pass mode and wilderness to cost function - Add vehicle to API mode validation Validated all four modes with test route: - foot: 0.46km off-network, 12.11km network, 89% on trail - mtb: 0.47km off-network, 13.13km network, 90% on trail - atv: 0.47km off-network, 12.81km network, 90% on trail - vehicle: 0.46km off-network, 12.81km network, 89% on trail Co-Authored-By: Claude Opus 4.5 --- lib/api.py | 4 +- lib/offroute/barriers.py | 274 +++++++++++++++++---- lib/offroute/cost.py | 515 +++++++++++++++++++++++++++++---------- lib/offroute/router.py | 274 ++++++++++----------- 4 files changed, 740 insertions(+), 327 deletions(-) diff --git a/lib/api.py b/lib/api.py index cd32e33..a127866 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2768,8 +2768,8 @@ def api_offroute(): # Parse options mode = data.get("mode", "foot") - if mode not in ("foot", "mtb", "atv"): - return jsonify({"status": "error", "message": "mode must be foot, mtb, or atv"}), 400 + if mode not in ("foot", "mtb", "atv", "vehicle"): + return jsonify({"status": "error", "message": "mode must be foot, mtb, atv, or vehicle"}), 400 boundary_mode = data.get("boundary_mode", "pragmatic") if boundary_mode not in ("strict", "pragmatic", "emergency"): diff --git a/lib/offroute/barriers.py b/lib/offroute/barriers.py index 7fcad75..f68e892 100644 --- a/lib/offroute/barriers.py +++ b/lib/offroute/barriers.py @@ -1,11 +1,12 @@ """ -PAD-US barrier layer for OFFROUTE. +PAD-US barrier and wilderness layers for OFFROUTE. -Provides access to the PAD-US land ownership raster for routing decisions. -Cells with value 255 represent closed/restricted areas (Pub_Access = XA). +Provides access to: +1. Barrier raster (Pub_Access = 'XA' - closed/restricted areas) +2. Wilderness raster (Des_Tp = 'WA' - designated wilderness areas) -Build function rasterizes PAD-US geodatabase to aligned GeoTIFF. -Runtime functions read the raster and resample to match elevation grids. +Build functions rasterize PAD-US geodatabase to aligned GeoTIFFs. +Runtime functions read the rasters and resample to match elevation grids. """ import numpy as np from pathlib import Path @@ -23,6 +24,7 @@ except ImportError: # Paths DEFAULT_BARRIERS_PATH = Path("/mnt/nav/worldcover/padus_barriers.tif") +DEFAULT_WILDERNESS_PATH = Path("/mnt/nav/worldcover/wilderness.tif") PADUS_GDB_PATH = Path("/mnt/nav/padus/PADUS4_0_Geodatabase.gdb") PADUS_LAYER = "PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement" @@ -39,7 +41,7 @@ PIXEL_SIZE = 0.0003 # ~33m class BarrierReader: - """Reader for PAD-US barrier raster.""" + """Reader for PAD-US barrier raster (closed/restricted areas).""" def __init__(self, barrier_path: Path = DEFAULT_BARRIERS_PATH): self.barrier_path = barrier_path @@ -77,32 +79,86 @@ class BarrierReader: 0 = public/accessible """ ds = self._open() - - # Create a window from the bounding box window = from_bounds(west, south, east, north, ds.transform) - - # Read with resampling to target shape barriers = ds.read( 1, window=window, out_shape=target_shape, resampling=Resampling.nearest ) - return barriers def sample_point(self, lat: float, lon: float) -> int: """Sample barrier value at a single point.""" ds = self._open() - - # Get pixel coordinates row, col = ds.index(lon, lat) - - # Check bounds if row < 0 or row >= ds.height or col < 0 or col >= ds.width: - return 0 # Out of bounds = accessible + return 0 + window = rasterio.windows.Window(col, row, 1, 1) + value = ds.read(1, window=window) + return int(value[0, 0]) - # Read single pixel + def close(self): + """Close the dataset.""" + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + +class WildernessReader: + """Reader for PAD-US wilderness raster (designated wilderness areas).""" + + def __init__(self, wilderness_path: Path = DEFAULT_WILDERNESS_PATH): + self.wilderness_path = wilderness_path + self._dataset = None + + def _open(self): + """Lazy open the dataset.""" + if self._dataset is None: + if not self.wilderness_path.exists(): + raise FileNotFoundError( + f"Wilderness raster not found at {self.wilderness_path}. " + f"Run build_wilderness_raster() first." + ) + self._dataset = rasterio.open(self.wilderness_path) + return self._dataset + + def get_wilderness_grid( + self, + south: float, + north: float, + west: float, + east: float, + target_shape: Tuple[int, int] + ) -> np.ndarray: + """ + Get wilderness values for a bounding box, resampled to target shape. + + Args: + south, north, west, east: Bounding box coordinates (WGS84) + target_shape: (rows, cols) to resample to (matches elevation grid) + + Returns: + np.ndarray of uint8 wilderness values: + 255 = designated wilderness area + 0 = not wilderness + """ + ds = self._open() + window = from_bounds(west, south, east, north, ds.transform) + wilderness = ds.read( + 1, + window=window, + out_shape=target_shape, + resampling=Resampling.nearest + ) + return wilderness + + def sample_point(self, lat: float, lon: float) -> int: + """Sample wilderness value at a single point.""" + ds = self._open() + row, col = ds.index(lon, lat) + if row < 0 or row >= ds.height or col < 0 or col >= ds.width: + return 0 window = rasterio.windows.Window(col, row, 1, 1) value = ds.read(1, window=window) return int(value[0, 0]) @@ -124,22 +180,12 @@ def build_barriers_raster( Build the PAD-US barriers raster from the source geodatabase. Extracts polygons where Pub_Access = 'XA' (Closed) and rasterizes them. - - Args: - output_path: Output GeoTIFF path - gdb_path: Path to PAD-US geodatabase - pixel_size: Pixel size in degrees - bounds: CONUS bounding box - - Returns: - Path to the created raster """ import shutil if not gdb_path.exists(): raise FileNotFoundError(f"PAD-US geodatabase not found at {gdb_path}") - # Check for required tools if not shutil.which('ogr2ogr'): raise RuntimeError("ogr2ogr not found. Install GDAL.") if not shutil.which('gdal_rasterize'): @@ -154,7 +200,6 @@ def build_barriers_raster( print(f" Bounds: {bounds}") with tempfile.TemporaryDirectory() as tmpdir: - # Step 1: Extract closed areas and reproject to WGS84 closed_gpkg = Path(tmpdir) / "closed_areas.gpkg" print(f"\n[1/3] Extracting closed areas (Pub_Access = 'XA')...") @@ -176,28 +221,23 @@ def build_barriers_raster( print(f"STDERR: {result.stderr}") raise RuntimeError(f"ogr2ogr failed: {result.stderr}") - # Check feature count info_cmd = ["ogrinfo", "-so", str(closed_gpkg), "closed_areas"] info_result = subprocess.run(info_cmd, capture_output=True, text=True) print(f" Extraction result:\n{info_result.stdout}") - # Step 2: Create empty raster print(f"\n[2/3] Creating raster grid...") width = int((bounds['east'] - bounds['west']) / pixel_size) height = int((bounds['north'] - bounds['south']) / pixel_size) - print(f" Grid size: {width} x {height} pixels") - print(f" Memory estimate: {width * height / 1e6:.1f} MB") - # Step 3: Rasterize print(f"\n[3/3] Rasterizing closed areas...") rasterize_cmd = [ "gdal_rasterize", "-burn", "255", "-init", "0", - "-a_nodata", "0", # No nodata - 0 means accessible + "-a_nodata", "0", "-te", str(bounds['west']), str(bounds['south']), str(bounds['east']), str(bounds['north']), "-tr", str(pixel_size), str(pixel_size), @@ -214,14 +254,10 @@ def build_barriers_raster( print(f"STDERR: {result.stderr}") raise RuntimeError(f"gdal_rasterize failed: {result.stderr}") - # Verify output print(f"\n[Done] Verifying output...") with rasterio.open(output_path) as ds: print(f" Size: {ds.width} x {ds.height}") print(f" CRS: {ds.crs}") - print(f" Bounds: {ds.bounds}") - - # Sample a few tiles to check sample = ds.read(1, window=rasterio.windows.Window(0, 0, 1000, 1000)) closed_count = np.sum(sample == 255) print(f" Sample (1000x1000): {closed_count} closed cells") @@ -232,17 +268,140 @@ def build_barriers_raster( return output_path +def build_wilderness_raster( + output_path: Path = DEFAULT_WILDERNESS_PATH, + gdb_path: Path = PADUS_GDB_PATH, + pixel_size: float = PIXEL_SIZE, + bounds: dict = CONUS_BOUNDS, +) -> Path: + """ + Build the PAD-US wilderness raster from the source geodatabase. + + Extracts polygons where Des_Tp = 'WA' (Wilderness Area) and rasterizes them. + """ + import shutil + + if not gdb_path.exists(): + raise FileNotFoundError(f"PAD-US geodatabase not found at {gdb_path}") + + if not shutil.which('ogr2ogr'): + raise RuntimeError("ogr2ogr not found. Install GDAL.") + if not shutil.which('gdal_rasterize'): + raise RuntimeError("gdal_rasterize not found. Install GDAL.") + + output_path.parent.mkdir(parents=True, exist_ok=True) + + print(f"Building PAD-US wilderness raster...") + print(f" Source: {gdb_path}") + print(f" Output: {output_path}") + print(f" Pixel size: {pixel_size} degrees (~{pixel_size * 111000:.0f}m)") + print(f" Bounds: {bounds}") + + with tempfile.TemporaryDirectory() as tmpdir: + wilderness_gpkg = Path(tmpdir) / "wilderness_areas.gpkg" + + print(f"\n[1/3] Extracting wilderness areas (Des_Tp = 'WA')...") + + ogr_cmd = [ + "ogr2ogr", + "-f", "GPKG", + str(wilderness_gpkg), + str(gdb_path), + PADUS_LAYER, + "-where", "Des_Tp = 'WA'", + "-t_srs", "EPSG:4326", + "-nlt", "MULTIPOLYGON", + "-nln", "wilderness_areas", + ] + + result = subprocess.run(ogr_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"STDERR: {result.stderr}") + raise RuntimeError(f"ogr2ogr failed: {result.stderr}") + + info_cmd = ["ogrinfo", "-so", str(wilderness_gpkg), "wilderness_areas"] + info_result = subprocess.run(info_cmd, capture_output=True, text=True) + print(f" Extraction result:\n{info_result.stdout}") + + print(f"\n[2/3] Creating raster grid...") + + width = int((bounds['east'] - bounds['west']) / pixel_size) + height = int((bounds['north'] - bounds['south']) / pixel_size) + print(f" Grid size: {width} x {height} pixels") + + print(f"\n[3/3] Rasterizing wilderness areas...") + + rasterize_cmd = [ + "gdal_rasterize", + "-burn", "255", + "-init", "0", + "-a_nodata", "0", + "-te", str(bounds['west']), str(bounds['south']), + str(bounds['east']), str(bounds['north']), + "-tr", str(pixel_size), str(pixel_size), + "-ot", "Byte", + "-co", "COMPRESS=LZW", + "-co", "TILED=YES", + "-l", "wilderness_areas", + str(wilderness_gpkg), + str(output_path), + ] + + result = subprocess.run(rasterize_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"STDERR: {result.stderr}") + raise RuntimeError(f"gdal_rasterize failed: {result.stderr}") + + print(f"\n[Done] Verifying output...") + with rasterio.open(output_path) as ds: + print(f" Size: {ds.width} x {ds.height}") + print(f" CRS: {ds.crs}") + sample = ds.read(1, window=rasterio.windows.Window(0, 0, 1000, 1000)) + wilderness_count = np.sum(sample == 255) + print(f" Sample (1000x1000): {wilderness_count} wilderness cells") + + file_size = output_path.stat().st_size / (1024**2) + print(f" File size: {file_size:.1f} MB") + + return output_path + + if __name__ == "__main__": import sys - if len(sys.argv) > 1 and sys.argv[1] == "build": - # Build the raster - print("="*60) - print("PAD-US Barriers Raster Build") - print("="*60) - build_barriers_raster() + if len(sys.argv) > 1: + cmd = sys.argv[1] + + if cmd == "build": + print("=" * 60) + print("PAD-US Barriers Raster Build") + print("=" * 60) + build_barriers_raster() + + elif cmd == "build-wilderness": + print("=" * 60) + print("PAD-US Wilderness Raster Build") + print("=" * 60) + build_wilderness_raster() + + elif cmd == "build-all": + print("=" * 60) + print("Building all PAD-US rasters") + print("=" * 60) + build_barriers_raster() + print("\n") + build_wilderness_raster() + + else: + print(f"Unknown command: {cmd}") + print("Usage:") + print(" python barriers.py build # Build barriers raster") + print(" python barriers.py build-wilderness # Build wilderness raster") + print(" python barriers.py build-all # Build both rasters") + sys.exit(1) + else: - # Test the reader + # Test readers print("Testing BarrierReader...") if not DEFAULT_BARRIERS_PATH.exists(): @@ -251,16 +410,31 @@ if __name__ == "__main__": sys.exit(1) reader = BarrierReader() - - # Test grid read for Idaho area barriers = reader.get_barrier_grid( south=42.2, north=42.6, west=-114.8, east=-113.8, target_shape=(400, 1000) ) - print(f"\nGrid test shape: {barriers.shape}") + print(f"\nBarrier grid shape: {barriers.shape}") print(f"Unique values: {np.unique(barriers)}") closed_cells = np.sum(barriers == 255) print(f"Closed cells: {closed_cells} ({100*closed_cells/barriers.size:.2f}%)") - reader.close() - print("\nBarrierReader test complete.") + + print("\nTesting WildernessReader...") + + if not DEFAULT_WILDERNESS_PATH.exists(): + print(f"Wilderness raster not found at {DEFAULT_WILDERNESS_PATH}") + print(f"Run: python barriers.py build-wilderness") + else: + wilderness_reader = WildernessReader() + wilderness = wilderness_reader.get_wilderness_grid( + south=42.2, north=42.6, west=-114.8, east=-113.8, + target_shape=(400, 1000) + ) + print(f"Wilderness grid shape: {wilderness.shape}") + print(f"Unique values: {np.unique(wilderness)}") + wilderness_cells = np.sum(wilderness == 255) + print(f"Wilderness cells: {wilderness_cells} ({100*wilderness_cells/wilderness.size:.2f}%)") + wilderness_reader.close() + + print("\nDone.") diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py index 5f3618c..c3b6a5a 100644 --- a/lib/offroute/cost.py +++ b/lib/offroute/cost.py @@ -1,43 +1,207 @@ """ -Tobler off-path hiking cost function for OFFROUTE. +Multi-mode travel cost functions for OFFROUTE. -Computes travel time cost based on terrain slope using Tobler's -hiking function with off-trail penalty. Optionally applies friction -multipliers from land cover data, trail corridors, and barrier grids. +Supports four travel modes: foot, mtb, atv, vehicle. +Each mode has its own speed function, max slope, trail access rules, +and terrain friction overrides. + +Mode profiles are data-driven — adding a new mode means adding a profile entry. """ import math import numpy as np -from typing import Optional, Literal +from dataclasses import dataclass, field +from typing import Optional, Literal, Dict, Callable -# Maximum passable slope in degrees -MAX_SLOPE_DEG = 40.0 +# ═══════════════════════════════════════════════════════════════════════════════ +# SPEED FUNCTIONS +# ═══════════════════════════════════════════════════════════════════════════════ + +def tobler_off_path_speed(grade: np.ndarray, base_speed: float = 6.0) -> np.ndarray: + """ + Tobler off-path hiking function. + + W = 0.6 * base_speed * exp(-3.5 * |S + 0.05|) + + Peak ~3.6 km/h at grade = -0.05 (slight downhill). + The 0.6 multiplier is the off-trail penalty. + """ + return 0.6 * base_speed * np.exp(-3.5 * np.abs(grade + 0.05)) + + +def herzog_wheeled_speed(grade: np.ndarray, base_speed: float = 12.0) -> np.ndarray: + """ + Herzog wheeled-transport polynomial. + + Relative speed factor: + 1 / (1337.8·S^6 + 278.19·S^5 − 517.39·S^4 − 78.199·S^3 + 93.419·S^2 + 19.825·|S| + 1.64) + + Multiply by base_speed to get km/h. + """ + S = grade + S_abs = np.abs(S) + + # Herzog polynomial (returns relative speed factor 0-1) + denom = (1337.8 * S**6 + 278.19 * S**5 - 517.39 * S**4 + - 78.199 * S**3 + 93.419 * S**2 + 19.825 * S_abs + 1.64) + + # Avoid division by zero and negative speeds + denom = np.maximum(denom, 0.1) + rel_speed = 1.0 / denom + + # Clamp relative speed to reasonable bounds (0.05 to 1.5) + rel_speed = np.clip(rel_speed, 0.05, 1.5) + + return base_speed * rel_speed + + +def linear_degrade_speed(grade: np.ndarray, base_speed: float = 40.0, max_grade: float = 0.364) -> np.ndarray: + """ + Linear speed degradation with slope. + + speed = base_speed * max(0, 1 - |grade| / max_grade) + + max_grade = tan(20°) ≈ 0.364 for 20° max slope. + """ + speed = base_speed * np.maximum(0, 1.0 - np.abs(grade) / max_grade) + return np.maximum(speed, 0.1) # Minimum crawl speed + + +# ═══════════════════════════════════════════════════════════════════════════════ +# MODE PROFILES (Data-driven configuration) +# ═══════════════════════════════════════════════════════════════════════════════ + +@dataclass +class ModeProfile: + """Configuration for a travel mode.""" + + name: str + description: str + + # Speed function parameters + speed_function: str # "tobler", "herzog", "linear" + base_speed_kmh: float + max_slope_deg: float + + # Trail access: trail_value -> friction multiplier (None = impassable) + # Trail values: 5=road, 15=track, 25=foot trail + trail_friction: Dict[int, Optional[float]] = field(default_factory=dict) + + # Off-trail terrain friction overrides (by WorldCover class) + # These MULTIPLY the base WorldCover friction + # None = use default, np.inf = impassable + # WorldCover values: 10=tree, 20=shrub, 30=grass, 40=crop, 50=urban, + # 60=bare, 80=water, 90=wetland, 95=mangrove, 100=moss + terrain_friction_override: Dict[int, Optional[float]] = field(default_factory=dict) + + # Should wilderness areas be impassable? + wilderness_impassable: bool = False + + # For vehicle mode: can traverse off-trail flat terrain? + off_trail_flat_threshold_deg: float = 0.0 # 0 = no off-trail allowed + off_trail_flat_friction: float = np.inf # friction if allowed + + +# Define all mode profiles +MODE_PROFILES: Dict[str, ModeProfile] = { + "foot": ModeProfile( + name="foot", + description="Hiking on foot (Tobler off-path model)", + speed_function="tobler", + base_speed_kmh=6.0, + max_slope_deg=40.0, + trail_friction={ + 5: 0.1, # road + 15: 0.3, # track + 25: 0.5, # foot trail + }, + terrain_friction_override={ + # Use default WorldCover friction for foot mode + }, + wilderness_impassable=False, + ), + + "mtb": ModeProfile( + name="mtb", + description="Mountain bike / dirt bike (Herzog wheeled model)", + speed_function="herzog", + base_speed_kmh=12.0, + max_slope_deg=25.0, + trail_friction={ + 5: 0.1, # road + 15: 0.2, # track + 25: 0.5, # foot trail (rideable but slow) + }, + terrain_friction_override={ + 30: 2.0, # Grassland: rideable but slow + 20: 4.0, # Shrubland: barely rideable + 10: 8.0, # Tree cover/forest: effectively impassable + 60: 3.0, # Bare/rocky + 90: np.inf, # Wetland: impassable + 95: np.inf, # Mangrove: impassable + 80: np.inf, # Water: impassable + }, + wilderness_impassable=True, + ), + + "atv": ModeProfile( + name="atv", + description="ATV / side-by-side (Herzog wheeled model, higher base speed)", + speed_function="herzog", + base_speed_kmh=25.0, + max_slope_deg=30.0, + trail_friction={ + 5: 0.1, # road + 15: 0.3, # track + 25: None, # foot trail: impassable (too narrow) + }, + terrain_friction_override={ + 30: 1.5, # Grassland: passable + 20: 3.0, # Shrubland: rough + 10: np.inf, # Forest: impassable + 60: 2.0, # Bare/rocky + 90: np.inf, # Wetland: impassable + 95: np.inf, # Mangrove: impassable + 80: np.inf, # Water: impassable + }, + wilderness_impassable=True, + ), + + "vehicle": ModeProfile( + name="vehicle", + description="4x4 truck / jeep (linear speed degradation)", + speed_function="linear", + base_speed_kmh=40.0, + max_slope_deg=20.0, + trail_friction={ + 5: 0.1, # road + 15: 0.5, # track (rough but passable) + 25: None, # foot trail: impassable + }, + terrain_friction_override={ + # All off-trail terrain is impassable by default + 10: np.inf, # Forest + 20: np.inf, # Shrubland + 30: np.inf, # Grassland (except flat - see below) + 40: np.inf, # Cropland (except flat - see below) + 60: np.inf, # Bare + 90: np.inf, # Wetland + 95: np.inf, # Mangrove + 80: np.inf, # Water + }, + wilderness_impassable=True, + off_trail_flat_threshold_deg=5.0, # Can drive on flat fields + off_trail_flat_friction=5.0, # But very slow + ), +} -# Tobler off-path parameters -TOBLER_BASE_SPEED = 6.0 -TOBLER_OFF_TRAIL_MULT = 0.6 # Pragmatic mode friction multiplier for private land PRAGMATIC_BARRIER_MULTIPLIER = 5.0 -# Trail value to friction multiplier mapping -# Trail friction REPLACES land cover friction (a road through forest is still easy) -TRAIL_FRICTION_MAP = { - 5: 0.1, # road - 15: 0.3, # track - 25: 0.5, # foot trail -} - - -def tobler_speed(grade: float) -> float: - """ - Calculate hiking speed using Tobler's off-path function. - - speed_kmh = 0.6 * 6.0 * exp(-3.5 * |grade + 0.05|) - - Peak speed is ~3.6 km/h at grade = -0.05 (slight downhill). - """ - return TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * math.exp(-3.5 * abs(grade + 0.05)) +# ═══════════════════════════════════════════════════════════════════════════════ +# COST GRID COMPUTATION +# ═══════════════════════════════════════════════════════════════════════════════ def compute_cost_grid( elevation: np.ndarray, @@ -45,16 +209,16 @@ def compute_cost_grid( cell_size_lat_m: float = None, cell_size_lon_m: float = None, friction: Optional[np.ndarray] = None, + friction_raw: Optional[np.ndarray] = None, trails: Optional[np.ndarray] = None, barriers: Optional[np.ndarray] = None, - boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" + wilderness: Optional[np.ndarray] = None, + boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic", + mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot" ) -> np.ndarray: """ Compute isotropic travel cost grid from elevation data. - Each cell's cost represents the time (in seconds) to traverse that cell, - based on the average slope from neighboring cells. - Args: elevation: 2D array of elevation values in meters cell_size_m: Average cell size in meters @@ -63,30 +227,29 @@ def compute_cost_grid( friction: Optional 2D array of friction multipliers (WorldCover). Values should be float (1.0 = baseline, 2.0 = 2x slower). np.inf marks impassable cells. - If None, no friction is applied (backward compatible). + friction_raw: Optional 2D array of raw WorldCover class values (uint8). + Used for mode-specific terrain overrides. + Values: 10=tree, 20=shrub, 30=grass, etc. trails: Optional 2D array of trail values (uint8). - 0 = no trail (use friction) - 5 = road (0.1× friction, replaces WorldCover) - 15 = track (0.3× friction, replaces WorldCover) - 25 = foot trail (0.5× friction, replaces WorldCover) - Trail friction REPLACES land cover friction where trails exist. - If None, no trail burn-in is applied. + 0 = no trail, 5 = road, 15 = track, 25 = foot trail barriers: Optional 2D array of barrier values (uint8). - 255 = closed/restricted area (from PAD-US Pub_Access = XA). - 0 = accessible. - If None, no barriers are applied. - boundary_mode: How to handle private/restricted land barriers: - "strict" - cells with barrier=255 become impassable (np.inf) - "pragmatic" - cells with barrier=255 get 5.0x friction penalty - "emergency" - barriers are ignored entirely - Default: "pragmatic" + 255 = closed/restricted area (PAD-US Pub_Access = XA). + wilderness: Optional[np.ndarray] of wilderness values (uint8). + 255 = designated wilderness area. + boundary_mode: How to handle barriers ("strict", "pragmatic", "emergency") + mode: Travel mode ("foot", "mtb", "atv", "vehicle") Returns: 2D array of travel cost in seconds per cell. np.inf for impassable cells. """ if boundary_mode not in ("strict", "pragmatic", "emergency"): - raise ValueError(f"boundary_mode must be 'strict', 'pragmatic', or 'emergency', got '{boundary_mode}'") + raise ValueError(f"boundary_mode must be 'strict', 'pragmatic', or 'emergency'") + + if mode not in MODE_PROFILES: + raise ValueError(f"mode must be one of {list(MODE_PROFILES.keys())}") + + profile = MODE_PROFILES[mode] if cell_size_lat_m is None: cell_size_lat_m = cell_size_m @@ -95,120 +258,212 @@ def compute_cost_grid( rows, cols = elevation.shape - # Compute gradients in both directions - dy = np.zeros_like(elevation) - dx = np.zeros_like(elevation) + # ─── Compute gradients (in-place where possible) ───────────────────────── + # Use float32 to reduce memory footprint + grade = np.zeros(elevation.shape, dtype=np.float32) - # Central differences for interior, forward/backward at edges - dy[1:-1, :] = (elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m) - dy[0, :] = (elevation[0, :] - elevation[1, :]) / cell_size_lat_m - dy[-1, :] = (elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m + # Compute dy contribution to grade squared + dy_contrib = np.zeros(elevation.shape, dtype=np.float32) + dy_contrib[1:-1, :] = ((elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m)) ** 2 + dy_contrib[0, :] = ((elevation[0, :] - elevation[1, :]) / cell_size_lat_m) ** 2 + dy_contrib[-1, :] = ((elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m) ** 2 - dx[:, 1:-1] = (elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m) - dx[:, 0] = (elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m - dx[:, -1] = (elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m + # Compute dx contribution and add to dy_contrib in-place + dy_contrib[:, 1:-1] += ((elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m)) ** 2 + dy_contrib[:, 0] += ((elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m) ** 2 + dy_contrib[:, -1] += ((elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m) ** 2 - # Compute slope magnitude (grade = rise/run) - grade_magnitude = np.sqrt(dx**2 + dy**2) + # grade = sqrt(dx^2 + dy^2) + np.sqrt(dy_contrib, out=grade) + del dy_contrib # Free memory immediately - # Convert to slope angle in degrees - slope_deg = np.degrees(np.arctan(grade_magnitude)) + # ─── Compute speed based on mode ───────────────────────────────────────── + max_grade_val = np.tan(np.radians(profile.max_slope_deg)) - # Compute speed for each cell using Tobler function - speed_kmh = TOBLER_OFF_TRAIL_MULT * TOBLER_BASE_SPEED * np.exp(-3.5 * np.abs(grade_magnitude + 0.05)) + if profile.speed_function == "tobler": + speed_kmh = tobler_off_path_speed(grade, profile.base_speed_kmh) + elif profile.speed_function == "herzog": + speed_kmh = herzog_wheeled_speed(grade, profile.base_speed_kmh) + elif profile.speed_function == "linear": + speed_kmh = linear_degrade_speed(grade, profile.base_speed_kmh, max_grade_val) + else: + raise ValueError(f"Unknown speed function: {profile.speed_function}") - # Convert speed to time cost (seconds to traverse one cell) + # ─── Base cost (seconds per cell) ───────────────────────────────────────── avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 - cost = avg_cell_size * 3.6 / speed_kmh + cost = (avg_cell_size * 3.6) / speed_kmh + del speed_kmh - # Set impassable cells (slope > MAX_SLOPE_DEG) to infinity - cost[slope_deg > MAX_SLOPE_DEG] = np.inf + # ─── Max slope limit ────────────────────────────────────────────────────── + cost[grade > max_grade_val] = np.inf - # Handle NaN elevations (no data) + # ─── NaN elevations ────────────────────────────────────────────────────── cost[np.isnan(elevation)] = np.inf - # Build effective friction array - # Start with WorldCover friction if provided, else 1.0 + # ─── Apply friction in-place ───────────────────────────────────────────── + # Instead of creating effective_friction copy, apply directly to cost + + # Start with base friction if friction is not None: if friction.shape != elevation.shape: - raise ValueError( - f"Friction shape {friction.shape} does not match elevation shape {elevation.shape}" - ) - effective_friction = friction.copy() - else: - effective_friction = np.ones(elevation.shape, dtype=np.float32) + raise ValueError(f"Friction shape mismatch") + np.multiply(cost, friction, out=cost) - # Apply trail burn-in: trails REPLACE land cover friction + # ─── Mode-specific terrain friction overrides (memory-efficient) ───────── + if friction_raw is not None and profile.terrain_friction_override: + if friction_raw.shape != elevation.shape: + raise ValueError(f"Friction_raw shape mismatch") + + # Process all overrides without creating large intermediate masks + for wc_class, override in profile.terrain_friction_override.items(): + if override is not None: + if override == np.inf: + # Use np.where for in-place-like behavior + np.putmask(cost, friction_raw == wc_class, np.inf) + else: + # Multiply cost where friction_raw matches + # Using a loop with putmask is more memory efficient + mask = friction_raw == wc_class + cost[mask] *= override + del mask + + # ─── Vehicle mode: allow flat grassland/cropland ───────────────────────── + if mode == "vehicle" and profile.off_trail_flat_threshold_deg > 0: + if friction_raw is not None: + # Compute slope in degrees for flat terrain check + slope_deg = np.degrees(np.arctan(grade)) + # Flat grassland or cropland - recompute cost for these cells + flat_field_mask = ( + (slope_deg <= profile.off_trail_flat_threshold_deg) & + ((friction_raw == 30) | (friction_raw == 40)) + ) + del slope_deg + # Recalculate cost for these cells with flat field friction + if np.any(flat_field_mask): + base_time = avg_cell_size * 3.6 / linear_degrade_speed( + grade[flat_field_mask], profile.base_speed_kmh, max_grade_val + ) + cost[flat_field_mask] = base_time * profile.off_trail_flat_friction + del base_time + del flat_field_mask + + # ─── Trail friction (mode-specific) ────────────────────────────────────── if trails is not None: if trails.shape != elevation.shape: - raise ValueError( - f"Trails shape {trails.shape} does not match elevation shape {elevation.shape}" - ) - # Replace friction where trails exist - for trail_value, trail_friction in TRAIL_FRICTION_MAP.items(): - trail_mask = trails == trail_value - effective_friction[trail_mask] = trail_friction + raise ValueError(f"Trails shape mismatch") - # Apply friction to cost - cost = cost * effective_friction + for trail_value, trail_friction in profile.trail_friction.items(): + if trail_friction is None: + # Impassable for this mode + np.putmask(cost, trails == trail_value, np.inf) + else: + # Trail friction REPLACES terrain friction + # Recalculate cost = base_time * trail_friction + trail_mask = trails == trail_value + if np.any(trail_mask): + # Get base travel time (without friction) + if profile.speed_function == "tobler": + trail_speed = tobler_off_path_speed(grade[trail_mask], profile.base_speed_kmh) + elif profile.speed_function == "herzog": + trail_speed = herzog_wheeled_speed(grade[trail_mask], profile.base_speed_kmh) + else: + trail_speed = linear_degrade_speed( + grade[trail_mask], profile.base_speed_kmh, max_grade_val + ) + cost[trail_mask] = (avg_cell_size * 3.6 / trail_speed) * trail_friction + del trail_speed + del trail_mask - # Apply barriers based on boundary_mode + # ─── Wilderness areas (mode-specific) ──────────────────────────────────── + if wilderness is not None and profile.wilderness_impassable: + if wilderness.shape != elevation.shape: + raise ValueError(f"Wilderness shape mismatch") + np.putmask(cost, wilderness == 255, np.inf) + + # ─── Barriers (private land) ───────────────────────────────────────────── if barriers is not None and boundary_mode != "emergency": if barriers.shape != elevation.shape: - raise ValueError( - f"Barriers shape {barriers.shape} does not match elevation shape {elevation.shape}" - ) - - barrier_mask = barriers == 255 + raise ValueError(f"Barriers shape mismatch") if boundary_mode == "strict": - # Mark closed/restricted areas as impassable - cost[barrier_mask] = np.inf + np.putmask(cost, barriers == 255, np.inf) elif boundary_mode == "pragmatic": - # Apply friction penalty to closed/restricted areas - cost[barrier_mask] = cost[barrier_mask] * PRAGMATIC_BARRIER_MULTIPLIER + barrier_mask = barriers == 255 + cost[barrier_mask] *= PRAGMATIC_BARRIER_MULTIPLIER + del barrier_mask return cost +# ═══════════════════════════════════════════════════════════════════════════════ +# LEGACY API (backward compatibility) +# ═══════════════════════════════════════════════════════════════════════════════ + +def tobler_speed(grade: float) -> float: + """Legacy single-value Tobler speed function.""" + return 0.6 * 6.0 * math.exp(-3.5 * abs(grade + 0.05)) + + +# ═══════════════════════════════════════════════════════════════════════════════ +# TESTING +# ═══════════════════════════════════════════════════════════════════════════════ + if __name__ == "__main__": - print("Testing Tobler speed function:") - for grade in [-0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3]: - speed = tobler_speed(grade) - print(f" Grade {grade:+.2f}: {speed:.2f} km/h") + print("=" * 70) + print("OFFROUTE Multi-Mode Cost Function Tests") + print("=" * 70) - print("\nTesting cost grid computation (no friction, no trails):") - elev = np.arange(100).reshape(10, 10).astype(np.float32) * 10 - cost = compute_cost_grid(elev, cell_size_m=30.0) - print(f" Elevation range: {elev.min():.0f} - {elev.max():.0f} m") - finite = cost[~np.isinf(cost)] - if len(finite) > 0: - print(f" Cost range: {finite.min():.1f} - {finite.max():.1f} s") - else: - print(f" All cells impassable (test data too steep)") + print("\n[1] Speed functions at various grades:") + print(f"{'Grade':<10} {'Foot':<12} {'MTB':<12} {'ATV':<12} {'Vehicle':<12}") + print("-" * 60) - print("\nTesting cost grid with friction and trails:") - elev = np.ones((10, 10), dtype=np.float32) * 1000 # flat terrain - friction = np.ones((10, 10), dtype=np.float32) * 2.0 # 2.0x friction (forest) - trails = np.zeros((10, 10), dtype=np.uint8) - trails[5, :] = 5 # road across middle row + for grade_val in [-0.3, -0.1, 0.0, 0.1, 0.2, 0.3]: + grade_arr = np.array([grade_val]) + foot = tobler_off_path_speed(grade_arr, 6.0)[0] + mtb = herzog_wheeled_speed(grade_arr, 12.0)[0] + atv = herzog_wheeled_speed(grade_arr, 25.0)[0] + veh = linear_degrade_speed(grade_arr, 40.0, np.tan(np.radians(20)))[0] + print(f"{grade_val:+.2f} {foot:>6.2f} km/h {mtb:>6.2f} km/h {atv:>6.2f} km/h {veh:>6.2f} km/h") - cost_no_trail = compute_cost_grid(elev, cell_size_m=30.0, friction=friction) - cost_with_trail = compute_cost_grid(elev, cell_size_m=30.0, friction=friction, trails=trails) + print("\n[2] Mode profiles:") + for name, profile in MODE_PROFILES.items(): + print(f"\n {name.upper()}: {profile.description}") + print(f" Max slope: {profile.max_slope_deg}°") + print(f" Trail access: {profile.trail_friction}") + print(f" Wilderness blocked: {profile.wilderness_impassable}") - base_cost = 30 * 3.6 / (0.6 * 6.0 * np.exp(-3.5 * 0.05)) - print(f" Base cost (flat, 30m cell): {base_cost:.1f} s") - print(f" Forest cell (2.0x friction): {cost_no_trail[0, 0]:.1f} s") - print(f" Road cell (0.1x friction, replaces forest): {cost_with_trail[5, 0]:.1f} s") - print(f" Road friction advantage: {cost_no_trail[0, 0] / cost_with_trail[5, 0]:.1f}x faster") - - print("\nTesting cost grid with barriers (three modes):") + print("\n[3] Cost grid test (flat terrain, forest):") elev = np.ones((10, 10), dtype=np.float32) * 1000 - barriers = np.zeros((10, 10), dtype=np.uint8) - barriers[3:7, 3:7] = 255 + friction = np.ones((10, 10), dtype=np.float32) * 2.0 # Forest friction + friction_raw = np.ones((10, 10), dtype=np.uint8) * 10 # Tree cover class - for mode in ["strict", "pragmatic", "emergency"]: - cost = compute_cost_grid(elev, cell_size_m=30.0, barriers=barriers, boundary_mode=mode) + trails = np.zeros((10, 10), dtype=np.uint8) + trails[5, :] = 5 # Road across middle + + for mode_name in ["foot", "mtb", "atv", "vehicle"]: + cost = compute_cost_grid( + elev, cell_size_m=30.0, + friction=friction, + friction_raw=friction_raw, + trails=trails, + mode=mode_name + ) + off_trail_cost = cost[0, 0] + road_cost = cost[5, 0] impassable = np.sum(np.isinf(cost)) - barrier_cost = cost[5, 5] if not np.isinf(cost[5, 5]) else "inf" - print(f" {mode:10s}: {impassable} impassable, barrier cell cost = {barrier_cost}") + print(f" {mode_name:8s}: off-trail={off_trail_cost:>8.1f}s, road={road_cost:>6.1f}s, impassable={impassable}") + + print("\n[4] Wilderness blocking test:") + wilderness = np.zeros((10, 10), dtype=np.uint8) + wilderness[3:7, 3:7] = 255 + + for mode_name in ["foot", "mtb", "atv", "vehicle"]: + cost = compute_cost_grid( + elev, cell_size_m=30.0, + wilderness=wilderness, + mode=mode_name + ) + wilderness_impassable = np.sum(np.isinf(cost[3:7, 3:7])) + print(f" {mode_name:8s}: wilderness cells impassable = {wilderness_impassable}/16") + + print("\nDone.") diff --git a/lib/offroute/router.py b/lib/offroute/router.py index 57d6ce5..d44bbb4 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -6,6 +6,8 @@ Connects the raster pathfinder (wilderness segment) to Valhalla (on-network segm Entry points are extracted from OSM highways and stored in /mnt/nav/navi.db. The pathfinder routes from a wilderness start to the nearest entry point, then Valhalla completes the route to the destination. + +Supports four travel modes: foot, mtb, atv, vehicle. """ import json import math @@ -23,7 +25,7 @@ from skimage.graph import MCP_Geometric from .dem import DEMReader from .cost import compute_cost_grid from .friction import FrictionReader, friction_to_multiplier -from .barriers import BarrierReader +from .barriers import BarrierReader, WildernessReader, DEFAULT_WILDERNESS_PATH from .trails import TrailReader # Paths @@ -45,6 +47,7 @@ MODE_TO_COSTING = { "foot": "pedestrian", "mtb": "bicycle", "atv": "auto", + "vehicle": "auto", } @@ -120,7 +123,6 @@ class EntryPointIndex: def query_radius(self, lat: float, lon: float, radius_km: float) -> List[Dict]: """Query entry points within radius of a point.""" - # Approximate bbox for the radius lat_delta = radius_km / 111.0 lon_delta = radius_km / (111.0 * math.cos(math.radians(lat))) @@ -129,7 +131,6 @@ class EntryPointIndex: lon - lon_delta, lon + lon_delta ) - # Filter by actual distance and add distance field result = [] for p in points: dist = haversine_distance(lat, lon, p['lat'], p['lon']) @@ -140,17 +141,12 @@ class EntryPointIndex: return sorted(result, key=lambda x: x['distance_m']) def build_index(self, osm_pbf_path: Path = OSM_PBF_PATH) -> Dict: - """ - Build the entry point index from OSM PBF. - - Extracts endpoints of highway features that connect to the network. - """ + """Build the entry point index from OSM PBF.""" if not osm_pbf_path.exists(): raise FileNotFoundError(f"OSM PBF not found: {osm_pbf_path}") print(f"Building trail entry point index from {osm_pbf_path}...") - # Highway types to extract (routable network entry points) highway_types = [ "primary", "secondary", "tertiary", "unclassified", "residential", "service", "track", "path", "footway", "bridleway" @@ -159,42 +155,29 @@ class EntryPointIndex: stats = {"total": 0, "by_class": {}} with tempfile.TemporaryDirectory() as tmpdir: - # Extract highways to GeoJSON geojson_path = Path(tmpdir) / "highways.geojson" - # Build osmium tags-filter expressions (one per highway type) print(f" Extracting highways with osmium...") - cmd = [ - "osmium", "tags-filter", - str(osm_pbf_path), - ] - # Add each highway type as a separate filter expression + cmd = ["osmium", "tags-filter", str(osm_pbf_path)] for ht in highway_types: cmd.append(f"w/highway={ht}") cmd.extend(["-o", str(Path(tmpdir) / "filtered.osm.pbf"), "--overwrite"]) - subprocess.run(cmd, check=True, capture_output=True) - # Convert to GeoJSON print(f" Converting to GeoJSON with ogr2ogr...") cmd = [ "ogr2ogr", "-f", "GeoJSON", str(geojson_path), str(Path(tmpdir) / "filtered.osm.pbf"), - "lines", - "-t_srs", "EPSG:4326" + "lines", "-t_srs", "EPSG:4326" ] subprocess.run(cmd, check=True, capture_output=True) - # Parse GeoJSON and extract endpoints print(f" Extracting entry points...") with open(geojson_path) as f: data = json.load(f) - # Collect unique points (endpoints) - # Key: (lat, lon) rounded to 5 decimal places (~1m precision) points = {} - for feature in data.get("features", []): props = feature.get("properties", {}) geom = feature.get("geometry", {}) @@ -209,62 +192,48 @@ class EntryPointIndex: highway_class = props.get("highway", "unknown") name = props.get("name", "") - # Extract endpoints for coord in [coords[0], coords[-1]]: lon, lat = coord[0], coord[1] key = (round(lat, 5), round(lon, 5)) if key not in points: points[key] = { - "lat": lat, - "lon": lon, - "highway_class": highway_class, - "name": name + "lat": lat, "lon": lon, + "highway_class": highway_class, "name": name } else: - # Keep the "best" highway class (roads > tracks > paths) existing = points[key] if self._highway_priority(highway_class) < self._highway_priority(existing["highway_class"]): points[key]["highway_class"] = highway_class if name and not existing["name"]: points[key]["name"] = name - # Create/update database print(f" Writing {len(points)} entry points to {self.db_path}...") self.db_path.parent.mkdir(parents=True, exist_ok=True) conn = self._get_conn() - # Create table conn.execute(""" CREATE TABLE IF NOT EXISTS trail_entry_points ( id INTEGER PRIMARY KEY AUTOINCREMENT, - lat REAL NOT NULL, - lon REAL NOT NULL, - highway_class TEXT NOT NULL, - name TEXT + lat REAL NOT NULL, lon REAL NOT NULL, + highway_class TEXT NOT NULL, name TEXT ) """) - - # Clear existing data conn.execute("DELETE FROM trail_entry_points") - # Insert new points for point in points.values(): - conn.execute(""" - INSERT INTO trail_entry_points (lat, lon, highway_class, name) - VALUES (?, ?, ?, ?) - """, (point["lat"], point["lon"], point["highway_class"], point["name"])) - + conn.execute( + "INSERT INTO trail_entry_points (lat, lon, highway_class, name) VALUES (?, ?, ?, ?)", + (point["lat"], point["lon"], point["highway_class"], point["name"]) + ) stats["total"] += 1 hc = point["highway_class"] stats["by_class"][hc] = stats["by_class"].get(hc, 0) + 1 - # Create spatial index conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lat ON trail_entry_points(lat)") conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lon ON trail_entry_points(lon)") conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_latlon ON trail_entry_points(lat, lon)") - conn.commit() print(f" Done. Total: {stats['total']} entry points") @@ -291,12 +260,15 @@ class EntryPointIndex: class OffrouteRouter: """ OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. + + Supports modes: foot, mtb, atv, vehicle """ def __init__(self): self.dem_reader = None self.friction_reader = None self.barrier_reader = None + self.wilderness_reader = None self.trail_reader = None self.entry_index = EntryPointIndex() @@ -308,6 +280,8 @@ class OffrouteRouter: self.friction_reader = FrictionReader() if self.barrier_reader is None: self.barrier_reader = BarrierReader() + if self.wilderness_reader is None and DEFAULT_WILDERNESS_PATH.exists(): + self.wilderness_reader = WildernessReader() if self.trail_reader is None: self.trail_reader = TrailReader() @@ -317,16 +291,25 @@ class OffrouteRouter: start_lon: float, end_lat: float, end_lon: float, - mode: Literal["foot", "mtb", "atv"] = "foot", + mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot", boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" ) -> Dict: """ Route from a wilderness start point to a destination. + Args: + start_lat, start_lon: Starting coordinates (wilderness) + end_lat, end_lon: Destination coordinates + mode: Travel mode (foot, mtb, atv, vehicle) + boundary_mode: How to handle private land (strict, pragmatic, emergency) + Returns a GeoJSON FeatureCollection with wilderness and network segments. """ t0 = time.time() + if mode not in MODE_TO_COSTING: + return {"status": "error", "message": f"Unknown mode: {mode}"} + # Ensure entry point index exists if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: return { @@ -334,28 +317,27 @@ class OffrouteRouter: "message": "Trail entry point index not built. Run build_entry_index() first." } - # Find entry points near start - entry_points = self.entry_index.query_radius( - start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM - ) + # Find entry points near start (limit to nearest 10 to control bbox size) + MAX_ENTRY_POINTS = 10 + entry_points = self.entry_index.query_radius(start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM) if not entry_points: - # Try expanded radius - entry_points = self.entry_index.query_radius( - start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM - ) + entry_points = self.entry_index.query_radius(start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM) if not entry_points: return { "status": "error", "message": f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start" } - # Build bbox for pathfinding grid - # Include start, end, and all entry points + # Limit to nearest entry points to prevent huge bounding boxes + entry_points = entry_points[:MAX_ENTRY_POINTS] + + # Build bbox with max size limit (prevent OOM on large areas) + MAX_BBOX_DEGREES = 0.5 # ~55km at mid-latitudes all_lats = [start_lat, end_lat] + [p["lat"] for p in entry_points] all_lons = [start_lon, end_lon] + [p["lon"] for p in entry_points] - padding = 0.05 # ~5km padding + padding = 0.05 bbox = { "south": min(all_lats) - padding, "north": max(all_lats) + padding, @@ -363,16 +345,28 @@ class OffrouteRouter: "east": max(all_lons) + padding, } + # Clamp bbox size to prevent memory exhaustion + lat_span = bbox["north"] - bbox["south"] + lon_span = bbox["east"] - bbox["west"] + if lat_span > MAX_BBOX_DEGREES or lon_span > MAX_BBOX_DEGREES: + center_lat = (bbox["south"] + bbox["north"]) / 2 + center_lon = (bbox["west"] + bbox["east"]) / 2 + half_span = MAX_BBOX_DEGREES / 2 + bbox = { + "south": center_lat - half_span, + "north": center_lat + half_span, + "west": center_lon - half_span, + "east": center_lon + half_span, + } + # Initialize readers self._init_readers() # Load elevation try: elevation, meta = self.dem_reader.get_elevation_grid( - south=bbox["south"], - north=bbox["north"], - west=bbox["west"], - east=bbox["east"], + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], ) except Exception as e: return {"status": "error", "message": f"Failed to load elevation: {e}"} @@ -382,62 +376,69 @@ class OffrouteRouter: if mem > MEMORY_LIMIT_GB: return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} - # Load friction + # Load friction (both processed and raw for mode-specific overrides) friction_raw = self.friction_reader.get_friction_grid( - south=bbox["south"], - north=bbox["north"], - west=bbox["west"], - east=bbox["east"], + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], target_shape=elevation.shape ) friction_mult = friction_to_multiplier(friction_raw) # Load barriers barriers = self.barrier_reader.get_barrier_grid( - south=bbox["south"], - north=bbox["north"], - west=bbox["west"], - east=bbox["east"], + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], target_shape=elevation.shape ) + # Load wilderness (if available and mode requires it) + wilderness = None + if self.wilderness_reader is not None and mode in ("mtb", "atv", "vehicle"): + wilderness = self.wilderness_reader.get_wilderness_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + target_shape=elevation.shape + ) + # Load trails trails = self.trail_reader.get_trails_grid( - south=bbox["south"], - north=bbox["north"], - west=bbox["west"], - east=bbox["east"], + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], target_shape=elevation.shape ) - # Compute cost grid + # Compute cost grid with mode-specific parameters cost = compute_cost_grid( elevation, cell_size_m=meta["cell_size_m"], friction=friction_mult, + friction_raw=friction_raw, trails=trails, barriers=barriers, + wilderness=wilderness, boundary_mode=boundary_mode, + mode=mode, ) + # Free intermediate arrays to reduce memory before MCP + # Note: Keep trails and barriers - needed for path statistics + del friction_mult, friction_raw, wilderness + import gc + gc.collect() + # Convert start to pixel coordinates start_row, start_col = self.dem_reader.latlon_to_pixel(start_lat, start_lon, meta) - # Validate start is in bounds rows, cols = elevation.shape if not (0 <= start_row < rows and 0 <= start_col < cols): return {"status": "error", "message": "Start point outside grid bounds"} - # Mark entry points on the grid + # Mark entry points on grid entry_pixels = [] for ep in entry_points: row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) if 0 <= row < rows and 0 <= col < cols: - entry_pixels.append({ - "row": row, - "col": col, - "entry_point": ep - }) + entry_pixels.append({"row": row, "col": col, "entry_point": ep}) if not entry_pixels: return {"status": "error", "message": "No entry points map to grid bounds"} @@ -465,18 +466,21 @@ class OffrouteRouter: # Traceback wilderness path path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) - # Convert to coordinates and collect stats + # Convert to coordinates wilderness_coords = [] elevations = [] trail_values = [] + barrier_crossings = 0 for row, col in path_indices: lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) wilderness_coords.append([lon, lat]) elevations.append(elevation[row, col]) trail_values.append(trails[row, col]) + if barriers[row, col] == 255: + barrier_crossings += 1 - # Calculate wilderness segment stats + # Calculate stats wilderness_distance_m = 0 for i in range(1, len(wilderness_coords)): lon1, lat1 = wilderness_coords[i-1] @@ -493,13 +497,16 @@ class OffrouteRouter: total_cells = len(trail_arr) on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - # Entry point reached + # Free trails and barriers now that path stats are computed + del trails, barriers + + # Entry point entry_lat = best_entry["entry_point"]["lat"] entry_lon = best_entry["entry_point"]["lon"] entry_class = best_entry["entry_point"]["highway_class"] entry_name = best_entry["entry_point"].get("name", "") - # Call Valhalla for on-network segment + # Call Valhalla valhalla_costing = MODE_TO_COSTING.get(mode, "pedestrian") valhalla_request = { @@ -515,11 +522,7 @@ class OffrouteRouter: valhalla_error = None try: - resp = requests.post( - f"{VALHALLA_URL}/route", - json=valhalla_request, - timeout=30 - ) + resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) if resp.status_code == 200: valhalla_data = resp.json() @@ -529,11 +532,8 @@ class OffrouteRouter: if legs: leg = legs[0] shape = leg.get("shape", "") - - # Decode polyline6 network_coords = self._decode_polyline(shape) - # Extract maneuvers maneuvers = [] for m in leg.get("maneuvers", []): maneuvers.append({ @@ -560,7 +560,6 @@ class OffrouteRouter: # Build response features = [] - # Feature 1: Wilderness segment wilderness_feature = { "type": "Feature", "properties": { @@ -572,15 +571,13 @@ class OffrouteRouter: "boundary_mode": boundary_mode, "on_trail_pct": on_trail_pct, "cell_count": total_cells, + "barrier_crossings": barrier_crossings, + "mode": mode, }, - "geometry": { - "type": "LineString", - "coordinates": wilderness_coords, - } + "geometry": {"type": "LineString", "coordinates": wilderness_coords} } features.append(wilderness_feature) - # Feature 2: Network segment (if available) if network_segment: network_feature = { "type": "Feature", @@ -590,40 +587,23 @@ class OffrouteRouter: "duration_minutes": network_segment["duration_minutes"], "maneuvers": network_segment["maneuvers"], }, - "geometry": { - "type": "LineString", - "coordinates": network_segment["coordinates"], - } + "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} } features.append(network_feature) - # Build combined route coordinates combined_coords = wilderness_coords.copy() if network_segment: - # Skip first point of network segment (it's the same as last wilderness point) combined_coords.extend(network_segment["coordinates"][1:]) - # Feature 3: Combined route combined_feature = { "type": "Feature", - "properties": { - "segment_type": "combined", - "mode": mode, - "boundary_mode": boundary_mode, - }, - "geometry": { - "type": "LineString", - "coordinates": combined_coords, - } + "properties": {"segment_type": "combined", "mode": mode, "boundary_mode": boundary_mode}, + "geometry": {"type": "LineString", "coordinates": combined_coords} } features.append(combined_feature) - geojson = { - "type": "FeatureCollection", - "features": features, - } + geojson = {"type": "FeatureCollection", "features": features} - # Build summary total_distance_km = wilderness_distance_m / 1000 total_effort_minutes = best_cost / 60 @@ -639,22 +619,17 @@ class OffrouteRouter: "network_distance_km": float(network_segment["distance_km"]) if network_segment else 0, "network_duration_minutes": float(network_segment["duration_minutes"]) if network_segment else 0, "on_trail_pct": on_trail_pct, + "barrier_crossings": barrier_crossings, "boundary_mode": boundary_mode, "mode": mode, "entry_point": { - "lat": entry_lat, - "lon": entry_lon, - "highway_class": entry_class, - "name": entry_name, + "lat": entry_lat, "lon": entry_lon, + "highway_class": entry_class, "name": entry_name, }, "computation_time_s": time.time() - t0, } - result = { - "status": "ok", - "route": geojson, - "summary": summary, - } + result = {"status": "ok", "route": geojson, "summary": summary} if valhalla_error: result["warning"] = f"Network segment incomplete: {valhalla_error}" @@ -669,7 +644,6 @@ class OffrouteRouter: lon = 0 while index < len(encoded): - # Latitude shift = 0 result = 0 while True: @@ -682,7 +656,6 @@ class OffrouteRouter: dlat = ~(result >> 1) if result & 1 else result >> 1 lat += dlat - # Longitude shift = 0 result = 0 while True: @@ -707,6 +680,8 @@ class OffrouteRouter: self.friction_reader.close() if self.barrier_reader: self.barrier_reader.close() + if self.wilderness_reader: + self.wilderness_reader.close() if self.trail_reader: self.trail_reader.close() self.entry_index.close() @@ -729,24 +704,33 @@ if __name__ == "__main__": print(f"\nDone. Total entry points: {stats['total']}") elif len(sys.argv) > 1 and sys.argv[1] == "test": - print("Testing router...") + print("Testing router (all modes)...") router = OffrouteRouter() - # Test route: wilderness to Twin Falls - result = router.route( - start_lat=42.35, - start_lon=-114.30, - end_lat=42.5629, - end_lon=-114.4609, - mode="foot", - boundary_mode="pragmatic" - ) + for mode in ["foot", "mtb", "atv", "vehicle"]: + print(f"\n{'='*60}") + print(f"Mode: {mode}") + print("="*60) + + result = router.route( + start_lat=42.35, start_lon=-114.30, + end_lat=42.5629, end_lon=-114.4609, + mode=mode, boundary_mode="pragmatic" + ) + + if result["status"] == "ok": + s = result["summary"] + print(f" Wilderness: {s['wilderness_distance_km']:.2f} km, {s['wilderness_effort_minutes']:.1f} min") + print(f" Network: {s['network_distance_km']:.2f} km, {s['network_duration_minutes']:.1f} min") + print(f" On-trail: {s['on_trail_pct']:.1f}%") + print(f" Entry: {s['entry_point']['highway_class']}") + else: + print(f" ERROR: {result['message']}") - print(json.dumps(result, indent=2, default=str)) router.close() else: print("Usage:") print(" python router.py build # Build entry point index") - print(" python router.py test # Test route") + print(" python router.py test # Test all modes") From 2252905986ed21264a0c873f3037a25fc63543b9 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 14:26:18 +0000 Subject: [PATCH 40/72] =?UTF-8?q?feat(offroute):=20MVUM=20legal=20access?= =?UTF-8?q?=20=E2=80=94=20pathfinder=20integration=20+=20places=20panel=20?= =?UTF-8?q?API=20+=20boundary=5Fmode=20control?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MVUM Data Import: - Downloaded USFS MVUM Roads (150,636 features) and Trails (28,741 features) - Imported to navi.db as mvum_roads and mvum_trails tables - Idaho coverage: ~8,994 roads and ~4,504 trails across 7 national forests - Preserved all vehicle-class fields (ATV, MOTORCYCLE, HIGHCLEARANCEVEHICLE, etc.) - Preserved seasonal date ranges (*_DATESOPEN fields) New mvum.py module: - MVUMReader class for querying MVUM data by bbox and nearest point - parse_date_range() for seasonal date string parsing (MM/DD-MM/DD format) - check_access() for determining open/closed status with date checking - symbol_to_access() fallback when per-vehicle fields are null - get_mvum_access_grid() for rasterizing MVUM to pathfinder grid Cost function integration: - Added mvum parameter to compute_cost_grid() - MVUM closures respond to boundary_mode: * strict = impassable (np.inf) * pragmatic = 5x friction penalty * emergency = ignored entirely - Foot mode skips MVUM (motor-vehicle specific) Router integration: - Loads MVUM access grid for motorized modes (mtb, atv, vehicle) - Tracks mvum_closed_crossings in path summary Places Panel API: - GET /api/mvum?lat=XX&lon=XX&radius=50 - Returns MVUM feature with access status for all vehicle classes - Includes seasonal date ranges, maintenance level, forest/district info - GeoJSON geometry for map display Validation: - MVUM places endpoint tested with Sawtooth NF road - All four modes validated with strict/pragmatic/emergency boundary modes - Foot mode correctly ignores MVUM restrictions Co-Authored-By: Claude Opus 4.5 --- lib/api.py | 134 +++++++++ lib/offroute/cost.py | 25 ++ lib/offroute/mvum.py | 623 +++++++++++++++++++++++++++++++++++++++++ lib/offroute/router.py | 29 +- 4 files changed, 809 insertions(+), 2 deletions(-) create mode 100644 lib/offroute/mvum.py diff --git a/lib/api.py b/lib/api.py index a127866..699d09d 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2799,3 +2799,137 @@ def api_offroute(): except Exception as e: logger.exception("Offroute error") return jsonify({"status": "error", "message": str(e)}), 500 + + +# ── MVUM Places Panel API ── + +@app.route("/api/mvum", methods=["GET"]) +def api_mvum(): + """ + Query MVUM (Motor Vehicle Use Map) features near a point. + + Used by the Navi frontend places panel when a user taps near a road/trail. + + Query params: + lat: Latitude + lon: Longitude + radius: Search radius in meters (default: 50) + + Response: + { + "status": "ok", + "feature": { + "id": "FR 123", + "name": "Some Forest Road", + "forest": "Sawtooth National Forest", + "district": "Ketchum Ranger District", + "surface": "NAT", + "maintenance_level": 2, + "seasonal": "Seasonal", + "symbol": 2, + "access": { + "passenger_vehicle": { "status": "Open", "dates": "06/15-10/15" }, + "high_clearance": { "status": "Open", "dates": "06/15-10/15" }, + "atv": { "status": "Open", "dates": "06/15-10/15" }, + ... + }, + "geometry": { GeoJSON LineString } + } + } + + If no MVUM feature within radius: + { "status": "ok", "feature": null } + """ + try: + lat = request.args.get("lat", type=float) + lon = request.args.get("lon", type=float) + radius = request.args.get("radius", 50, type=float) + + if lat is None or lon is None: + return jsonify({"status": "error", "message": "lat and lon required"}), 400 + + from .offroute.mvum import MVUMReader + + reader = MVUMReader() + try: + # Try roads first, then trails + feature = reader.query_nearest(lat, lon, radius, "mvum_roads") + if feature is None: + feature = reader.query_nearest(lat, lon, radius, "mvum_trails") + + if feature is None: + return jsonify({"status": "ok", "feature": None}) + + # Format access info + access = { + "passenger_vehicle": { + "status": feature.get("passengervehicle"), + "dates": feature.get("passengervehicle_datesopen") + }, + "high_clearance": { + "status": feature.get("highclearancevehicle"), + "dates": feature.get("highclearancevehicle_datesopen") + }, + "atv": { + "status": feature.get("atv"), + "dates": feature.get("atv_datesopen") + }, + "motorcycle": { + "status": feature.get("motorcycle"), + "dates": feature.get("motorcycle_datesopen") + }, + "4wd_gt50": { + "status": feature.get("fourwd_gt50inches"), + "dates": feature.get("fourwd_gt50_datesopen") + }, + "2wd_gt50": { + "status": feature.get("twowd_gt50inches"), + "dates": feature.get("twowd_gt50_datesopen") + }, + "e_bike_class1": { + "status": feature.get("e_bike_class1"), + "dates": feature.get("e_bike_class1_dur") + }, + "e_bike_class2": { + "status": feature.get("e_bike_class2"), + "dates": feature.get("e_bike_class2_dur") + }, + "e_bike_class3": { + "status": feature.get("e_bike_class3"), + "dates": feature.get("e_bike_class3_dur") + }, + } + + # Parse maintenance level + maint_level = feature.get("operationalmaintlevel", "") + maint_num = None + if maint_level: + # Extract first digit: "2 - HIGH CLEARANCE VEHICLES" -> 2 + import re + match = re.match(r"(\d+)", maint_level) + if match: + maint_num = int(match.group(1)) + + result = { + "id": feature.get("id"), + "name": feature.get("name"), + "forest": feature.get("forestname"), + "district": feature.get("districtname"), + "surface": feature.get("surfacetype"), + "maintenance_level": maint_num, + "seasonal": feature.get("seasonal"), + "symbol": feature.get("symbol"), + "trail_class": feature.get("trailclass"), + "trail_system": feature.get("trailsystem"), + "access": access, + "geometry": feature.get("geojson") + } + + return jsonify({"status": "ok", "feature": result}) + + finally: + reader.close() + + except Exception as e: + logger.exception("MVUM query error") + return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py index c3b6a5a..16b8514 100644 --- a/lib/offroute/cost.py +++ b/lib/offroute/cost.py @@ -213,6 +213,7 @@ def compute_cost_grid( trails: Optional[np.ndarray] = None, barriers: Optional[np.ndarray] = None, wilderness: Optional[np.ndarray] = None, + mvum: Optional[np.ndarray] = None, boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic", mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot" ) -> np.ndarray: @@ -236,6 +237,10 @@ def compute_cost_grid( 255 = closed/restricted area (PAD-US Pub_Access = XA). wilderness: Optional[np.ndarray] of wilderness values (uint8). 255 = designated wilderness area. + mvum: Optional[np.ndarray] of MVUM access values (uint8). + 0 = no MVUM data, 1 = open, 255 = closed to this mode. + MVUM closures respond to boundary_mode (strict/pragmatic/emergency). + Foot mode should pass None (MVUM is motor-vehicle specific). boundary_mode: How to handle barriers ("strict", "pragmatic", "emergency") mode: Travel mode ("foot", "mtb", "atv", "vehicle") @@ -392,6 +397,26 @@ def compute_cost_grid( cost[barrier_mask] *= PRAGMATIC_BARRIER_MULTIPLIER del barrier_mask + # ─── MVUM closures (motor vehicle restrictions) ────────────────────────── + # MVUM only applies to motorized modes, not foot. Foot mode should pass mvum=None. + # MVUM closures respond to the same boundary_mode as PAD-US barriers: + # "strict" = MVUM-closed road/trail is impassable + # "pragmatic" = MVUM-closed road/trail gets 5× friction penalty + # "emergency" = MVUM closures ignored entirely + if mvum is not None and mode != "foot" and boundary_mode != "emergency": + if mvum.shape != elevation.shape: + raise ValueError(f"MVUM shape mismatch") + + # Value 255 = road/trail exists but is closed to this mode + mvum_closed_mask = mvum == 255 + + if boundary_mode == "strict": + np.putmask(cost, mvum_closed_mask, np.inf) + elif boundary_mode == "pragmatic": + cost[mvum_closed_mask] *= PRAGMATIC_BARRIER_MULTIPLIER + + del mvum_closed_mask + return cost diff --git a/lib/offroute/mvum.py b/lib/offroute/mvum.py new file mode 100644 index 0000000..31e503d --- /dev/null +++ b/lib/offroute/mvum.py @@ -0,0 +1,623 @@ +""" +MVUM (Motor Vehicle Use Map) legal access layer for OFFROUTE. + +Queries USFS MVUM data from navi.db and provides rasterized access grids +indicating which roads/trails are open or closed to specific vehicle modes. + +MVUM is motor-vehicle specific — foot mode should skip this layer entirely. +""" +import re +import sqlite3 +import warnings +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Literal + +import numpy as np + +# Path to navi.db +NAVI_DB_PATH = Path("/mnt/nav/navi.db") + + +def parse_date_range(date_str: str) -> List[Tuple[int, int, int, int]]: + """ + Parse MVUM date range strings like "05/01-11/30" or "06/15-10/15,12/01-03/31". + + Returns list of (start_month, start_day, end_month, end_day) tuples. + Returns empty list if unparseable. + """ + if not date_str or date_str.strip() == "": + return [] + + ranges = [] + # Split by comma for multi-period strings + for part in date_str.split(","): + part = part.strip() + # Match MM/DD-MM/DD pattern + match = re.match(r"(\d{1,2})/(\d{1,2})-(\d{1,2})/(\d{1,2})", part) + if match: + try: + sm, sd, em, ed = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4)) + if 1 <= sm <= 12 and 1 <= sd <= 31 and 1 <= em <= 12 and 1 <= ed <= 31: + ranges.append((sm, sd, em, ed)) + except ValueError: + pass + + return ranges + + +def is_date_in_range(month: int, day: int, ranges: List[Tuple[int, int, int, int]]) -> bool: + """ + Check if a given month/day falls within any of the date ranges. + Handles ranges that wrap around year end (e.g., 12/01-03/31). + """ + if not ranges: + return True # No ranges = assume open + + date_num = month * 100 + day # Simple numeric comparison + + for sm, sd, em, ed in ranges: + start_num = sm * 100 + sd + end_num = em * 100 + ed + + if start_num <= end_num: + # Normal range (e.g., 05/01-11/30) + if start_num <= date_num <= end_num: + return True + else: + # Wrapping range (e.g., 12/01-03/31) + if date_num >= start_num or date_num <= end_num: + return True + + return False + + +def check_access( + status_field: Optional[str], + dates_field: Optional[str], + seasonal: Optional[str], + check_date: Optional[Tuple[int, int]] = None +) -> Optional[bool]: + """ + Determine if a road/trail is open to a vehicle type. + + Args: + status_field: Value of vehicle-class field (e.g., "open", null) + dates_field: Value of *_DATESOPEN field (e.g., "05/01-11/30") + seasonal: Value of SEASONAL field ("yearlong", "seasonal") + check_date: Optional (month, day) tuple to check against date ranges + + Returns: + True = open + False = closed + None = no data (field not populated, defer to SYMBOL) + """ + if status_field is None or status_field.strip() == "": + return None # No data + + status = status_field.strip().lower() + + if status != "open": + return False # Explicitly closed or restricted + + # Status is "open" - check seasonal restrictions + if check_date is not None: + month, day = check_date + + # Parse date ranges + if dates_field: + ranges = parse_date_range(dates_field) + if ranges: + return is_date_in_range(month, day, ranges) + + # No date field but seasonal = "yearlong" means always open + if seasonal and seasonal.strip().lower() == "yearlong": + return True + + # Seasonal with no dates - assume open (data quality issue) + if seasonal and seasonal.strip().lower() == "seasonal": + warnings.warn(f"Seasonal road/trail with no DATESOPEN, assuming open") + return True + + return True # Open with no date check + + +def get_mode_field(mode: str) -> Tuple[str, str]: + """ + Get the MVUM field names for a given travel mode. + + Returns (status_field, dates_field) tuple. + """ + mode_mapping = { + "atv": ("atv", "atv_datesopen"), + "motorcycle": ("motorcycle", "motorcycle_datesopen"), + "mtb": ("e_bike_class1", "e_bike_class1_dur"), # Closest analog for e-bikes + "vehicle": ("highclearancevehicle", "highclearancevehicle_datesopen"), + "passenger": ("passengervehicle", "passengervehicle_datesopen"), + } + + return mode_mapping.get(mode, ("highclearancevehicle", "highclearancevehicle_datesopen")) + + +def symbol_to_access(symbol: str, mode: str, maint_level: Optional[str] = None) -> Optional[bool]: + """ + Fallback: interpret SYMBOL field when per-vehicle-class fields are null. + + MVUM SYMBOL meanings (roads): + 1 = Open to all vehicles + 2 = Open to highway legal vehicles only + 3 = Road closed to motorized + 4 = Road open seasonally + 11 = Administrative use only + 12 = Decommissioned + + For trails, similar logic applies based on TRAILCLASS. + """ + if symbol is None: + return None + + sym = str(symbol).strip() + + # Symbol 1: Open to all + if sym == "1": + return True + + # Symbol 2: Highway legal only + if sym == "2": + # ATVs/motorcycles typically not highway legal + if mode in ("atv", "motorcycle"): + return False + return True + + # Symbol 3: Closed to motorized + if sym == "3": + return False + + # Symbol 4: Seasonally open (assume open if no date check) + if sym == "4": + return True + + # Symbol 11/12: Administrative/decommissioned = closed + if sym in ("11", "12"): + return False + + # Unknown symbol - defer + return None + + +class MVUMReader: + """ + Reader for MVUM data from navi.db. + + Queries roads and trails by bounding box and returns access grids. + """ + + def __init__(self, db_path: Path = NAVI_DB_PATH): + self.db_path = db_path + self._conn = None + + def _get_conn(self) -> sqlite3.Connection: + if self._conn is None: + if not self.db_path.exists(): + raise FileNotFoundError(f"navi.db not found at {self.db_path}") + self._conn = sqlite3.connect(str(self.db_path)) + self._conn.row_factory = sqlite3.Row + # Load Spatialite extension if available + try: + self._conn.enable_load_extension(True) + self._conn.load_extension("mod_spatialite") + except Exception: + pass # Spatialite not available, will use manual bbox queries + return self._conn + + def table_exists(self, table_name: str) -> bool: + """Check if an MVUM table exists.""" + conn = self._get_conn() + cur = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name=?", + (table_name,) + ) + return cur.fetchone() is not None + + def query_roads_bbox( + self, + south: float, north: float, west: float, east: float, + mode: str = "atv", + check_date: Optional[Tuple[int, int]] = None + ) -> List[Dict]: + """ + Query MVUM roads within a bounding box. + + Returns list of dicts with access info for the given mode. + """ + if not self.table_exists("mvum_roads"): + return [] + + conn = self._get_conn() + + # Query using bbox on geometry + # Since we don't have spatialite, we'll query all and filter in Python + # For production, consider pre-computing bbox columns + cur = conn.execute(""" + SELECT ogc_fid, id, name, symbol, operationalmaintlevel, seasonal, + atv, atv_datesopen, motorcycle, motorcycle_datesopen, + highclearancevehicle, highclearancevehicle_datesopen, + passengervehicle, passengervehicle_datesopen, + e_bike_class1, e_bike_class1_dur, + shape + FROM mvum_roads + """) + + status_field, dates_field = get_mode_field(mode) + results = [] + + for row in cur: + # Parse geometry to check bbox intersection + # The shape is stored as WKB blob + shape = row["shape"] + if shape is None: + continue + + # Quick bbox check using geometry extent + # Since we don't have Spatialite functions, we'll include all + # and let the rasterization handle it + + access = check_access( + row[status_field] if status_field in row.keys() else None, + row[dates_field] if dates_field in row.keys() else None, + row["seasonal"], + check_date + ) + + # Fallback to SYMBOL if no per-vehicle data + if access is None: + access = symbol_to_access(row["symbol"], mode, row["operationalmaintlevel"]) + + if access is not None: + results.append({ + "id": row["id"], + "name": row["name"], + "access": access, + "symbol": row["symbol"], + "maint_level": row["operationalmaintlevel"], + "shape": shape, + }) + + return results + + def query_trails_bbox( + self, + south: float, north: float, west: float, east: float, + mode: str = "atv", + check_date: Optional[Tuple[int, int]] = None + ) -> List[Dict]: + """ + Query MVUM trails within a bounding box. + """ + if not self.table_exists("mvum_trails"): + return [] + + conn = self._get_conn() + + cur = conn.execute(""" + SELECT ogc_fid, id, name, symbol, seasonal, trailclass, + atv, atv_datesopen, motorcycle, motorcycle_datesopen, + highclearancevehicle, highclearancevehicle_datesopen, + passengervehicle, passengervehicle_datesopen, + e_bike_class1, e_bike_class1_dur, + shape + FROM mvum_trails + """) + + status_field, dates_field = get_mode_field(mode) + results = [] + + for row in cur: + shape = row["shape"] + if shape is None: + continue + + access = check_access( + row[status_field] if status_field in row.keys() else None, + row[dates_field] if dates_field in row.keys() else None, + row["seasonal"], + check_date + ) + + if access is None: + access = symbol_to_access(row["symbol"], mode) + + if access is not None: + results.append({ + "id": row["id"], + "name": row["name"], + "access": access, + "symbol": row["symbol"], + "trail_class": row["trailclass"], + "shape": shape, + }) + + return results + + def query_nearest( + self, + lat: float, lon: float, + radius_m: float = 50, + table: str = "mvum_roads" + ) -> Optional[Dict]: + """ + Query the nearest MVUM feature to a point. + + Used for the places panel API. + """ + if not self.table_exists(table): + return None + + conn = self._get_conn() + + # Convert radius to degrees (approximate) + radius_deg = radius_m / 111000 + + # Query features in bbox around point + if table == "mvum_roads": + cur = conn.execute(""" + SELECT ogc_fid, id, name, forestname, districtname, symbol, + operationalmaintlevel, surfacetype, seasonal, jurisdiction, + passengervehicle, passengervehicle_datesopen, + highclearancevehicle, highclearancevehicle_datesopen, + atv, atv_datesopen, motorcycle, motorcycle_datesopen, + fourwd_gt50inches, fourwd_gt50_datesopen, + twowd_gt50inches, twowd_gt50_datesopen, + e_bike_class1, e_bike_class1_dur, + e_bike_class2, e_bike_class2_dur, + e_bike_class3, e_bike_class3_dur, + shape + FROM mvum_roads + LIMIT 1000 + """) + else: + cur = conn.execute(""" + SELECT ogc_fid, id, name, forestname, districtname, symbol, + seasonal, jurisdiction, trailclass, trailsystem, + passengervehicle, passengervehicle_datesopen, + highclearancevehicle, highclearancevehicle_datesopen, + atv, atv_datesopen, motorcycle, motorcycle_datesopen, + fourwd_gt50inches, fourwd_gt50_datesopen, + twowd_gt50inches, twowd_gt50_datesopen, + e_bike_class1, e_bike_class1_dur, + e_bike_class2, e_bike_class2_dur, + e_bike_class3, e_bike_class3_dur, + shape + FROM mvum_trails + LIMIT 1000 + """) + + # Find nearest feature + # This is a simplified approach - for production, use spatial index + try: + from shapely import wkb + from shapely.geometry import Point + + query_point = Point(lon, lat) + nearest = None + min_dist = float('inf') + + for row in cur: + try: + geom = wkb.loads(row["shape"]) + dist = query_point.distance(geom) + if dist < min_dist and dist < radius_deg: + min_dist = dist + nearest = dict(row) + nearest["geometry"] = geom + except Exception: + continue + + if nearest: + # Convert geometry to GeoJSON + nearest["geojson"] = nearest["geometry"].__geo_interface__ + del nearest["geometry"] + del nearest["shape"] + return nearest + + except ImportError: + warnings.warn("shapely not available for nearest query") + + return None + + def close(self): + if self._conn: + self._conn.close() + self._conn = None + + +def get_mvum_access_grid( + south: float, north: float, west: float, east: float, + target_shape: Tuple[int, int], + mode: Literal["foot", "mtb", "atv", "vehicle"] = "atv", + check_date: Optional[str] = None, + db_path: Path = NAVI_DB_PATH +) -> np.ndarray: + """ + Get MVUM access grid for pathfinding. + + Args: + south, north, west, east: Bounding box (WGS84) + target_shape: (rows, cols) to match elevation grid + mode: Travel mode (foot skips MVUM entirely) + check_date: Optional "MM/DD" string for seasonal checking + db_path: Path to navi.db + + Returns: + np.ndarray of uint8: + 0 = no MVUM data (defer to existing trail/friction logic) + 1 = road/trail is OPEN to this vehicle mode + 255 = road/trail EXISTS but is CLOSED to this mode + """ + # Foot mode bypasses MVUM entirely + if mode == "foot": + return np.zeros(target_shape, dtype=np.uint8) + + # Parse check_date if provided + parsed_date = None + if check_date: + match = re.match(r"(\d{1,2})/(\d{1,2})", check_date) + if match: + parsed_date = (int(match.group(1)), int(match.group(2))) + + # Initialize output grid + grid = np.zeros(target_shape, dtype=np.uint8) + rows, cols = target_shape + + # Pixel size + pixel_lat = (north - south) / rows + pixel_lon = (east - west) / cols + + reader = MVUMReader(db_path) + + try: + # Query roads and trails + roads = reader.query_roads_bbox(south, north, west, east, mode, parsed_date) + trails = reader.query_trails_bbox(south, north, west, east, mode, parsed_date) + + # Rasterize features + try: + from shapely import wkb + + for features in [roads, trails]: + for feat in features: + try: + geom = wkb.loads(feat["shape"]) + + # Get geometry bounds + minx, miny, maxx, maxy = geom.bounds + + # Check if intersects our bbox + if maxx < west or minx > east or maxy < south or miny > north: + continue + + # Rasterize line + value = 1 if feat["access"] else 255 + + # Simple line rasterization + if geom.geom_type in ("LineString", "MultiLineString"): + if geom.geom_type == "MultiLineString": + coords_list = [list(line.coords) for line in geom.geoms] + else: + coords_list = [list(geom.coords)] + + for coords in coords_list: + for i in range(len(coords) - 1): + x1, y1 = coords[i] + x2, y2 = coords[i + 1] + + # Convert to pixel coordinates + col1 = int((x1 - west) / pixel_lon) + row1 = int((north - y1) / pixel_lat) + col2 = int((x2 - west) / pixel_lon) + row2 = int((north - y2) / pixel_lat) + + # Bresenham's line algorithm + _draw_line(grid, row1, col1, row2, col2, value) + + except Exception as e: + continue + + except ImportError: + warnings.warn("shapely not available, MVUM rasterization skipped") + + finally: + reader.close() + + return grid + + +def _draw_line(grid: np.ndarray, r1: int, c1: int, r2: int, c2: int, value: int): + """Draw a line on the grid using Bresenham's algorithm.""" + rows, cols = grid.shape + + dr = abs(r2 - r1) + dc = abs(c2 - c1) + sr = 1 if r1 < r2 else -1 + sc = 1 if c1 < c2 else -1 + err = dr - dc + + r, c = r1, c1 + + while True: + if 0 <= r < rows and 0 <= c < cols: + # Only overwrite if current value is 0 (no data) or we're marking closed + if grid[r, c] == 0 or value == 255: + grid[r, c] = value + + if r == r2 and c == c2: + break + + e2 = 2 * err + if e2 > -dc: + err -= dc + r += sr + if e2 < dr: + err += dr + c += sc + + +if __name__ == "__main__": + import sys + + print("=" * 60) + print("MVUM Reader Test") + print("=" * 60) + + reader = MVUMReader() + + if not reader.table_exists("mvum_roads"): + print("ERROR: mvum_roads table not found in navi.db") + sys.exit(1) + + # Test bbox query (Sawtooth NF area) + print("\n[1] Testing bbox query (Sawtooth NF area)...") + roads = reader.query_roads_bbox( + south=43.5, north=44.0, west=-115.0, east=-114.0, + mode="atv" + ) + print(f" Found {len(roads)} roads") + + open_count = sum(1 for r in roads if r["access"]) + closed_count = sum(1 for r in roads if not r["access"]) + print(f" Open to ATV: {open_count}") + print(f" Closed to ATV: {closed_count}") + + # Test with seasonal date + print("\n[2] Testing with date check (July 15)...") + roads_summer = reader.query_roads_bbox( + south=43.5, north=44.0, west=-115.0, east=-114.0, + mode="atv", + check_date=(7, 15) + ) + open_summer = sum(1 for r in roads_summer if r["access"]) + print(f" Open to ATV on 07/15: {open_summer}") + + print("\n[3] Testing with date check (January 15)...") + roads_winter = reader.query_roads_bbox( + south=43.5, north=44.0, west=-115.0, east=-114.0, + mode="atv", + check_date=(1, 15) + ) + open_winter = sum(1 for r in roads_winter if r["access"]) + print(f" Open to ATV on 01/15: {open_winter}") + + # Test grid generation + print("\n[4] Testing grid generation...") + grid = get_mvum_access_grid( + south=43.5, north=44.0, west=-115.0, east=-114.0, + target_shape=(500, 1000), + mode="atv" + ) + print(f" Grid shape: {grid.shape}") + print(f" No data (0): {np.sum(grid == 0)}") + print(f" Open (1): {np.sum(grid == 1)}") + print(f" Closed (255): {np.sum(grid == 255)}") + + reader.close() + print("\nDone.") diff --git a/lib/offroute/router.py b/lib/offroute/router.py index d44bbb4..c37bddf 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -27,6 +27,7 @@ from .cost import compute_cost_grid from .friction import FrictionReader, friction_to_multiplier from .barriers import BarrierReader, WildernessReader, DEFAULT_WILDERNESS_PATH from .trails import TrailReader +from .mvum import get_mvum_access_grid # Paths NAVI_DB_PATH = Path("/mnt/nav/navi.db") @@ -407,6 +408,22 @@ class OffrouteRouter: target_shape=elevation.shape ) + # Load MVUM access data (only for motorized modes) + # MVUM is motor-vehicle specific — foot mode skips entirely + mvum = None + if mode in ("mtb", "atv", "vehicle"): + try: + mvum = get_mvum_access_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + target_shape=elevation.shape, + mode=mode, + check_date=None, # TODO: accept date parameter + ) + except Exception as e: + # MVUM data may not be available - continue without it + pass + # Compute cost grid with mode-specific parameters cost = compute_cost_grid( elevation, @@ -416,12 +433,13 @@ class OffrouteRouter: trails=trails, barriers=barriers, wilderness=wilderness, + mvum=mvum, boundary_mode=boundary_mode, mode=mode, ) # Free intermediate arrays to reduce memory before MCP - # Note: Keep trails and barriers - needed for path statistics + # Note: Keep trails, barriers, and mvum - needed for path statistics del friction_mult, friction_raw, wilderness import gc gc.collect() @@ -471,6 +489,7 @@ class OffrouteRouter: elevations = [] trail_values = [] barrier_crossings = 0 + mvum_closed_crossings = 0 for row, col in path_indices: lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) @@ -479,6 +498,8 @@ class OffrouteRouter: trail_values.append(trails[row, col]) if barriers[row, col] == 255: barrier_crossings += 1 + if mvum is not None and mvum[row, col] == 255: + mvum_closed_crossings += 1 # Calculate stats wilderness_distance_m = 0 @@ -497,8 +518,10 @@ class OffrouteRouter: total_cells = len(trail_arr) on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - # Free trails and barriers now that path stats are computed + # Free trails, barriers, and mvum now that path stats are computed del trails, barriers + if mvum is not None: + del mvum # Entry point entry_lat = best_entry["entry_point"]["lat"] @@ -572,6 +595,7 @@ class OffrouteRouter: "on_trail_pct": on_trail_pct, "cell_count": total_cells, "barrier_crossings": barrier_crossings, + "mvum_closed_crossings": mvum_closed_crossings, "mode": mode, }, "geometry": {"type": "LineString", "coordinates": wilderness_coords} @@ -620,6 +644,7 @@ class OffrouteRouter: "network_duration_minutes": float(network_segment["duration_minutes"]) if network_segment else 0, "on_trail_pct": on_trail_pct, "barrier_crossings": barrier_crossings, + "mvum_closed_crossings": mvum_closed_crossings, "boundary_mode": boundary_mode, "mode": mode, "entry_point": { From ff0721c23ef00a978478c2708c9e0055a63be549 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 19:03:31 +0000 Subject: [PATCH 41/72] offroute: wilderness always uses foot mode for pathfinding The wilderness segment now ALWAYS uses foot mode for MCP pathfinding. The user's selected mode only affects: 1. Entry point selection (MODE_TO_VALID_HIGHWAYS filtering) 2. Valhalla costing for the network segment This ensures vehicles can navigate through wilderness (on foot) to reach roads, rather than failing when no vehicle-accessible path exists. Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 137 ++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 56 deletions(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index c37bddf..e1883f7 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -7,7 +7,10 @@ Entry points are extracted from OSM highways and stored in /mnt/nav/navi.db. The pathfinder routes from a wilderness start to the nearest entry point, then Valhalla completes the route to the destination. -Supports four travel modes: foot, mtb, atv, vehicle. +IMPORTANT: The wilderness segment ALWAYS uses foot mode for pathfinding. +The user's selected mode affects: + 1. Which entry points are valid (foot=any, mtb=tracks+roads, vehicle=roads only) + 2. The Valhalla costing profile for the network segment """ import json import math @@ -51,6 +54,19 @@ MODE_TO_COSTING = { "vehicle": "auto", } +# Mode to valid entry point highway classes +# foot = any trail/track/road, mtb = tracks and roads, vehicle = roads only +MODE_TO_VALID_HIGHWAYS = { + "foot": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track", "path", "footway", "bridleway"}, + "mtb": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track"}, + "atv": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track"}, + "vehicle": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service"}, +} + def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: """Calculate distance between two points in meters.""" @@ -122,8 +138,16 @@ class EntryPointIndex: return [dict(row) for row in cur.fetchall()] - def query_radius(self, lat: float, lon: float, radius_km: float) -> List[Dict]: - """Query entry points within radius of a point.""" + def query_radius(self, lat: float, lon: float, radius_km: float, + valid_highways: Optional[set] = None) -> List[Dict]: + """ + Query entry points within radius of a point. + + Args: + lat, lon: Center point + radius_km: Search radius in kilometers + valid_highways: Optional set of valid highway classes to filter by + """ lat_delta = radius_km / 111.0 lon_delta = radius_km / (111.0 * math.cos(math.radians(lat))) @@ -134,6 +158,10 @@ class EntryPointIndex: result = [] for p in points: + # Filter by highway class if specified + if valid_highways and p['highway_class'] not in valid_highways: + continue + dist = haversine_distance(lat, lon, p['lat'], p['lon']) if dist <= radius_km * 1000: p['distance_m'] = dist @@ -262,7 +290,8 @@ class OffrouteRouter: """ OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. - Supports modes: foot, mtb, atv, vehicle + IMPORTANT: Wilderness segment ALWAYS uses foot mode for pathfinding. + User's mode affects entry point selection and Valhalla costing only. """ def __init__(self): @@ -301,9 +330,14 @@ class OffrouteRouter: Args: start_lat, start_lon: Starting coordinates (wilderness) end_lat, end_lon: Destination coordinates - mode: Travel mode (foot, mtb, atv, vehicle) + mode: Travel mode (foot, mtb, atv, vehicle) - affects entry points and network routing boundary_mode: How to handle private land (strict, pragmatic, emergency) + IMPORTANT: Wilderness pathfinding ALWAYS uses foot mode. + The user's mode only affects: + 1. Which entry points are valid targets + 2. The Valhalla costing for the network segment + Returns a GeoJSON FeatureCollection with wilderness and network segments. """ t0 = time.time() @@ -318,17 +352,29 @@ class OffrouteRouter: "message": "Trail entry point index not built. Run build_entry_index() first." } - # Find entry points near start (limit to nearest 10 to control bbox size) + # Get valid highway classes for this mode + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + + # Find entry points near start, filtered by mode MAX_ENTRY_POINTS = 10 - entry_points = self.entry_index.query_radius(start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM) + entry_points = self.entry_index.query_radius( + start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) if not entry_points: - entry_points = self.entry_index.query_radius(start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM) + # Try expanded radius + entry_points = self.entry_index.query_radius( + start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) if not entry_points: - return { - "status": "error", - "message": f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start" - } + # For non-foot modes, the error is about no suitable roads/trails + if mode == "vehicle": + msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try a different mode." + elif mode in ("mtb", "atv"): + msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try foot mode." + else: + msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start." + return {"status": "error", "message": msg} # Limit to nearest entry points to prevent huge bounding boxes entry_points = entry_points[:MAX_ENTRY_POINTS] @@ -392,15 +438,6 @@ class OffrouteRouter: target_shape=elevation.shape ) - # Load wilderness (if available and mode requires it) - wilderness = None - if self.wilderness_reader is not None and mode in ("mtb", "atv", "vehicle"): - wilderness = self.wilderness_reader.get_wilderness_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - # Load trails trails = self.trail_reader.get_trails_grid( south=bbox["south"], north=bbox["north"], @@ -408,23 +445,11 @@ class OffrouteRouter: target_shape=elevation.shape ) - # Load MVUM access data (only for motorized modes) - # MVUM is motor-vehicle specific — foot mode skips entirely - mvum = None - if mode in ("mtb", "atv", "vehicle"): - try: - mvum = get_mvum_access_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape, - mode=mode, - check_date=None, # TODO: accept date parameter - ) - except Exception as e: - # MVUM data may not be available - continue without it - pass + # WILDERNESS PATHFINDING ALWAYS USES FOOT MODE + # This is the key change: we don't load wilderness grid or MVUM for pathfinding + # because foot mode can traverse wilderness and doesn't need motor-vehicle access - # Compute cost grid with mode-specific parameters + # Compute cost grid with FOOT MODE (always for wilderness segment) cost = compute_cost_grid( elevation, cell_size_m=meta["cell_size_m"], @@ -432,15 +457,14 @@ class OffrouteRouter: friction_raw=friction_raw, trails=trails, barriers=barriers, - wilderness=wilderness, - mvum=mvum, + wilderness=None, # Foot mode ignores wilderness restrictions + mvum=None, # Foot mode doesn't use MVUM boundary_mode=boundary_mode, - mode=mode, + mode="foot", # ALWAYS foot for wilderness pathfinding ) # Free intermediate arrays to reduce memory before MCP - # Note: Keep trails, barriers, and mvum - needed for path statistics - del friction_mult, friction_raw, wilderness + del friction_mult, friction_raw import gc gc.collect() @@ -489,7 +513,6 @@ class OffrouteRouter: elevations = [] trail_values = [] barrier_crossings = 0 - mvum_closed_crossings = 0 for row, col in path_indices: lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) @@ -498,8 +521,6 @@ class OffrouteRouter: trail_values.append(trails[row, col]) if barriers[row, col] == 255: barrier_crossings += 1 - if mvum is not None and mvum[row, col] == 255: - mvum_closed_crossings += 1 # Calculate stats wilderness_distance_m = 0 @@ -518,10 +539,8 @@ class OffrouteRouter: total_cells = len(trail_arr) on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - # Free trails, barriers, and mvum now that path stats are computed + # Free trails and barriers del trails, barriers - if mvum is not None: - del mvum # Entry point entry_lat = best_entry["entry_point"]["lat"] @@ -529,7 +548,7 @@ class OffrouteRouter: entry_class = best_entry["entry_point"]["highway_class"] entry_name = best_entry["entry_point"].get("name", "") - # Call Valhalla + # Call Valhalla with USER'S SELECTED MODE (not foot) valhalla_costing = MODE_TO_COSTING.get(mode, "pedestrian") valhalla_request = { @@ -595,8 +614,7 @@ class OffrouteRouter: "on_trail_pct": on_trail_pct, "cell_count": total_cells, "barrier_crossings": barrier_crossings, - "mvum_closed_crossings": mvum_closed_crossings, - "mode": mode, + "wilderness_mode": "foot", # Always foot for wilderness }, "geometry": {"type": "LineString", "coordinates": wilderness_coords} } @@ -610,6 +628,7 @@ class OffrouteRouter: "distance_km": network_segment["distance_km"], "duration_minutes": network_segment["duration_minutes"], "maneuvers": network_segment["maneuvers"], + "network_mode": mode, # User's selected mode }, "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} } @@ -621,7 +640,12 @@ class OffrouteRouter: combined_feature = { "type": "Feature", - "properties": {"segment_type": "combined", "mode": mode, "boundary_mode": boundary_mode}, + "properties": { + "segment_type": "combined", + "wilderness_mode": "foot", + "network_mode": mode, + "boundary_mode": boundary_mode + }, "geometry": {"type": "LineString", "coordinates": combined_coords} } features.append(combined_feature) @@ -644,9 +668,9 @@ class OffrouteRouter: "network_duration_minutes": float(network_segment["duration_minutes"]) if network_segment else 0, "on_trail_pct": on_trail_pct, "barrier_crossings": barrier_crossings, - "mvum_closed_crossings": mvum_closed_crossings, "boundary_mode": boundary_mode, - "mode": mode, + "wilderness_mode": "foot", # Always foot + "network_mode": mode, # User's selection "entry_point": { "lat": entry_lat, "lon": entry_lon, "highway_class": entry_class, "name": entry_name, @@ -730,6 +754,7 @@ if __name__ == "__main__": elif len(sys.argv) > 1 and sys.argv[1] == "test": print("Testing router (all modes)...") + print("NOTE: Wilderness always uses foot mode. User mode affects entry points + network.") router = OffrouteRouter() @@ -746,8 +771,8 @@ if __name__ == "__main__": if result["status"] == "ok": s = result["summary"] - print(f" Wilderness: {s['wilderness_distance_km']:.2f} km, {s['wilderness_effort_minutes']:.1f} min") - print(f" Network: {s['network_distance_km']:.2f} km, {s['network_duration_minutes']:.1f} min") + print(f" Wilderness: {s['wilderness_distance_km']:.2f} km, {s['wilderness_effort_minutes']:.1f} min (foot)") + print(f" Network: {s['network_distance_km']:.2f} km, {s['network_duration_minutes']:.1f} min ({mode})") print(f" On-trail: {s['on_trail_pct']:.1f}%") print(f" Entry: {s['entry_point']['highway_class']}") else: From 58347415bcd36d44e39653a6ed6af541f30faef9 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 21:11:53 +0000 Subject: [PATCH 42/72] offroute: bidirectional wilderness routing (all 4 scenarios) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support all four routing scenarios: A: off-network → on-network (wilderness then Valhalla) B: off-network → off-network (wilderness, Valhalla, wilderness) C: on-network → off-network (Valhalla then wilderness) D: on-network → on-network (pure Valhalla passthrough) Off-network detection via Valhalla /locate endpoint: - Snap distance > 500m = off-network Key implementation details: - _locate_on_network() helper for network detection - route() dispatches to scenario-specific handlers - _pathfind_wilderness() extracted for reuse (runs MCP) - _valhalla_route() helper for network segments - _build_response() unifies GeoJSON output format Memory management: - Sequential MCP runs for scenario B (not parallel) - gc.collect() after each MCP run - Bbox centered on wilderness origin, not distant destination Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 843 +++++++++++++++++++++++++++++++++-------- 1 file changed, 693 insertions(+), 150 deletions(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index e1883f7..79c9089 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -1,17 +1,20 @@ """ -OFFROUTE Router — Wilderness to network path orchestration. +OFFROUTE Router — Bidirectional wilderness-to-network path orchestration. -Connects the raster pathfinder (wilderness segment) to Valhalla (on-network segment). +Supports four routing scenarios: + A: off-network start → on-network end (wilderness then Valhalla) + B: off-network start → off-network end (wilderness, Valhalla, wilderness) + C: on-network start → off-network end (Valhalla then wilderness) + D: on-network start → on-network end (pure Valhalla passthrough) -Entry points are extracted from OSM highways and stored in /mnt/nav/navi.db. -The pathfinder routes from a wilderness start to the nearest entry point, -then Valhalla completes the route to the destination. +Off-network detection: Valhalla /locate snap distance > 500m = off-network. IMPORTANT: The wilderness segment ALWAYS uses foot mode for pathfinding. The user's selected mode affects: 1. Which entry points are valid (foot=any, mtb=tracks+roads, vehicle=roads only) 2. The Valhalla costing profile for the network segment """ +import gc import json import math import sqlite3 @@ -46,6 +49,9 @@ EXPANDED_SEARCH_RADIUS_KM = 100 # Memory limit MEMORY_LIMIT_GB = 12 +# Off-network detection threshold (meters) +OFF_NETWORK_THRESHOLD_M = 500 + # Mode to Valhalla costing mapping MODE_TO_COSTING = { "foot": "pedestrian", @@ -290,6 +296,12 @@ class OffrouteRouter: """ OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. + Supports four scenarios: + A: off-network start → on-network end + B: off-network start → off-network end + C: on-network start → off-network end + D: on-network start → on-network end (pure Valhalla) + IMPORTANT: Wilderness segment ALWAYS uses foot mode for pathfinding. User's mode affects entry point selection and Valhalla costing only. """ @@ -315,6 +327,49 @@ class OffrouteRouter: if self.trail_reader is None: self.trail_reader = TrailReader() + def _locate_on_network(self, lat: float, lon: float, mode: str) -> Dict: + """ + Check if a point is on the routable network using Valhalla's /locate. + + Returns: + { + "on_network": bool, + "snap_distance_m": float, + "snapped_lat": float, + "snapped_lon": float + } + """ + costing = MODE_TO_COSTING.get(mode, "pedestrian") + try: + resp = requests.post( + f"{VALHALLA_URL}/locate", + json={"locations": [{"lat": lat, "lon": lon}], "costing": costing}, + timeout=10 + ) + + if resp.status_code == 200: + data = resp.json() + if data and len(data) > 0 and data[0].get("edges"): + edge = data[0]["edges"][0] + snap_lat = edge.get("correlated_lat", lat) + snap_lon = edge.get("correlated_lon", lon) + snap_dist = haversine_distance(lat, lon, snap_lat, snap_lon) + return { + "on_network": snap_dist <= OFF_NETWORK_THRESHOLD_M, + "snap_distance_m": snap_dist, + "snapped_lat": snap_lat, + "snapped_lon": snap_lon + } + except Exception: + pass + + return { + "on_network": False, + "snap_distance_m": float('inf'), + "snapped_lat": lat, + "snapped_lon": lon + } + def route( self, start_lat: float, @@ -325,26 +380,167 @@ class OffrouteRouter: boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" ) -> Dict: """ - Route from a wilderness start point to a destination. + Route between two points, handling all four scenarios. + + Scenarios: + A: off-network start → on-network end (wilderness then network) + B: off-network start → off-network end (wilderness, network, wilderness) + C: on-network start → off-network end (network then wilderness) + D: on-network start → on-network end (pure network) Args: - start_lat, start_lon: Starting coordinates (wilderness) + start_lat, start_lon: Starting coordinates end_lat, end_lon: Destination coordinates - mode: Travel mode (foot, mtb, atv, vehicle) - affects entry points and network routing + mode: Travel mode (foot, mtb, atv, vehicle) boundary_mode: How to handle private land (strict, pragmatic, emergency) - IMPORTANT: Wilderness pathfinding ALWAYS uses foot mode. - The user's mode only affects: - 1. Which entry points are valid targets - 2. The Valhalla costing for the network segment - - Returns a GeoJSON FeatureCollection with wilderness and network segments. + Returns a GeoJSON FeatureCollection with route segments. """ - t0 = time.time() - if mode not in MODE_TO_COSTING: return {"status": "error", "message": f"Unknown mode: {mode}"} + # Detect network status for both endpoints + start_status = self._locate_on_network(start_lat, start_lon, mode) + end_status = self._locate_on_network(end_lat, end_lon, mode) + + start_off_network = not start_status["on_network"] + end_off_network = not end_status["on_network"] + + # Dispatch to appropriate handler + if not start_off_network and not end_off_network: + # Scenario D: on-network → on-network (pure Valhalla) + return self._route_D_network_only( + start_lat, start_lon, end_lat, end_lon, mode + ) + elif not start_off_network and end_off_network: + # Scenario C: on-network → off-network + return self._route_C_network_to_wilderness( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + elif start_off_network and not end_off_network: + # Scenario A: off-network → on-network + return self._route_A_wilderness_to_network( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + else: + # Scenario B: off-network → off-network + return self._route_B_wilderness_both( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + + def _route_D_network_only( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str + ) -> Dict: + """ + Scenario D: Both endpoints on-network. Pure Valhalla routing. + """ + t0 = time.time() + costing = MODE_TO_COSTING.get(mode, "pedestrian") + + valhalla_request = { + "locations": [ + {"lat": start_lat, "lon": start_lon}, + {"lat": end_lat, "lon": end_lon} + ], + "costing": costing, + "directions_options": {"units": "kilometers"} + } + + try: + resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) + + if resp.status_code != 200: + return { + "status": "error", + "message": f"Network routing failed: {resp.text[:200]}" + } + + valhalla_data = resp.json() + trip = valhalla_data.get("trip", {}) + legs = trip.get("legs", []) + + if not legs: + return {"status": "error", "message": "No route found"} + + leg = legs[0] + shape = leg.get("shape", "") + network_coords = self._decode_polyline(shape) + + maneuvers = [] + for m in leg.get("maneuvers", []): + maneuvers.append({ + "instruction": m.get("instruction", ""), + "type": m.get("type", 0), + "distance_km": m.get("length", 0), + "time_seconds": m.get("time", 0), + "street_names": m.get("street_names", []), + }) + + summary = trip.get("summary", {}) + distance_km = summary.get("length", 0) + duration_min = summary.get("time", 0) / 60 + + # Build response in same format as wilderness routes + network_feature = { + "type": "Feature", + "properties": { + "segment_type": "network", + "distance_km": distance_km, + "duration_minutes": duration_min, + "maneuvers": maneuvers, + "network_mode": mode, + }, + "geometry": {"type": "LineString", "coordinates": network_coords} + } + + combined_feature = { + "type": "Feature", + "properties": { + "segment_type": "combined", + "network_mode": mode, + }, + "geometry": {"type": "LineString", "coordinates": network_coords} + } + + geojson = {"type": "FeatureCollection", "features": [network_feature, combined_feature]} + + result = { + "status": "ok", + "route": geojson, + "summary": { + "total_distance_km": float(distance_km), + "total_effort_minutes": float(duration_min), + "wilderness_distance_km": 0.0, + "wilderness_effort_minutes": 0.0, + "network_distance_km": float(distance_km), + "network_duration_minutes": float(duration_min), + "on_trail_pct": 100.0, + "barrier_crossings": 0, + "network_mode": mode, + "scenario": "D", + "computation_time_s": time.time() - t0, + } + } + return result + + except Exception as e: + return {"status": "error", "message": f"Network routing failed: {e}"} + + def _route_A_wilderness_to_network( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario A: Off-network start → on-network end. + Wilderness pathfinding from start to entry point, then Valhalla to end. + """ + t0 = time.time() + # Ensure entry point index exists if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: return { @@ -362,12 +558,10 @@ class OffrouteRouter: ) if not entry_points: - # Try expanded radius entry_points = self.entry_index.query_radius( start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways ) if not entry_points: - # For non-foot modes, the error is about no suitable roads/trails if mode == "vehicle": msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try a different mode." elif mode in ("mtb", "atv"): @@ -376,13 +570,243 @@ class OffrouteRouter: msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start." return {"status": "error", "message": msg} - # Limit to nearest entry points to prevent huge bounding boxes entry_points = entry_points[:MAX_ENTRY_POINTS] - # Build bbox with max size limit (prevent OOM on large areas) - MAX_BBOX_DEGREES = 0.5 # ~55km at mid-latitudes - all_lats = [start_lat, end_lat] + [p["lat"] for p in entry_points] - all_lons = [start_lon, end_lon] + [p["lon"] for p in entry_points] + # Run wilderness pathfinding + wilderness_result = self._pathfind_wilderness( + start_lat, start_lon, end_lat, end_lon, + entry_points, boundary_mode, "start" + ) + + if wilderness_result.get("status") == "error": + return wilderness_result + + # Extract results + wilderness_coords = wilderness_result["coords"] + wilderness_stats = wilderness_result["stats"] + best_entry = wilderness_result["entry_point"] + + entry_lat = best_entry["lat"] + entry_lon = best_entry["lon"] + + # Call Valhalla from entry point to destination + network_result = self._valhalla_route(entry_lat, entry_lon, end_lat, end_lon, mode) + + # Build response + return self._build_response( + wilderness_start=wilderness_coords, + wilderness_start_stats=wilderness_stats, + network_segment=network_result.get("segment"), + wilderness_end=None, + wilderness_end_stats=None, + mode=mode, + boundary_mode=boundary_mode, + entry_start=best_entry, + entry_end=None, + scenario="A", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _route_C_network_to_wilderness( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario C: On-network start → off-network end. + Valhalla from start to entry point, then wilderness pathfinding to end. + """ + t0 = time.time() + + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + + # Find entry points near END (destination) + MAX_ENTRY_POINTS = 10 + entry_points = self.entry_index.query_radius( + end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + + if not entry_points: + entry_points = self.entry_index.query_radius( + end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points: + if mode == "vehicle": + msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try a different mode." + elif mode in ("mtb", "atv"): + msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try foot mode." + else: + msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of destination." + return {"status": "error", "message": msg} + + entry_points = entry_points[:MAX_ENTRY_POINTS] + + # Run wilderness pathfinding FROM END toward entry points + wilderness_result = self._pathfind_wilderness( + end_lat, end_lon, start_lat, start_lon, + entry_points, boundary_mode, "end" + ) + + if wilderness_result.get("status") == "error": + return wilderness_result + + # The path is from end→entry, reverse it for display (entry→end) + wilderness_coords = list(reversed(wilderness_result["coords"])) + wilderness_stats = wilderness_result["stats"] + best_entry = wilderness_result["entry_point"] + + entry_lat = best_entry["lat"] + entry_lon = best_entry["lon"] + + # Call Valhalla from start to entry point + network_result = self._valhalla_route(start_lat, start_lon, entry_lat, entry_lon, mode) + + # Build response (network first, then wilderness) + return self._build_response( + wilderness_start=None, + wilderness_start_stats=None, + network_segment=network_result.get("segment"), + wilderness_end=wilderness_coords, + wilderness_end_stats=wilderness_stats, + mode=mode, + boundary_mode=boundary_mode, + entry_start=None, + entry_end=best_entry, + scenario="C", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _route_B_wilderness_both( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario B: Off-network start → off-network end. + Wilderness from start to entry_A, Valhalla entry_A to entry_B, wilderness from entry_B to end. + """ + t0 = time.time() + + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + MAX_ENTRY_POINTS = 10 + + # Find entry points near START + entry_points_start = self.entry_index.query_radius( + start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_start: + entry_points_start = self.entry_index.query_radius( + start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_start: + return {"status": "error", "message": f"No entry points found near start within {EXPANDED_SEARCH_RADIUS_KM}km."} + entry_points_start = entry_points_start[:MAX_ENTRY_POINTS] + + # Find entry points near END + entry_points_end = self.entry_index.query_radius( + end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_end: + entry_points_end = self.entry_index.query_radius( + end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_end: + return {"status": "error", "message": f"No entry points found near destination within {EXPANDED_SEARCH_RADIUS_KM}km."} + entry_points_end = entry_points_end[:MAX_ENTRY_POINTS] + + # Phase 1: Wilderness pathfinding from START + wilderness_start_result = self._pathfind_wilderness( + start_lat, start_lon, end_lat, end_lon, + entry_points_start, boundary_mode, "start" + ) + + if wilderness_start_result.get("status") == "error": + return wilderness_start_result + + wilderness_start_coords = wilderness_start_result["coords"] + wilderness_start_stats = wilderness_start_result["stats"] + entry_A = wilderness_start_result["entry_point"] + + # Phase 2: Wilderness pathfinding from END (run after freeing phase 1 memory) + wilderness_end_result = self._pathfind_wilderness( + end_lat, end_lon, start_lat, start_lon, + entry_points_end, boundary_mode, "end" + ) + + if wilderness_end_result.get("status") == "error": + return wilderness_end_result + + # Reverse the end wilderness path (it's end→entry, we want entry→end for display) + wilderness_end_coords = list(reversed(wilderness_end_result["coords"])) + wilderness_end_stats = wilderness_end_result["stats"] + entry_B = wilderness_end_result["entry_point"] + + # Phase 3: Valhalla from entry_A to entry_B + network_result = self._valhalla_route( + entry_A["lat"], entry_A["lon"], + entry_B["lat"], entry_B["lon"], + mode + ) + + # Build response + return self._build_response( + wilderness_start=wilderness_start_coords, + wilderness_start_stats=wilderness_start_stats, + network_segment=network_result.get("segment"), + wilderness_end=wilderness_end_coords, + wilderness_end_stats=wilderness_end_stats, + mode=mode, + boundary_mode=boundary_mode, + entry_start=entry_A, + entry_end=entry_B, + scenario="B", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _pathfind_wilderness( + self, + origin_lat: float, origin_lon: float, + dest_lat: float, dest_lon: float, + entry_points: List[Dict], + boundary_mode: str, + label: str + ) -> Dict: + """ + Run MCP wilderness pathfinding from origin toward entry points. + + Args: + origin_lat, origin_lon: Starting point for pathfinding + dest_lat, dest_lon: Ultimate destination (for bbox calculation) + entry_points: List of candidate entry points + boundary_mode: How to handle barriers + label: "start" or "end" for error messages + + Returns: + {"status": "ok", "coords": [...], "stats": {...}, "entry_point": {...}} + or {"status": "error", "message": "..."} + """ + # Build bbox - only include origin and entry points, NOT distant destination + # The destination is handled by Valhalla, wilderness only needs to reach entry points + MAX_BBOX_DEGREES = 0.5 + all_lats = [origin_lat] + [p["lat"] for p in entry_points] + all_lons = [origin_lon] + [p["lon"] for p in entry_points] padding = 0.05 bbox = { @@ -392,18 +816,16 @@ class OffrouteRouter: "east": max(all_lons) + padding, } - # Clamp bbox size to prevent memory exhaustion + # Clamp bbox size, centering on origin lat_span = bbox["north"] - bbox["south"] lon_span = bbox["east"] - bbox["west"] if lat_span > MAX_BBOX_DEGREES or lon_span > MAX_BBOX_DEGREES: - center_lat = (bbox["south"] + bbox["north"]) / 2 - center_lon = (bbox["west"] + bbox["east"]) / 2 half_span = MAX_BBOX_DEGREES / 2 bbox = { - "south": center_lat - half_span, - "north": center_lat + half_span, - "west": center_lon - half_span, - "east": center_lon + half_span, + "south": origin_lat - half_span, + "north": origin_lat + half_span, + "west": origin_lon - half_span, + "east": origin_lon + half_span, } # Initialize readers @@ -416,14 +838,14 @@ class OffrouteRouter: west=bbox["west"], east=bbox["east"], ) except Exception as e: - return {"status": "error", "message": f"Failed to load elevation: {e}"} + return {"status": "error", "message": f"Failed to load elevation for {label}: {e}"} # Check memory mem = check_memory_usage() if mem > MEMORY_LIMIT_GB: return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} - # Load friction (both processed and raw for mode-specific overrides) + # Load friction friction_raw = self.friction_reader.get_friction_grid( south=bbox["south"], north=bbox["north"], west=bbox["west"], east=bbox["east"], @@ -445,11 +867,7 @@ class OffrouteRouter: target_shape=elevation.shape ) - # WILDERNESS PATHFINDING ALWAYS USES FOOT MODE - # This is the key change: we don't load wilderness grid or MVUM for pathfinding - # because foot mode can traverse wilderness and doesn't need motor-vehicle access - - # Compute cost grid with FOOT MODE (always for wilderness segment) + # Compute cost grid (ALWAYS foot mode for wilderness) cost = compute_cost_grid( elevation, cell_size_m=meta["cell_size_m"], @@ -457,25 +875,24 @@ class OffrouteRouter: friction_raw=friction_raw, trails=trails, barriers=barriers, - wilderness=None, # Foot mode ignores wilderness restrictions - mvum=None, # Foot mode doesn't use MVUM + wilderness=None, + mvum=None, boundary_mode=boundary_mode, - mode="foot", # ALWAYS foot for wilderness pathfinding + mode="foot", ) - # Free intermediate arrays to reduce memory before MCP + # Free intermediate arrays del friction_mult, friction_raw - import gc gc.collect() - # Convert start to pixel coordinates - start_row, start_col = self.dem_reader.latlon_to_pixel(start_lat, start_lon, meta) + # Convert origin to pixel coordinates + origin_row, origin_col = self.dem_reader.latlon_to_pixel(origin_lat, origin_lon, meta) rows, cols = elevation.shape - if not (0 <= start_row < rows and 0 <= start_col < cols): - return {"status": "error", "message": "Start point outside grid bounds"} + if not (0 <= origin_row < rows and 0 <= origin_col < cols): + return {"status": "error", "message": f"{label.capitalize()} point outside grid bounds"} - # Mark entry points on grid + # Map entry points to pixels entry_pixels = [] for ep in entry_points: row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) @@ -483,11 +900,11 @@ class OffrouteRouter: entry_pixels.append({"row": row, "col": col, "entry_point": ep}) if not entry_pixels: - return {"status": "error", "message": "No entry points map to grid bounds"} + return {"status": "error", "message": f"No entry points map to grid bounds for {label}"} - # Run MCP pathfinder + # Run MCP mcp = MCP_Geometric(cost, fully_connected=True) - cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) + cumulative_costs, traceback = mcp.find_costs([(origin_row, origin_col)]) # Find nearest reachable entry point best_entry = None @@ -502,67 +919,88 @@ class OffrouteRouter: if best_entry is None or np.isinf(best_cost): return { "status": "error", - "message": "No path found to any entry point (blocked by impassable terrain)" + "message": f"No path found from {label} to any entry point (blocked by impassable terrain)" } - # Traceback wilderness path + # Traceback path path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) - # Convert to coordinates - wilderness_coords = [] + # Convert to coordinates and collect stats + coords = [] elevations = [] trail_values = [] barrier_crossings = 0 for row, col in path_indices: lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) - wilderness_coords.append([lon, lat]) + coords.append([lon, lat]) elevations.append(elevation[row, col]) trail_values.append(trails[row, col]) if barriers[row, col] == 255: barrier_crossings += 1 - # Calculate stats - wilderness_distance_m = 0 - for i in range(1, len(wilderness_coords)): - lon1, lat1 = wilderness_coords[i-1] - lon2, lat2 = wilderness_coords[i] - wilderness_distance_m += haversine_distance(lat1, lon1, lat2, lon2) + # Calculate distance + distance_m = 0 + for i in range(1, len(coords)): + lon1, lat1 = coords[i-1] + lon2, lat2 = coords[i] + distance_m += haversine_distance(lat1, lon1, lat2, lon2) + # Elevation stats elev_arr = np.array(elevations) elev_diff = np.diff(elev_arr) - wilderness_gain = float(np.sum(elev_diff[elev_diff > 0])) - wilderness_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) + elev_gain = float(np.sum(elev_diff[elev_diff > 0])) + elev_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) + # Trail stats trail_arr = np.array(trail_values) on_trail_cells = np.sum(trail_arr > 0) total_cells = len(trail_arr) on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - # Free trails and barriers - del trails, barriers + # Free memory + del mcp, cumulative_costs, traceback, cost, trails, barriers, elevation + gc.collect() - # Entry point - entry_lat = best_entry["entry_point"]["lat"] - entry_lon = best_entry["entry_point"]["lon"] - entry_class = best_entry["entry_point"]["highway_class"] - entry_name = best_entry["entry_point"].get("name", "") + return { + "status": "ok", + "coords": coords, + "stats": { + "distance_km": distance_m / 1000, + "effort_minutes": best_cost / 60, + "elevation_gain_m": elev_gain, + "elevation_loss_m": elev_loss, + "on_trail_pct": on_trail_pct, + "barrier_crossings": barrier_crossings, + "cell_count": total_cells, + }, + "entry_point": best_entry["entry_point"] + } - # Call Valhalla with USER'S SELECTED MODE (not foot) - valhalla_costing = MODE_TO_COSTING.get(mode, "pedestrian") + def _valhalla_route( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str + ) -> Dict: + """ + Call Valhalla for network routing. + + Returns: + {"segment": {...}, "error": None} on success + {"segment": None, "error": "..."} on failure + """ + costing = MODE_TO_COSTING.get(mode, "pedestrian") valhalla_request = { "locations": [ - {"lat": entry_lat, "lon": entry_lon}, + {"lat": start_lat, "lon": start_lon}, {"lat": end_lat, "lon": end_lon} ], - "costing": valhalla_costing, + "costing": costing, "directions_options": {"units": "kilometers"} } - network_segment = None - valhalla_error = None - try: resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) @@ -574,7 +1012,7 @@ class OffrouteRouter: if legs: leg = legs[0] shape = leg.get("shape", "") - network_coords = self._decode_polyline(shape) + coords = self._decode_polyline(shape) maneuvers = [] for m in leg.get("maneuvers", []): @@ -587,97 +1025,184 @@ class OffrouteRouter: }) summary = trip.get("summary", {}) - network_segment = { - "coordinates": network_coords, - "distance_km": summary.get("length", 0), - "duration_minutes": summary.get("time", 0) / 60, - "maneuvers": maneuvers, + return { + "segment": { + "coordinates": coords, + "distance_km": summary.get("length", 0), + "duration_minutes": summary.get("time", 0) / 60, + "maneuvers": maneuvers, + }, + "error": None } - else: - valhalla_error = f"Valhalla returned {resp.status_code}: {resp.text[:200]}" + + return {"segment": None, "error": f"Valhalla returned {resp.status_code}: {resp.text[:200]}"} except Exception as e: - valhalla_error = f"Valhalla request failed: {e}" + return {"segment": None, "error": f"Valhalla request failed: {e}"} - # Build response + def _build_response( + self, + wilderness_start: Optional[List], + wilderness_start_stats: Optional[Dict], + network_segment: Optional[Dict], + wilderness_end: Optional[List], + wilderness_end_stats: Optional[Dict], + mode: str, + boundary_mode: str, + entry_start: Optional[Dict], + entry_end: Optional[Dict], + scenario: str, + t0: float, + valhalla_error: Optional[str] + ) -> Dict: + """Build the final GeoJSON response.""" features = [] - wilderness_feature = { - "type": "Feature", - "properties": { - "segment_type": "wilderness", - "effort_minutes": float(best_cost / 60), - "distance_km": float(wilderness_distance_m / 1000), - "elevation_gain_m": wilderness_gain, - "elevation_loss_m": wilderness_loss, - "boundary_mode": boundary_mode, - "on_trail_pct": on_trail_pct, - "cell_count": total_cells, - "barrier_crossings": barrier_crossings, - "wilderness_mode": "foot", # Always foot for wilderness - }, - "geometry": {"type": "LineString", "coordinates": wilderness_coords} - } - features.append(wilderness_feature) + # Wilderness start segment + if wilderness_start and wilderness_start_stats: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "wilderness", + "segment_position": "start", + "effort_minutes": float(wilderness_start_stats["effort_minutes"]), + "distance_km": float(wilderness_start_stats["distance_km"]), + "elevation_gain_m": wilderness_start_stats["elevation_gain_m"], + "elevation_loss_m": wilderness_start_stats["elevation_loss_m"], + "boundary_mode": boundary_mode, + "on_trail_pct": wilderness_start_stats["on_trail_pct"], + "barrier_crossings": wilderness_start_stats["barrier_crossings"], + "wilderness_mode": "foot", + }, + "geometry": {"type": "LineString", "coordinates": wilderness_start} + }) + # Network segment if network_segment: - network_feature = { + features.append({ "type": "Feature", "properties": { "segment_type": "network", "distance_km": network_segment["distance_km"], "duration_minutes": network_segment["duration_minutes"], "maneuvers": network_segment["maneuvers"], - "network_mode": mode, # User's selected mode + "network_mode": mode, }, "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} - } - features.append(network_feature) + }) - combined_coords = wilderness_coords.copy() + # Wilderness end segment + if wilderness_end and wilderness_end_stats: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "wilderness", + "segment_position": "end", + "effort_minutes": float(wilderness_end_stats["effort_minutes"]), + "distance_km": float(wilderness_end_stats["distance_km"]), + "elevation_gain_m": wilderness_end_stats["elevation_gain_m"], + "elevation_loss_m": wilderness_end_stats["elevation_loss_m"], + "boundary_mode": boundary_mode, + "on_trail_pct": wilderness_end_stats["on_trail_pct"], + "barrier_crossings": wilderness_end_stats["barrier_crossings"], + "wilderness_mode": "foot", + }, + "geometry": {"type": "LineString", "coordinates": wilderness_end} + }) + + # Combined path + combined_coords = [] + if wilderness_start: + combined_coords.extend(wilderness_start) if network_segment: - combined_coords.extend(network_segment["coordinates"][1:]) + # Skip first coord if we already have wilderness_start (avoid duplicate) + start_idx = 1 if wilderness_start else 0 + combined_coords.extend(network_segment["coordinates"][start_idx:]) + if wilderness_end: + # Skip first coord (avoid duplicate with network end) + start_idx = 1 if (wilderness_start or network_segment) else 0 + combined_coords.extend(wilderness_end[start_idx:]) - combined_feature = { - "type": "Feature", - "properties": { - "segment_type": "combined", - "wilderness_mode": "foot", - "network_mode": mode, - "boundary_mode": boundary_mode - }, - "geometry": {"type": "LineString", "coordinates": combined_coords} - } - features.append(combined_feature) + if combined_coords: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "combined", + "wilderness_mode": "foot", + "network_mode": mode, + "boundary_mode": boundary_mode, + "scenario": scenario, + }, + "geometry": {"type": "LineString", "coordinates": combined_coords} + }) geojson = {"type": "FeatureCollection", "features": features} - total_distance_km = wilderness_distance_m / 1000 - total_effort_minutes = best_cost / 60 + # Calculate totals + total_distance_km = 0.0 + total_effort_minutes = 0.0 + wilderness_distance_km = 0.0 + wilderness_effort_minutes = 0.0 + network_distance_km = 0.0 + network_duration_minutes = 0.0 + barrier_crossings = 0 + on_trail_pct = 0.0 + + if wilderness_start_stats: + wilderness_distance_km += wilderness_start_stats["distance_km"] + wilderness_effort_minutes += wilderness_start_stats["effort_minutes"] + barrier_crossings += wilderness_start_stats["barrier_crossings"] + on_trail_pct = wilderness_start_stats["on_trail_pct"] + + if wilderness_end_stats: + wilderness_distance_km += wilderness_end_stats["distance_km"] + wilderness_effort_minutes += wilderness_end_stats["effort_minutes"] + barrier_crossings += wilderness_end_stats["barrier_crossings"] + # Average on-trail percentage if we have both + if wilderness_start_stats: + on_trail_pct = (on_trail_pct + wilderness_end_stats["on_trail_pct"]) / 2 + else: + on_trail_pct = wilderness_end_stats["on_trail_pct"] if network_segment: - total_distance_km += network_segment["distance_km"] - total_effort_minutes += network_segment["duration_minutes"] + network_distance_km = network_segment["distance_km"] + network_duration_minutes = network_segment["duration_minutes"] + + total_distance_km = wilderness_distance_km + network_distance_km + total_effort_minutes = wilderness_effort_minutes + network_duration_minutes summary = { "total_distance_km": float(total_distance_km), "total_effort_minutes": float(total_effort_minutes), - "wilderness_distance_km": float(wilderness_distance_m / 1000), - "wilderness_effort_minutes": float(best_cost / 60), - "network_distance_km": float(network_segment["distance_km"]) if network_segment else 0, - "network_duration_minutes": float(network_segment["duration_minutes"]) if network_segment else 0, - "on_trail_pct": on_trail_pct, + "wilderness_distance_km": float(wilderness_distance_km), + "wilderness_effort_minutes": float(wilderness_effort_minutes), + "network_distance_km": float(network_distance_km), + "network_duration_minutes": float(network_duration_minutes), + "on_trail_pct": float(on_trail_pct), "barrier_crossings": barrier_crossings, "boundary_mode": boundary_mode, - "wilderness_mode": "foot", # Always foot - "network_mode": mode, # User's selection - "entry_point": { - "lat": entry_lat, "lon": entry_lon, - "highway_class": entry_class, "name": entry_name, - }, + "wilderness_mode": "foot", + "network_mode": mode, + "scenario": scenario, "computation_time_s": time.time() - t0, } + if entry_start: + summary["entry_point_start"] = { + "lat": entry_start["lat"], + "lon": entry_start["lon"], + "highway_class": entry_start["highway_class"], + "name": entry_start.get("name", ""), + } + + if entry_end: + summary["entry_point_end"] = { + "lat": entry_end["lat"], + "lon": entry_end["lon"], + "highway_class": entry_end["highway_class"], + "name": entry_end.get("name", ""), + } + result = {"status": "ok", "route": geojson, "summary": summary} if valhalla_error: @@ -753,28 +1278,46 @@ if __name__ == "__main__": print(f"\nDone. Total entry points: {stats['total']}") elif len(sys.argv) > 1 and sys.argv[1] == "test": - print("Testing router (all modes)...") - print("NOTE: Wilderness always uses foot mode. User mode affects entry points + network.") + print("Testing router (all scenarios)...") + print("=" * 60) router = OffrouteRouter() - for mode in ["foot", "mtb", "atv", "vehicle"]: - print(f"\n{'='*60}") - print(f"Mode: {mode}") - print("="*60) + # Test points + wilderness_start = (44.0543, -115.4237) # Off-network + wilderness_end = (45.2, -115.5) # Deep wilderness (Frank Church) + road_start = (43.6150, -116.2023) # Boise downtown (on-network) + road_end = (43.5867, -116.5625) # Nampa (on-network) + + tests = [ + ("A: wilderness→road", wilderness_start, (44.0814, -115.5021)), + ("B: wilderness→wilderness", wilderness_start, wilderness_end), + ("C: road→wilderness", road_start, wilderness_start), + ("D: road→road", road_start, road_end), + ] + + for label, (slat, slon), (elat, elon) in tests: + print(f"\n{label}") + print("-" * 40) result = router.route( - start_lat=42.35, start_lon=-114.30, - end_lat=42.5629, end_lon=-114.4609, - mode=mode, boundary_mode="pragmatic" + start_lat=slat, start_lon=slon, + end_lat=elat, end_lon=elon, + mode="foot", boundary_mode="pragmatic" ) if result["status"] == "ok": s = result["summary"] - print(f" Wilderness: {s['wilderness_distance_km']:.2f} km, {s['wilderness_effort_minutes']:.1f} min (foot)") - print(f" Network: {s['network_distance_km']:.2f} km, {s['network_duration_minutes']:.1f} min ({mode})") - print(f" On-trail: {s['on_trail_pct']:.1f}%") - print(f" Entry: {s['entry_point']['highway_class']}") + print(f" Scenario: {s.get('scenario', '?')}") + print(f" Total: {s['total_distance_km']:.2f} km, {s['total_effort_minutes']:.1f} min") + print(f" Wilderness: {s['wilderness_distance_km']:.2f} km") + print(f" Network: {s['network_distance_km']:.2f} km") + if s.get('entry_point_start'): + ep = s['entry_point_start'] + print(f" Entry (start): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") + if s.get('entry_point_end'): + ep = s['entry_point_end'] + print(f" Entry (end): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") else: print(f" ERROR: {result['message']}") @@ -783,4 +1326,4 @@ if __name__ == "__main__": else: print("Usage:") print(" python router.py build # Build entry point index") - print(" python router.py test # Test all modes") + print(" python router.py test # Test all scenarios") From 87a4741b8d4ebecfc276256d7f73d4481bd8a216 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 21:19:04 +0000 Subject: [PATCH 43/72] =?UTF-8?q?offroute:=20raise=20bbox=20limit=20to=202?= =?UTF-8?q?.0=C2=B0=20(~220km=20coverage)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index 79c9089..ccc956d 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -804,7 +804,7 @@ class OffrouteRouter: """ # Build bbox - only include origin and entry points, NOT distant destination # The destination is handled by Valhalla, wilderness only needs to reach entry points - MAX_BBOX_DEGREES = 0.5 + MAX_BBOX_DEGREES = 2.0 all_lats = [origin_lat] + [p["lat"] for p in entry_points] all_lons = [origin_lon] + [p["lon"] for p in entry_points] From cf758476b4d44be559664a2e92211566dd7d92aa Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 21:55:31 +0000 Subject: [PATCH 44/72] offroute: add auto mode for standard driving routes Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index ccc956d..6981e89 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -53,7 +53,8 @@ MEMORY_LIMIT_GB = 12 OFF_NETWORK_THRESHOLD_M = 500 # Mode to Valhalla costing mapping -MODE_TO_COSTING = { +MODE_TO_COSTING = { + "auto": "auto", "foot": "pedestrian", "mtb": "bicycle", "atv": "auto", @@ -62,7 +63,9 @@ MODE_TO_COSTING = { # Mode to valid entry point highway classes # foot = any trail/track/road, mtb = tracks and roads, vehicle = roads only -MODE_TO_VALID_HIGHWAYS = { +MODE_TO_VALID_HIGHWAYS = { + "auto": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service"}, "foot": {"primary", "secondary", "tertiary", "unclassified", "residential", "service", "track", "path", "footway", "bridleway"}, "mtb": {"primary", "secondary", "tertiary", "unclassified", "residential", From 686b35710a55b369d8eafe6f554f58e795a36289 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 22:37:49 +0000 Subject: [PATCH 45/72] api: add auto mode to offroute endpoint validation Co-Authored-By: Claude Opus 4.5 --- lib/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api.py b/lib/api.py index 699d09d..949a0cc 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2768,8 +2768,8 @@ def api_offroute(): # Parse options mode = data.get("mode", "foot") - if mode not in ("foot", "mtb", "atv", "vehicle"): - return jsonify({"status": "error", "message": "mode must be foot, mtb, atv, or vehicle"}), 400 + if mode not in ("auto", "foot", "mtb", "atv", "vehicle"): + return jsonify({"status": "error", "message": "mode must be auto, foot, mtb, atv, or vehicle"}), 400 boundary_mode = data.get("boundary_mode", "pragmatic") if boundary_mode not in ("strict", "pragmatic", "emergency"): From 05c24f95f635bc856894f8f869448716042124ab Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 8 May 2026 23:27:06 +0000 Subject: [PATCH 46/72] offroute: tighten off-network threshold to 10m Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index 6981e89..0cc3ccd 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -50,7 +50,7 @@ EXPANDED_SEARCH_RADIUS_KM = 100 MEMORY_LIMIT_GB = 12 # Off-network detection threshold (meters) -OFF_NETWORK_THRESHOLD_M = 500 +OFF_NETWORK_THRESHOLD_M = 10 # Mode to Valhalla costing mapping MODE_TO_COSTING = { From b4e33eb0484c99bcf53d76e71a92a1d3dd43ab40 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 9 May 2026 03:28:58 +0000 Subject: [PATCH 47/72] offroute: PostGIS entry points with 100m densification and land_status tagging - Migrate EntryPointIndex from SQLite to PostGIS (padus database) - Densify highway LineStrings at 100m intervals via Shapely interpolate - 2.94M entry points from 476k lines (4x more coverage) - Tag each entry point with land_status via ST_Intersects against padus_sub - 1.64M public (56%), 1.30M unknown (44%) - Add geography GIST index for fast radius queries (~25ms) - Increase OFF_NETWORK_THRESHOLD_M from 10m to 50m for GPS accuracy - PBF path and PostGIS DSN configurable via home.yaml Co-Authored-By: Claude Opus 4.5 --- config/profiles/home.yaml | 17 +- lib/offroute/router.py | 2799 +++++++++++++++++++------------------ 2 files changed, 1483 insertions(+), 1333 deletions(-) diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 5269812..474ffb2 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -6,13 +6,13 @@ profile: home region_name: "North America" tileset: - url: "/tiles/na.pmtiles" + url: "/tiles/planet/current.pmtiles" bounds: [-168, 14, -52, 72] max_zoom: 15 attribution: "Protomaps © OSM" tileset_hillshade: - url: "/tiles/hillshade-na.pmtiles" + url: "/tiles/planet-dem.pmtiles" encoding: "terrarium" max_zoom: 12 @@ -33,14 +33,14 @@ services: features: has_nominatim_details: true - has_kiwix_wiki: false + has_kiwix_wiki: true has_hillshade: true has_3d_terrain: false has_traffic_overlay: true has_landclass: true has_public_lands_layer: true has_contours: true - has_contours_test: true + has_contours_test: false has_contours_test_10ft: false has_address_book_write: false has_overture_enrichment: true @@ -48,7 +48,16 @@ features: has_contacts: true has_wiki_rewriting: true has_wiki_discovery: false + has_usfs_trails: true + has_blm_trails: true defaults: center: [42.5736, -114.6066] zoom: 10 + +# Offroute wilderness routing +offroute: + osm_pbf_path: "/mnt/nav/sources/idaho-latest.osm.pbf" + densify_interval_m: 100 + postgis_dsn: "dbname=padus" + diff --git a/lib/offroute/router.py b/lib/offroute/router.py index 0cc3ccd..4b988ab 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -1,1332 +1,1473 @@ -""" -OFFROUTE Router — Bidirectional wilderness-to-network path orchestration. - -Supports four routing scenarios: - A: off-network start → on-network end (wilderness then Valhalla) - B: off-network start → off-network end (wilderness, Valhalla, wilderness) - C: on-network start → off-network end (Valhalla then wilderness) - D: on-network start → on-network end (pure Valhalla passthrough) - -Off-network detection: Valhalla /locate snap distance > 500m = off-network. - -IMPORTANT: The wilderness segment ALWAYS uses foot mode for pathfinding. -The user's selected mode affects: - 1. Which entry points are valid (foot=any, mtb=tracks+roads, vehicle=roads only) - 2. The Valhalla costing profile for the network segment -""" -import gc -import json -import math -import sqlite3 -import subprocess -import tempfile -import time -from pathlib import Path -from typing import Dict, List, Optional, Tuple, Literal - -import numpy as np -import requests -from skimage.graph import MCP_Geometric - -from .dem import DEMReader -from .cost import compute_cost_grid -from .friction import FrictionReader, friction_to_multiplier -from .barriers import BarrierReader, WildernessReader, DEFAULT_WILDERNESS_PATH -from .trails import TrailReader -from .mvum import get_mvum_access_grid - -# Paths -NAVI_DB_PATH = Path("/mnt/nav/navi.db") -OSM_PBF_PATH = Path("/mnt/nav/sources/idaho-latest.osm.pbf") - -# Valhalla endpoint -VALHALLA_URL = "http://localhost:8002" - -# Search radius for entry points (km) -DEFAULT_SEARCH_RADIUS_KM = 50 -EXPANDED_SEARCH_RADIUS_KM = 100 - -# Memory limit -MEMORY_LIMIT_GB = 12 - -# Off-network detection threshold (meters) -OFF_NETWORK_THRESHOLD_M = 10 - -# Mode to Valhalla costing mapping +""" +OFFROUTE Router — Bidirectional wilderness-to-network path orchestration. + +Supports four routing scenarios: + A: off-network start → on-network end (wilderness then Valhalla) + B: off-network start → off-network end (wilderness, Valhalla, wilderness) + C: on-network start → off-network end (Valhalla then wilderness) + D: on-network start → on-network end (pure Valhalla passthrough) + +Off-network detection: Valhalla /locate snap distance > 500m = off-network. + +IMPORTANT: The wilderness segment ALWAYS uses foot mode for pathfinding. +The user's selected mode affects: + 1. Which entry points are valid (foot=any, mtb=tracks+roads, vehicle=roads only) + 2. The Valhalla costing profile for the network segment +""" +import gc +import json +import math +import subprocess +import tempfile +import time +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Literal, Set + +import numpy as np +import requests +import psycopg2 +import psycopg2.extras +from shapely.geometry import LineString +from skimage.graph import MCP_Geometric + +from .dem import DEMReader +from .cost import compute_cost_grid +from .friction import FrictionReader, friction_to_multiplier +from .barriers import BarrierReader, WildernessReader, DEFAULT_WILDERNESS_PATH +from .trails import TrailReader +from .mvum import get_mvum_access_grid +from ..deployment_config import get_deployment_config + +# Load configuration +_deploy_config = get_deployment_config() +_offroute_config = _deploy_config.get("offroute", {}) + +# Paths (configurable via home.yaml) +OSM_PBF_PATH = Path(_offroute_config.get("osm_pbf_path", "/mnt/nav/sources/idaho-latest.osm.pbf")) +DENSIFY_INTERVAL_M = _offroute_config.get("densify_interval_m", 100) +POSTGIS_DSN = _offroute_config.get("postgis_dsn", "dbname=padus user=postgres") + +# Legacy SQLite path (still used by MVUM) +NAVI_DB_PATH = Path("/mnt/nav/navi.db") + +# Valhalla endpoint +VALHALLA_URL = "http://localhost:8002" + +# Search radius for entry points (km) +DEFAULT_SEARCH_RADIUS_KM = 50 +EXPANDED_SEARCH_RADIUS_KM = 100 + +# Memory limit +MEMORY_LIMIT_GB = 12 + +# Off-network detection threshold (meters) +OFF_NETWORK_THRESHOLD_M = 50 + +# Mode to Valhalla costing mapping MODE_TO_COSTING = { - "auto": "auto", - "foot": "pedestrian", - "mtb": "bicycle", - "atv": "auto", - "vehicle": "auto", -} - -# Mode to valid entry point highway classes -# foot = any trail/track/road, mtb = tracks and roads, vehicle = roads only + "auto": "auto", + "foot": "pedestrian", + "mtb": "bicycle", + "atv": "auto", + "vehicle": "auto", +} + +# Mode to valid entry point highway classes +# foot = any trail/track/road, mtb = tracks and roads, vehicle = roads only MODE_TO_VALID_HIGHWAYS = { "auto": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service"}, - "foot": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track", "path", "footway", "bridleway"}, - "mtb": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track"}, - "atv": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track"}, - "vehicle": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service"}, -} - - -def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: - """Calculate distance between two points in meters.""" - R = 6371000 - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2 - c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) - return R * c - - -def check_memory_usage() -> float: - """Check current memory usage in GB.""" - try: - import psutil - process = psutil.Process() - return process.memory_info().rss / (1024**3) - except ImportError: - return 0 - - -class EntryPointIndex: - """ - Trail entry point index for wilderness-to-network handoff. - - Entry points are endpoints and intersections of OSM highways - that connect wilderness areas to the routable network. - """ - - def __init__(self, db_path: Path = NAVI_DB_PATH): - self.db_path = db_path - self._conn = None - - def _get_conn(self) -> sqlite3.Connection: - if self._conn is None: - self._conn = sqlite3.connect(str(self.db_path)) - self._conn.row_factory = sqlite3.Row - return self._conn - - def table_exists(self) -> bool: - """Check if trail_entry_points table exists.""" - if not self.db_path.exists(): - return False - conn = self._get_conn() - cur = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='trail_entry_points'" - ) - return cur.fetchone() is not None - - def get_entry_point_count(self) -> int: - """Get count of entry points.""" - if not self.table_exists(): - return 0 - conn = self._get_conn() - cur = conn.execute("SELECT COUNT(*) FROM trail_entry_points") - return cur.fetchone()[0] - - def query_bbox(self, south: float, north: float, west: float, east: float) -> List[Dict]: - """Query entry points within a bounding box.""" - if not self.table_exists(): - return [] - - conn = self._get_conn() - cur = conn.execute(""" - SELECT id, lat, lon, highway_class, name - FROM trail_entry_points - WHERE lat >= ? AND lat <= ? AND lon >= ? AND lon <= ? - """, (south, north, west, east)) - - return [dict(row) for row in cur.fetchall()] - - def query_radius(self, lat: float, lon: float, radius_km: float, - valid_highways: Optional[set] = None) -> List[Dict]: - """ - Query entry points within radius of a point. - - Args: - lat, lon: Center point - radius_km: Search radius in kilometers - valid_highways: Optional set of valid highway classes to filter by - """ - lat_delta = radius_km / 111.0 - lon_delta = radius_km / (111.0 * math.cos(math.radians(lat))) - - points = self.query_bbox( - lat - lat_delta, lat + lat_delta, - lon - lon_delta, lon + lon_delta - ) - - result = [] - for p in points: - # Filter by highway class if specified - if valid_highways and p['highway_class'] not in valid_highways: - continue - - dist = haversine_distance(lat, lon, p['lat'], p['lon']) - if dist <= radius_km * 1000: - p['distance_m'] = dist - result.append(p) - - return sorted(result, key=lambda x: x['distance_m']) - - def build_index(self, osm_pbf_path: Path = OSM_PBF_PATH) -> Dict: - """Build the entry point index from OSM PBF.""" - if not osm_pbf_path.exists(): - raise FileNotFoundError(f"OSM PBF not found: {osm_pbf_path}") - - print(f"Building trail entry point index from {osm_pbf_path}...") - - highway_types = [ - "primary", "secondary", "tertiary", "unclassified", - "residential", "service", "track", "path", "footway", "bridleway" - ] - - stats = {"total": 0, "by_class": {}} - - with tempfile.TemporaryDirectory() as tmpdir: - geojson_path = Path(tmpdir) / "highways.geojson" - - print(f" Extracting highways with osmium...") - cmd = ["osmium", "tags-filter", str(osm_pbf_path)] - for ht in highway_types: - cmd.append(f"w/highway={ht}") - cmd.extend(["-o", str(Path(tmpdir) / "filtered.osm.pbf"), "--overwrite"]) - subprocess.run(cmd, check=True, capture_output=True) - - print(f" Converting to GeoJSON with ogr2ogr...") - cmd = [ - "ogr2ogr", "-f", "GeoJSON", - str(geojson_path), - str(Path(tmpdir) / "filtered.osm.pbf"), - "lines", "-t_srs", "EPSG:4326" - ] - subprocess.run(cmd, check=True, capture_output=True) - - print(f" Extracting entry points...") - with open(geojson_path) as f: - data = json.load(f) - - points = {} - for feature in data.get("features", []): - props = feature.get("properties", {}) - geom = feature.get("geometry", {}) - - if geom.get("type") != "LineString": - continue - - coords = geom.get("coordinates", []) - if len(coords) < 2: - continue - - highway_class = props.get("highway", "unknown") - name = props.get("name", "") - - for coord in [coords[0], coords[-1]]: - lon, lat = coord[0], coord[1] - key = (round(lat, 5), round(lon, 5)) - - if key not in points: - points[key] = { - "lat": lat, "lon": lon, - "highway_class": highway_class, "name": name - } - else: - existing = points[key] - if self._highway_priority(highway_class) < self._highway_priority(existing["highway_class"]): - points[key]["highway_class"] = highway_class - if name and not existing["name"]: - points[key]["name"] = name - - print(f" Writing {len(points)} entry points to {self.db_path}...") - - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_conn() - - conn.execute(""" - CREATE TABLE IF NOT EXISTS trail_entry_points ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - lat REAL NOT NULL, lon REAL NOT NULL, - highway_class TEXT NOT NULL, name TEXT - ) - """) - conn.execute("DELETE FROM trail_entry_points") - - for point in points.values(): - conn.execute( - "INSERT INTO trail_entry_points (lat, lon, highway_class, name) VALUES (?, ?, ?, ?)", - (point["lat"], point["lon"], point["highway_class"], point["name"]) - ) - stats["total"] += 1 - hc = point["highway_class"] - stats["by_class"][hc] = stats["by_class"].get(hc, 0) + 1 - - conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lat ON trail_entry_points(lat)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_lon ON trail_entry_points(lon)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_entry_latlon ON trail_entry_points(lat, lon)") - conn.commit() - - print(f" Done. Total: {stats['total']} entry points") - for hc, count in sorted(stats["by_class"].items(), key=lambda x: -x[1]): - print(f" {hc}: {count}") - - return stats - - def _highway_priority(self, highway_class: str) -> int: - """Lower number = better priority for entry points.""" - priority = { - "primary": 1, "secondary": 2, "tertiary": 3, - "unclassified": 4, "residential": 5, "service": 6, - "track": 7, "path": 8, "footway": 9, "bridleway": 10 - } - return priority.get(highway_class, 99) - - def close(self): - if self._conn: - self._conn.close() - self._conn = None - - -class OffrouteRouter: - """ - OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. - - Supports four scenarios: - A: off-network start → on-network end - B: off-network start → off-network end - C: on-network start → off-network end - D: on-network start → on-network end (pure Valhalla) - - IMPORTANT: Wilderness segment ALWAYS uses foot mode for pathfinding. - User's mode affects entry point selection and Valhalla costing only. - """ - - def __init__(self): - self.dem_reader = None - self.friction_reader = None - self.barrier_reader = None - self.wilderness_reader = None - self.trail_reader = None - self.entry_index = EntryPointIndex() - - def _init_readers(self): - """Lazy init readers.""" - if self.dem_reader is None: - self.dem_reader = DEMReader() - if self.friction_reader is None: - self.friction_reader = FrictionReader() - if self.barrier_reader is None: - self.barrier_reader = BarrierReader() - if self.wilderness_reader is None and DEFAULT_WILDERNESS_PATH.exists(): - self.wilderness_reader = WildernessReader() - if self.trail_reader is None: - self.trail_reader = TrailReader() - - def _locate_on_network(self, lat: float, lon: float, mode: str) -> Dict: - """ - Check if a point is on the routable network using Valhalla's /locate. - - Returns: - { - "on_network": bool, - "snap_distance_m": float, - "snapped_lat": float, - "snapped_lon": float - } - """ - costing = MODE_TO_COSTING.get(mode, "pedestrian") - try: - resp = requests.post( - f"{VALHALLA_URL}/locate", - json={"locations": [{"lat": lat, "lon": lon}], "costing": costing}, - timeout=10 - ) - - if resp.status_code == 200: - data = resp.json() - if data and len(data) > 0 and data[0].get("edges"): - edge = data[0]["edges"][0] - snap_lat = edge.get("correlated_lat", lat) - snap_lon = edge.get("correlated_lon", lon) - snap_dist = haversine_distance(lat, lon, snap_lat, snap_lon) - return { - "on_network": snap_dist <= OFF_NETWORK_THRESHOLD_M, - "snap_distance_m": snap_dist, - "snapped_lat": snap_lat, - "snapped_lon": snap_lon - } - except Exception: - pass - - return { - "on_network": False, - "snap_distance_m": float('inf'), - "snapped_lat": lat, - "snapped_lon": lon - } - - def route( - self, - start_lat: float, - start_lon: float, - end_lat: float, - end_lon: float, - mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot", - boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" - ) -> Dict: - """ - Route between two points, handling all four scenarios. - - Scenarios: - A: off-network start → on-network end (wilderness then network) - B: off-network start → off-network end (wilderness, network, wilderness) - C: on-network start → off-network end (network then wilderness) - D: on-network start → on-network end (pure network) - - Args: - start_lat, start_lon: Starting coordinates - end_lat, end_lon: Destination coordinates - mode: Travel mode (foot, mtb, atv, vehicle) - boundary_mode: How to handle private land (strict, pragmatic, emergency) - - Returns a GeoJSON FeatureCollection with route segments. - """ - if mode not in MODE_TO_COSTING: - return {"status": "error", "message": f"Unknown mode: {mode}"} - - # Detect network status for both endpoints - start_status = self._locate_on_network(start_lat, start_lon, mode) - end_status = self._locate_on_network(end_lat, end_lon, mode) - - start_off_network = not start_status["on_network"] - end_off_network = not end_status["on_network"] - - # Dispatch to appropriate handler - if not start_off_network and not end_off_network: - # Scenario D: on-network → on-network (pure Valhalla) - return self._route_D_network_only( - start_lat, start_lon, end_lat, end_lon, mode - ) - elif not start_off_network and end_off_network: - # Scenario C: on-network → off-network - return self._route_C_network_to_wilderness( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - elif start_off_network and not end_off_network: - # Scenario A: off-network → on-network - return self._route_A_wilderness_to_network( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - else: - # Scenario B: off-network → off-network - return self._route_B_wilderness_both( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - - def _route_D_network_only( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str - ) -> Dict: - """ - Scenario D: Both endpoints on-network. Pure Valhalla routing. - """ - t0 = time.time() - costing = MODE_TO_COSTING.get(mode, "pedestrian") - - valhalla_request = { - "locations": [ - {"lat": start_lat, "lon": start_lon}, - {"lat": end_lat, "lon": end_lon} - ], - "costing": costing, - "directions_options": {"units": "kilometers"} - } - - try: - resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) - - if resp.status_code != 200: - return { - "status": "error", - "message": f"Network routing failed: {resp.text[:200]}" - } - - valhalla_data = resp.json() - trip = valhalla_data.get("trip", {}) - legs = trip.get("legs", []) - - if not legs: - return {"status": "error", "message": "No route found"} - - leg = legs[0] - shape = leg.get("shape", "") - network_coords = self._decode_polyline(shape) - - maneuvers = [] - for m in leg.get("maneuvers", []): - maneuvers.append({ - "instruction": m.get("instruction", ""), - "type": m.get("type", 0), - "distance_km": m.get("length", 0), - "time_seconds": m.get("time", 0), - "street_names": m.get("street_names", []), - }) - - summary = trip.get("summary", {}) - distance_km = summary.get("length", 0) - duration_min = summary.get("time", 0) / 60 - - # Build response in same format as wilderness routes - network_feature = { - "type": "Feature", - "properties": { - "segment_type": "network", - "distance_km": distance_km, - "duration_minutes": duration_min, - "maneuvers": maneuvers, - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_coords} - } - - combined_feature = { - "type": "Feature", - "properties": { - "segment_type": "combined", - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_coords} - } - - geojson = {"type": "FeatureCollection", "features": [network_feature, combined_feature]} - - result = { - "status": "ok", - "route": geojson, - "summary": { - "total_distance_km": float(distance_km), - "total_effort_minutes": float(duration_min), - "wilderness_distance_km": 0.0, - "wilderness_effort_minutes": 0.0, - "network_distance_km": float(distance_km), - "network_duration_minutes": float(duration_min), - "on_trail_pct": 100.0, - "barrier_crossings": 0, - "network_mode": mode, - "scenario": "D", - "computation_time_s": time.time() - t0, - } - } - return result - - except Exception as e: - return {"status": "error", "message": f"Network routing failed: {e}"} - - def _route_A_wilderness_to_network( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario A: Off-network start → on-network end. - Wilderness pathfinding from start to entry point, then Valhalla to end. - """ - t0 = time.time() - - # Ensure entry point index exists - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - # Get valid highway classes for this mode - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - - # Find entry points near start, filtered by mode - MAX_ENTRY_POINTS = 10 - entry_points = self.entry_index.query_radius( - start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - - if not entry_points: - entry_points = self.entry_index.query_radius( - start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points: - if mode == "vehicle": - msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try a different mode." - elif mode in ("mtb", "atv"): - msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try foot mode." - else: - msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start." - return {"status": "error", "message": msg} - - entry_points = entry_points[:MAX_ENTRY_POINTS] - - # Run wilderness pathfinding - wilderness_result = self._pathfind_wilderness( - start_lat, start_lon, end_lat, end_lon, - entry_points, boundary_mode, "start" - ) - - if wilderness_result.get("status") == "error": - return wilderness_result - - # Extract results - wilderness_coords = wilderness_result["coords"] - wilderness_stats = wilderness_result["stats"] - best_entry = wilderness_result["entry_point"] - - entry_lat = best_entry["lat"] - entry_lon = best_entry["lon"] - - # Call Valhalla from entry point to destination - network_result = self._valhalla_route(entry_lat, entry_lon, end_lat, end_lon, mode) - - # Build response - return self._build_response( - wilderness_start=wilderness_coords, - wilderness_start_stats=wilderness_stats, - network_segment=network_result.get("segment"), - wilderness_end=None, - wilderness_end_stats=None, - mode=mode, - boundary_mode=boundary_mode, - entry_start=best_entry, - entry_end=None, - scenario="A", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _route_C_network_to_wilderness( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario C: On-network start → off-network end. - Valhalla from start to entry point, then wilderness pathfinding to end. - """ - t0 = time.time() - - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - - # Find entry points near END (destination) - MAX_ENTRY_POINTS = 10 - entry_points = self.entry_index.query_radius( - end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - - if not entry_points: - entry_points = self.entry_index.query_radius( - end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points: - if mode == "vehicle": - msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try a different mode." - elif mode in ("mtb", "atv"): - msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try foot mode." - else: - msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of destination." - return {"status": "error", "message": msg} - - entry_points = entry_points[:MAX_ENTRY_POINTS] - - # Run wilderness pathfinding FROM END toward entry points - wilderness_result = self._pathfind_wilderness( - end_lat, end_lon, start_lat, start_lon, - entry_points, boundary_mode, "end" - ) - - if wilderness_result.get("status") == "error": - return wilderness_result - - # The path is from end→entry, reverse it for display (entry→end) - wilderness_coords = list(reversed(wilderness_result["coords"])) - wilderness_stats = wilderness_result["stats"] - best_entry = wilderness_result["entry_point"] - - entry_lat = best_entry["lat"] - entry_lon = best_entry["lon"] - - # Call Valhalla from start to entry point - network_result = self._valhalla_route(start_lat, start_lon, entry_lat, entry_lon, mode) - - # Build response (network first, then wilderness) - return self._build_response( - wilderness_start=None, - wilderness_start_stats=None, - network_segment=network_result.get("segment"), - wilderness_end=wilderness_coords, - wilderness_end_stats=wilderness_stats, - mode=mode, - boundary_mode=boundary_mode, - entry_start=None, - entry_end=best_entry, - scenario="C", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _route_B_wilderness_both( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario B: Off-network start → off-network end. - Wilderness from start to entry_A, Valhalla entry_A to entry_B, wilderness from entry_B to end. - """ - t0 = time.time() - - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - MAX_ENTRY_POINTS = 10 - - # Find entry points near START - entry_points_start = self.entry_index.query_radius( - start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_start: - entry_points_start = self.entry_index.query_radius( - start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_start: - return {"status": "error", "message": f"No entry points found near start within {EXPANDED_SEARCH_RADIUS_KM}km."} - entry_points_start = entry_points_start[:MAX_ENTRY_POINTS] - - # Find entry points near END - entry_points_end = self.entry_index.query_radius( - end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_end: - entry_points_end = self.entry_index.query_radius( - end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_end: - return {"status": "error", "message": f"No entry points found near destination within {EXPANDED_SEARCH_RADIUS_KM}km."} - entry_points_end = entry_points_end[:MAX_ENTRY_POINTS] - - # Phase 1: Wilderness pathfinding from START - wilderness_start_result = self._pathfind_wilderness( - start_lat, start_lon, end_lat, end_lon, - entry_points_start, boundary_mode, "start" - ) - - if wilderness_start_result.get("status") == "error": - return wilderness_start_result - - wilderness_start_coords = wilderness_start_result["coords"] - wilderness_start_stats = wilderness_start_result["stats"] - entry_A = wilderness_start_result["entry_point"] - - # Phase 2: Wilderness pathfinding from END (run after freeing phase 1 memory) - wilderness_end_result = self._pathfind_wilderness( - end_lat, end_lon, start_lat, start_lon, - entry_points_end, boundary_mode, "end" - ) - - if wilderness_end_result.get("status") == "error": - return wilderness_end_result - - # Reverse the end wilderness path (it's end→entry, we want entry→end for display) - wilderness_end_coords = list(reversed(wilderness_end_result["coords"])) - wilderness_end_stats = wilderness_end_result["stats"] - entry_B = wilderness_end_result["entry_point"] - - # Phase 3: Valhalla from entry_A to entry_B - network_result = self._valhalla_route( - entry_A["lat"], entry_A["lon"], - entry_B["lat"], entry_B["lon"], - mode - ) - - # Build response - return self._build_response( - wilderness_start=wilderness_start_coords, - wilderness_start_stats=wilderness_start_stats, - network_segment=network_result.get("segment"), - wilderness_end=wilderness_end_coords, - wilderness_end_stats=wilderness_end_stats, - mode=mode, - boundary_mode=boundary_mode, - entry_start=entry_A, - entry_end=entry_B, - scenario="B", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _pathfind_wilderness( - self, - origin_lat: float, origin_lon: float, - dest_lat: float, dest_lon: float, - entry_points: List[Dict], - boundary_mode: str, - label: str - ) -> Dict: - """ - Run MCP wilderness pathfinding from origin toward entry points. - - Args: - origin_lat, origin_lon: Starting point for pathfinding - dest_lat, dest_lon: Ultimate destination (for bbox calculation) - entry_points: List of candidate entry points - boundary_mode: How to handle barriers - label: "start" or "end" for error messages - - Returns: - {"status": "ok", "coords": [...], "stats": {...}, "entry_point": {...}} - or {"status": "error", "message": "..."} - """ - # Build bbox - only include origin and entry points, NOT distant destination - # The destination is handled by Valhalla, wilderness only needs to reach entry points - MAX_BBOX_DEGREES = 2.0 - all_lats = [origin_lat] + [p["lat"] for p in entry_points] - all_lons = [origin_lon] + [p["lon"] for p in entry_points] - - padding = 0.05 - bbox = { - "south": min(all_lats) - padding, - "north": max(all_lats) + padding, - "west": min(all_lons) - padding, - "east": max(all_lons) + padding, - } - - # Clamp bbox size, centering on origin - lat_span = bbox["north"] - bbox["south"] - lon_span = bbox["east"] - bbox["west"] - if lat_span > MAX_BBOX_DEGREES or lon_span > MAX_BBOX_DEGREES: - half_span = MAX_BBOX_DEGREES / 2 - bbox = { - "south": origin_lat - half_span, - "north": origin_lat + half_span, - "west": origin_lon - half_span, - "east": origin_lon + half_span, - } - - # Initialize readers - self._init_readers() - - # Load elevation - try: - elevation, meta = self.dem_reader.get_elevation_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - ) - except Exception as e: - return {"status": "error", "message": f"Failed to load elevation for {label}: {e}"} - - # Check memory - mem = check_memory_usage() - if mem > MEMORY_LIMIT_GB: - return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} - - # Load friction - friction_raw = self.friction_reader.get_friction_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - friction_mult = friction_to_multiplier(friction_raw) - - # Load barriers - barriers = self.barrier_reader.get_barrier_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - - # Load trails - trails = self.trail_reader.get_trails_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - - # Compute cost grid (ALWAYS foot mode for wilderness) - cost = compute_cost_grid( - elevation, - cell_size_m=meta["cell_size_m"], - friction=friction_mult, - friction_raw=friction_raw, - trails=trails, - barriers=barriers, - wilderness=None, - mvum=None, - boundary_mode=boundary_mode, - mode="foot", - ) - - # Free intermediate arrays - del friction_mult, friction_raw - gc.collect() - - # Convert origin to pixel coordinates - origin_row, origin_col = self.dem_reader.latlon_to_pixel(origin_lat, origin_lon, meta) - - rows, cols = elevation.shape - if not (0 <= origin_row < rows and 0 <= origin_col < cols): - return {"status": "error", "message": f"{label.capitalize()} point outside grid bounds"} - - # Map entry points to pixels - entry_pixels = [] - for ep in entry_points: - row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) - if 0 <= row < rows and 0 <= col < cols: - entry_pixels.append({"row": row, "col": col, "entry_point": ep}) - - if not entry_pixels: - return {"status": "error", "message": f"No entry points map to grid bounds for {label}"} - - # Run MCP - mcp = MCP_Geometric(cost, fully_connected=True) - cumulative_costs, traceback = mcp.find_costs([(origin_row, origin_col)]) - - # Find nearest reachable entry point - best_entry = None - best_cost = np.inf - - for ep in entry_pixels: - ep_cost = cumulative_costs[ep["row"], ep["col"]] - if ep_cost < best_cost: - best_cost = ep_cost - best_entry = ep - - if best_entry is None or np.isinf(best_cost): - return { - "status": "error", - "message": f"No path found from {label} to any entry point (blocked by impassable terrain)" - } - - # Traceback path - path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) - - # Convert to coordinates and collect stats - coords = [] - elevations = [] - trail_values = [] - barrier_crossings = 0 - - for row, col in path_indices: - lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) - coords.append([lon, lat]) - elevations.append(elevation[row, col]) - trail_values.append(trails[row, col]) - if barriers[row, col] == 255: - barrier_crossings += 1 - - # Calculate distance - distance_m = 0 - for i in range(1, len(coords)): - lon1, lat1 = coords[i-1] - lon2, lat2 = coords[i] - distance_m += haversine_distance(lat1, lon1, lat2, lon2) - - # Elevation stats - elev_arr = np.array(elevations) - elev_diff = np.diff(elev_arr) - elev_gain = float(np.sum(elev_diff[elev_diff > 0])) - elev_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) - - # Trail stats - trail_arr = np.array(trail_values) - on_trail_cells = np.sum(trail_arr > 0) - total_cells = len(trail_arr) - on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - - # Free memory - del mcp, cumulative_costs, traceback, cost, trails, barriers, elevation - gc.collect() - - return { - "status": "ok", - "coords": coords, - "stats": { - "distance_km": distance_m / 1000, - "effort_minutes": best_cost / 60, - "elevation_gain_m": elev_gain, - "elevation_loss_m": elev_loss, - "on_trail_pct": on_trail_pct, - "barrier_crossings": barrier_crossings, - "cell_count": total_cells, - }, - "entry_point": best_entry["entry_point"] - } - - def _valhalla_route( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str - ) -> Dict: - """ - Call Valhalla for network routing. - - Returns: - {"segment": {...}, "error": None} on success - {"segment": None, "error": "..."} on failure - """ - costing = MODE_TO_COSTING.get(mode, "pedestrian") - - valhalla_request = { - "locations": [ - {"lat": start_lat, "lon": start_lon}, - {"lat": end_lat, "lon": end_lon} - ], - "costing": costing, - "directions_options": {"units": "kilometers"} - } - - try: - resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) - - if resp.status_code == 200: - valhalla_data = resp.json() - trip = valhalla_data.get("trip", {}) - legs = trip.get("legs", []) - - if legs: - leg = legs[0] - shape = leg.get("shape", "") - coords = self._decode_polyline(shape) - - maneuvers = [] - for m in leg.get("maneuvers", []): - maneuvers.append({ - "instruction": m.get("instruction", ""), - "type": m.get("type", 0), - "distance_km": m.get("length", 0), - "time_seconds": m.get("time", 0), - "street_names": m.get("street_names", []), - }) - - summary = trip.get("summary", {}) - return { - "segment": { - "coordinates": coords, - "distance_km": summary.get("length", 0), - "duration_minutes": summary.get("time", 0) / 60, - "maneuvers": maneuvers, - }, - "error": None - } - - return {"segment": None, "error": f"Valhalla returned {resp.status_code}: {resp.text[:200]}"} - - except Exception as e: - return {"segment": None, "error": f"Valhalla request failed: {e}"} - - def _build_response( - self, - wilderness_start: Optional[List], - wilderness_start_stats: Optional[Dict], - network_segment: Optional[Dict], - wilderness_end: Optional[List], - wilderness_end_stats: Optional[Dict], - mode: str, - boundary_mode: str, - entry_start: Optional[Dict], - entry_end: Optional[Dict], - scenario: str, - t0: float, - valhalla_error: Optional[str] - ) -> Dict: - """Build the final GeoJSON response.""" - features = [] - - # Wilderness start segment - if wilderness_start and wilderness_start_stats: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "wilderness", - "segment_position": "start", - "effort_minutes": float(wilderness_start_stats["effort_minutes"]), - "distance_km": float(wilderness_start_stats["distance_km"]), - "elevation_gain_m": wilderness_start_stats["elevation_gain_m"], - "elevation_loss_m": wilderness_start_stats["elevation_loss_m"], - "boundary_mode": boundary_mode, - "on_trail_pct": wilderness_start_stats["on_trail_pct"], - "barrier_crossings": wilderness_start_stats["barrier_crossings"], - "wilderness_mode": "foot", - }, - "geometry": {"type": "LineString", "coordinates": wilderness_start} - }) - - # Network segment - if network_segment: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "network", - "distance_km": network_segment["distance_km"], - "duration_minutes": network_segment["duration_minutes"], - "maneuvers": network_segment["maneuvers"], - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} - }) - - # Wilderness end segment - if wilderness_end and wilderness_end_stats: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "wilderness", - "segment_position": "end", - "effort_minutes": float(wilderness_end_stats["effort_minutes"]), - "distance_km": float(wilderness_end_stats["distance_km"]), - "elevation_gain_m": wilderness_end_stats["elevation_gain_m"], - "elevation_loss_m": wilderness_end_stats["elevation_loss_m"], - "boundary_mode": boundary_mode, - "on_trail_pct": wilderness_end_stats["on_trail_pct"], - "barrier_crossings": wilderness_end_stats["barrier_crossings"], - "wilderness_mode": "foot", - }, - "geometry": {"type": "LineString", "coordinates": wilderness_end} - }) - - # Combined path - combined_coords = [] - if wilderness_start: - combined_coords.extend(wilderness_start) - if network_segment: - # Skip first coord if we already have wilderness_start (avoid duplicate) - start_idx = 1 if wilderness_start else 0 - combined_coords.extend(network_segment["coordinates"][start_idx:]) - if wilderness_end: - # Skip first coord (avoid duplicate with network end) - start_idx = 1 if (wilderness_start or network_segment) else 0 - combined_coords.extend(wilderness_end[start_idx:]) - - if combined_coords: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "combined", - "wilderness_mode": "foot", - "network_mode": mode, - "boundary_mode": boundary_mode, - "scenario": scenario, - }, - "geometry": {"type": "LineString", "coordinates": combined_coords} - }) - - geojson = {"type": "FeatureCollection", "features": features} - - # Calculate totals - total_distance_km = 0.0 - total_effort_minutes = 0.0 - wilderness_distance_km = 0.0 - wilderness_effort_minutes = 0.0 - network_distance_km = 0.0 - network_duration_minutes = 0.0 - barrier_crossings = 0 - on_trail_pct = 0.0 - - if wilderness_start_stats: - wilderness_distance_km += wilderness_start_stats["distance_km"] - wilderness_effort_minutes += wilderness_start_stats["effort_minutes"] - barrier_crossings += wilderness_start_stats["barrier_crossings"] - on_trail_pct = wilderness_start_stats["on_trail_pct"] - - if wilderness_end_stats: - wilderness_distance_km += wilderness_end_stats["distance_km"] - wilderness_effort_minutes += wilderness_end_stats["effort_minutes"] - barrier_crossings += wilderness_end_stats["barrier_crossings"] - # Average on-trail percentage if we have both - if wilderness_start_stats: - on_trail_pct = (on_trail_pct + wilderness_end_stats["on_trail_pct"]) / 2 - else: - on_trail_pct = wilderness_end_stats["on_trail_pct"] - - if network_segment: - network_distance_km = network_segment["distance_km"] - network_duration_minutes = network_segment["duration_minutes"] - - total_distance_km = wilderness_distance_km + network_distance_km - total_effort_minutes = wilderness_effort_minutes + network_duration_minutes - - summary = { - "total_distance_km": float(total_distance_km), - "total_effort_minutes": float(total_effort_minutes), - "wilderness_distance_km": float(wilderness_distance_km), - "wilderness_effort_minutes": float(wilderness_effort_minutes), - "network_distance_km": float(network_distance_km), - "network_duration_minutes": float(network_duration_minutes), - "on_trail_pct": float(on_trail_pct), - "barrier_crossings": barrier_crossings, - "boundary_mode": boundary_mode, - "wilderness_mode": "foot", - "network_mode": mode, - "scenario": scenario, - "computation_time_s": time.time() - t0, - } - - if entry_start: - summary["entry_point_start"] = { - "lat": entry_start["lat"], - "lon": entry_start["lon"], - "highway_class": entry_start["highway_class"], - "name": entry_start.get("name", ""), - } - - if entry_end: - summary["entry_point_end"] = { - "lat": entry_end["lat"], - "lon": entry_end["lon"], - "highway_class": entry_end["highway_class"], - "name": entry_end.get("name", ""), - } - - result = {"status": "ok", "route": geojson, "summary": summary} - - if valhalla_error: - result["warning"] = f"Network segment incomplete: {valhalla_error}" - - return result - - def _decode_polyline(self, encoded: str, precision: int = 6) -> List[List[float]]: - """Decode a polyline string into coordinates [lon, lat].""" - coords = [] - index = 0 - lat = 0 - lon = 0 - - while index < len(encoded): - shift = 0 - result = 0 - while True: - b = ord(encoded[index]) - 63 - index += 1 - result |= (b & 0x1f) << shift - shift += 5 - if b < 0x20: - break - dlat = ~(result >> 1) if result & 1 else result >> 1 - lat += dlat - - shift = 0 - result = 0 - while True: - b = ord(encoded[index]) - 63 - index += 1 - result |= (b & 0x1f) << shift - shift += 5 - if b < 0x20: - break - dlon = ~(result >> 1) if result & 1 else result >> 1 - lon += dlon - - coords.append([lon / (10 ** precision), lat / (10 ** precision)]) - - return coords - - def close(self): - """Close all readers.""" - if self.dem_reader: - self.dem_reader.close() - if self.friction_reader: - self.friction_reader.close() - if self.barrier_reader: - self.barrier_reader.close() - if self.wilderness_reader: - self.wilderness_reader.close() - if self.trail_reader: - self.trail_reader.close() - self.entry_index.close() - - -def build_entry_index(): - """Build the trail entry point index.""" - index = EntryPointIndex() - stats = index.build_index() - index.close() - return stats - - -if __name__ == "__main__": - import sys - - if len(sys.argv) > 1 and sys.argv[1] == "build": - print("Building trail entry point index...") - stats = build_entry_index() - print(f"\nDone. Total entry points: {stats['total']}") - - elif len(sys.argv) > 1 and sys.argv[1] == "test": - print("Testing router (all scenarios)...") - print("=" * 60) - - router = OffrouteRouter() - - # Test points - wilderness_start = (44.0543, -115.4237) # Off-network - wilderness_end = (45.2, -115.5) # Deep wilderness (Frank Church) - road_start = (43.6150, -116.2023) # Boise downtown (on-network) - road_end = (43.5867, -116.5625) # Nampa (on-network) - - tests = [ - ("A: wilderness→road", wilderness_start, (44.0814, -115.5021)), - ("B: wilderness→wilderness", wilderness_start, wilderness_end), - ("C: road→wilderness", road_start, wilderness_start), - ("D: road→road", road_start, road_end), - ] - - for label, (slat, slon), (elat, elon) in tests: - print(f"\n{label}") - print("-" * 40) - - result = router.route( - start_lat=slat, start_lon=slon, - end_lat=elat, end_lon=elon, - mode="foot", boundary_mode="pragmatic" - ) - - if result["status"] == "ok": - s = result["summary"] - print(f" Scenario: {s.get('scenario', '?')}") - print(f" Total: {s['total_distance_km']:.2f} km, {s['total_effort_minutes']:.1f} min") - print(f" Wilderness: {s['wilderness_distance_km']:.2f} km") - print(f" Network: {s['network_distance_km']:.2f} km") - if s.get('entry_point_start'): - ep = s['entry_point_start'] - print(f" Entry (start): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") - if s.get('entry_point_end'): - ep = s['entry_point_end'] - print(f" Entry (end): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") - else: - print(f" ERROR: {result['message']}") - - router.close() - - else: - print("Usage:") - print(" python router.py build # Build entry point index") - print(" python router.py test # Test all scenarios") + "service"}, + "foot": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track", "path", "footway", "bridleway"}, + "mtb": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track"}, + "atv": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service", "track"}, + "vehicle": {"primary", "secondary", "tertiary", "unclassified", "residential", + "service"}, +} + + +def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """Calculate distance between two points in meters.""" + R = 6371000 + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2 + c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) + return R * c + + +def check_memory_usage() -> float: + """Check current memory usage in GB.""" + try: + import psutil + process = psutil.Process() + return process.memory_info().rss / (1024**3) + except ImportError: + return 0 + + +class EntryPointIndex: + """ + PostGIS-backed spatial index of road/trail entry points. + Uses ST_DWithin for fast radius queries with meter-accurate distances. + Densifies highway LineStrings at 100m intervals for better coverage. + """ + + def __init__(self, dsn: str = None): + self.dsn = dsn or POSTGIS_DSN + self._conn: Optional[psycopg2.extensions.connection] = None + + def _get_conn(self) -> psycopg2.extensions.connection: + if self._conn is None or self._conn.closed: + self._conn = psycopg2.connect(self.dsn) + return self._conn + + def table_exists(self) -> bool: + """Check if entry_points table exists.""" + conn = self._get_conn() + with conn.cursor() as cur: + cur.execute(""" + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'entry_points' + ) + """) + return cur.fetchone()[0] + + def get_entry_point_count(self) -> int: + """Return the number of entry points in the index.""" + if not self.table_exists(): + return 0 + conn = self._get_conn() + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM entry_points") + return cur.fetchone()[0] + + def query_bbox( + self, + south: float, + north: float, + west: float, + east: float, + valid_highways: Optional[Set[str]] = None + ) -> List[Dict]: + """Find entry points within a bounding box.""" + if not self.table_exists(): + return [] + + conn = self._get_conn() + + highway_filter = "" + params = [west, south, east, north] + if valid_highways: + placeholders = ','.join(['%s'] * len(valid_highways)) + highway_filter = f"AND highway_class IN ({placeholders})" + params.extend(list(valid_highways)) + + query = f""" + SELECT + id, + ST_Y(geom) as lat, + ST_X(geom) as lon, + highway_class, + name, + land_status + FROM entry_points + WHERE geom && ST_MakeEnvelope(%s, %s, %s, %s, 4326) + {highway_filter} + """ + + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(query, params) + return [dict(row) for row in cur.fetchall()] + + def query_radius( + self, + lat: float, + lon: float, + radius_km: float, + valid_highways: Optional[Set[str]] = None, + limit: int = 50 + ) -> List[Dict]: + """ + Find entry points within radius_km of (lat, lon). + Uses PostGIS ST_DWithin with geography cast for meter-accurate distance. + """ + if not self.table_exists(): + return [] + + conn = self._get_conn() + radius_m = radius_km * 1000 + + # Build query with optional highway filter + highway_filter = "" + params = [lon, lat, lon, lat, radius_m] + if valid_highways: + placeholders = ','.join(['%s'] * len(valid_highways)) + highway_filter = f"AND highway_class IN ({placeholders})" + params.extend(list(valid_highways)) + params.append(limit) + + query = f""" + SELECT + id, + ST_Y(geom) as lat, + ST_X(geom) as lon, + highway_class, + name, + land_status, + ST_Distance( + geom::geography, + ST_SetSRID(ST_Point(%s, %s), 4326)::geography + ) as distance_m + FROM entry_points + WHERE ST_DWithin( + geom::geography, + ST_SetSRID(ST_Point(%s, %s), 4326)::geography, + %s + ) + {highway_filter} + ORDER BY distance_m + LIMIT %s + """ + + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(query, params) + return [dict(row) for row in cur.fetchall()] + + def build_index(self, osm_pbf_path: Path = None) -> Dict: + """ + Build the entry point index from OSM PBF. + Densifies LineStrings to sample points every 100m. + Tags points with land_status from PAD-US. + """ + if osm_pbf_path is None: + osm_pbf_path = OSM_PBF_PATH + + if not osm_pbf_path.exists(): + raise FileNotFoundError(f"OSM PBF not found: {osm_pbf_path}") + + print(f"Building entry point index from {osm_pbf_path}...") + start_time = time.time() + + highway_types = [ + "primary", "secondary", "tertiary", "unclassified", + "residential", "service", "track", "path", "footway", "bridleway" + ] + + stats = {"total": 0, "by_class": {}, "lines_processed": 0} + + with tempfile.TemporaryDirectory() as tmpdir: + geojson_path = Path(tmpdir) / "highways.geojson" + + # Extract highways with osmium + print(" Extracting highways with osmium...") + cmd = ["osmium", "tags-filter", str(osm_pbf_path)] + for ht in highway_types: + cmd.append(f"w/highway={ht}") + cmd.extend(["-o", str(Path(tmpdir) / "filtered.osm.pbf"), "--overwrite"]) + subprocess.run(cmd, check=True, capture_output=True) + + # Convert to GeoJSON + print(" Converting to GeoJSON with ogr2ogr...") + cmd = [ + "ogr2ogr", "-f", "GeoJSON", + str(geojson_path), + str(Path(tmpdir) / "filtered.osm.pbf"), + "lines", "-t_srs", "EPSG:4326" + ] + subprocess.run(cmd, check=True, capture_output=True) + + # Load GeoJSON + print(" Loading GeoJSON...") + with open(geojson_path) as f: + data = json.load(f) + + # Process features and densify + print(f" Densifying LineStrings at {DENSIFY_INTERVAL_M}m intervals...") + points_to_insert = [] + seen_keys = set() + + features = data.get("features", []) + total_features = len(features) + + for idx, feature in enumerate(features): + if idx > 0 and idx % 100000 == 0: + print(f" Processed {idx}/{total_features} features...") + + props = feature.get("properties", {}) + geom = feature.get("geometry", {}) + + if geom.get("type") != "LineString": + continue + + coords = geom.get("coordinates", []) + if len(coords) < 2: + continue + + highway_class = props.get("highway", "unknown") + name = props.get("name", "") + stats["lines_processed"] += 1 + + # Densify this LineString + densified = self._densify_line(coords, DENSIFY_INTERVAL_M) + + for lon, lat in densified: + # Deduplicate by rounding to 5 decimal places (~1m precision) + key = (round(lat, 5), round(lon, 5)) + if key in seen_keys: + continue + seen_keys.add(key) + + points_to_insert.append((lon, lat, highway_class, name)) + + # Insert into PostGIS + print(f" Inserting {len(points_to_insert)} entry points into PostGIS...") + conn = self._get_conn() + + with conn.cursor() as cur: + # Truncate existing data + cur.execute("TRUNCATE entry_points RESTART IDENTITY") + + # Batch insert with execute_values for speed + batch_size = 50000 + for i in range(0, len(points_to_insert), batch_size): + batch = points_to_insert[i:i+batch_size] + psycopg2.extras.execute_values( + cur, + """ + INSERT INTO entry_points (geom, highway_class, name) + VALUES %s + """, + batch, + template="(ST_SetSRID(ST_Point(%s, %s), 4326), %s, %s)", + page_size=10000 + ) + if i > 0 and i % 500000 == 0: + print(f" Inserted {i}/{len(points_to_insert)} points...") + + conn.commit() + + # Tag land_status from PAD-US + print(" Tagging land_status from PAD-US subdivided polygons...") + with conn.cursor() as cur: + cur.execute(""" + UPDATE entry_points e + SET land_status = 'public' + FROM padus_sub p + WHERE ST_Intersects(e.geom, p.geom) + """) + public_count = cur.rowcount + print(f" Tagged {public_count} points as public land") + + conn.commit() + + # Gather stats + elapsed = time.time() - start_time + stats["total"] = len(points_to_insert) + stats["build_time_sec"] = round(elapsed, 1) + + for lon, lat, hc, name in points_to_insert: + stats["by_class"][hc] = stats["by_class"].get(hc, 0) + 1 + + print(f" Done in {elapsed:.1f}s. Total: {stats['total']} entry points from {stats['lines_processed']} lines") + for hc, count in sorted(stats["by_class"].items(), key=lambda x: -x[1]): + print(f" {hc}: {count}") + + return stats + + def _densify_line(self, coords: List[List[float]], interval_m: float) -> List[tuple]: + """ + Sample points along a LineString at regular intervals. + coords: [[lon, lat], ...] in GeoJSON order + Returns: [(lon, lat), ...] sampled points including first and last + """ + if len(coords) < 2: + return [(coords[0][0], coords[0][1])] if coords else [] + + # Calculate line length in meters using haversine on segments + total_m = 0 + for i in range(len(coords) - 1): + lon1, lat1 = coords[i] + lon2, lat2 = coords[i + 1] + total_m += haversine_distance(lat1, lon1, lat2, lon2) + + if total_m == 0: + return [(coords[0][0], coords[0][1])] + + # Create Shapely LineString + line = LineString(coords) + + # Calculate number of points needed + n_points = max(2, int(total_m / interval_m) + 1) + + # Sample using normalized interpolation + result = [] + for i in range(n_points): + fraction = min(i / (n_points - 1), 1.0) if n_points > 1 else 0 + point = line.interpolate(fraction, normalized=True) + result.append((point.x, point.y)) # (lon, lat) + + # Always ensure first and last original coordinates are included + first_coord = (coords[0][0], coords[0][1]) + last_coord = (coords[-1][0], coords[-1][1]) + + if result[0] != first_coord: + result[0] = first_coord + if result[-1] != last_coord: + result[-1] = last_coord + + return result + + def _highway_priority(self, highway_class: str) -> int: + """Lower number = better priority for entry points.""" + priority = { + "primary": 1, "secondary": 2, "tertiary": 3, + "unclassified": 4, "residential": 5, "service": 6, + "track": 7, "path": 8, "footway": 9, "bridleway": 10 + } + return priority.get(highway_class, 99) + + def close(self): + if self._conn and not self._conn.closed: + self._conn.close() + self._conn = None + + +class OffrouteRouter: + """ + OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. + + Supports four scenarios: + A: off-network start → on-network end + B: off-network start → off-network end + C: on-network start → off-network end + D: on-network start → on-network end (pure Valhalla) + + IMPORTANT: Wilderness segment ALWAYS uses foot mode for pathfinding. + User's mode affects entry point selection and Valhalla costing only. + """ + + def __init__(self): + self.dem_reader = None + self.friction_reader = None + self.barrier_reader = None + self.wilderness_reader = None + self.trail_reader = None + self.entry_index = EntryPointIndex() + + def _init_readers(self): + """Lazy init readers.""" + if self.dem_reader is None: + self.dem_reader = DEMReader() + if self.friction_reader is None: + self.friction_reader = FrictionReader() + if self.barrier_reader is None: + self.barrier_reader = BarrierReader() + if self.wilderness_reader is None and DEFAULT_WILDERNESS_PATH.exists(): + self.wilderness_reader = WildernessReader() + if self.trail_reader is None: + self.trail_reader = TrailReader() + + def _locate_on_network(self, lat: float, lon: float, mode: str) -> Dict: + """ + Check if a point is on the routable network using Valhalla's /locate. + + Returns: + { + "on_network": bool, + "snap_distance_m": float, + "snapped_lat": float, + "snapped_lon": float + } + """ + costing = MODE_TO_COSTING.get(mode, "pedestrian") + try: + resp = requests.post( + f"{VALHALLA_URL}/locate", + json={"locations": [{"lat": lat, "lon": lon}], "costing": costing}, + timeout=10 + ) + + if resp.status_code == 200: + data = resp.json() + if data and len(data) > 0 and data[0].get("edges"): + edge = data[0]["edges"][0] + snap_lat = edge.get("correlated_lat", lat) + snap_lon = edge.get("correlated_lon", lon) + snap_dist = haversine_distance(lat, lon, snap_lat, snap_lon) + return { + "on_network": snap_dist <= OFF_NETWORK_THRESHOLD_M, + "snap_distance_m": snap_dist, + "snapped_lat": snap_lat, + "snapped_lon": snap_lon + } + except Exception: + pass + + return { + "on_network": False, + "snap_distance_m": float('inf'), + "snapped_lat": lat, + "snapped_lon": lon + } + + def route( + self, + start_lat: float, + start_lon: float, + end_lat: float, + end_lon: float, + mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot", + boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" + ) -> Dict: + """ + Route between two points, handling all four scenarios. + + Scenarios: + A: off-network start → on-network end (wilderness then network) + B: off-network start → off-network end (wilderness, network, wilderness) + C: on-network start → off-network end (network then wilderness) + D: on-network start → on-network end (pure network) + + Args: + start_lat, start_lon: Starting coordinates + end_lat, end_lon: Destination coordinates + mode: Travel mode (foot, mtb, atv, vehicle) + boundary_mode: How to handle private land (strict, pragmatic, emergency) + + Returns a GeoJSON FeatureCollection with route segments. + """ + if mode not in MODE_TO_COSTING: + return {"status": "error", "message": f"Unknown mode: {mode}"} + + # Detect network status for both endpoints + start_status = self._locate_on_network(start_lat, start_lon, mode) + end_status = self._locate_on_network(end_lat, end_lon, mode) + + start_off_network = not start_status["on_network"] + end_off_network = not end_status["on_network"] + + # Dispatch to appropriate handler + if not start_off_network and not end_off_network: + # Scenario D: on-network → on-network (pure Valhalla) + return self._route_D_network_only( + start_lat, start_lon, end_lat, end_lon, mode + ) + elif not start_off_network and end_off_network: + # Scenario C: on-network → off-network + return self._route_C_network_to_wilderness( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + elif start_off_network and not end_off_network: + # Scenario A: off-network → on-network + return self._route_A_wilderness_to_network( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + else: + # Scenario B: off-network → off-network + return self._route_B_wilderness_both( + start_lat, start_lon, end_lat, end_lon, mode, boundary_mode + ) + + def _route_D_network_only( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str + ) -> Dict: + """ + Scenario D: Both endpoints on-network. Pure Valhalla routing. + """ + t0 = time.time() + costing = MODE_TO_COSTING.get(mode, "pedestrian") + + valhalla_request = { + "locations": [ + {"lat": start_lat, "lon": start_lon}, + {"lat": end_lat, "lon": end_lon} + ], + "costing": costing, + "directions_options": {"units": "kilometers"} + } + + try: + resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) + + if resp.status_code != 200: + return { + "status": "error", + "message": f"Network routing failed: {resp.text[:200]}" + } + + valhalla_data = resp.json() + trip = valhalla_data.get("trip", {}) + legs = trip.get("legs", []) + + if not legs: + return {"status": "error", "message": "No route found"} + + leg = legs[0] + shape = leg.get("shape", "") + network_coords = self._decode_polyline(shape) + + maneuvers = [] + for m in leg.get("maneuvers", []): + maneuvers.append({ + "instruction": m.get("instruction", ""), + "type": m.get("type", 0), + "distance_km": m.get("length", 0), + "time_seconds": m.get("time", 0), + "street_names": m.get("street_names", []), + }) + + summary = trip.get("summary", {}) + distance_km = summary.get("length", 0) + duration_min = summary.get("time", 0) / 60 + + # Build response in same format as wilderness routes + network_feature = { + "type": "Feature", + "properties": { + "segment_type": "network", + "distance_km": distance_km, + "duration_minutes": duration_min, + "maneuvers": maneuvers, + "network_mode": mode, + }, + "geometry": {"type": "LineString", "coordinates": network_coords} + } + + combined_feature = { + "type": "Feature", + "properties": { + "segment_type": "combined", + "network_mode": mode, + }, + "geometry": {"type": "LineString", "coordinates": network_coords} + } + + geojson = {"type": "FeatureCollection", "features": [network_feature, combined_feature]} + + result = { + "status": "ok", + "route": geojson, + "summary": { + "total_distance_km": float(distance_km), + "total_effort_minutes": float(duration_min), + "wilderness_distance_km": 0.0, + "wilderness_effort_minutes": 0.0, + "network_distance_km": float(distance_km), + "network_duration_minutes": float(duration_min), + "on_trail_pct": 100.0, + "barrier_crossings": 0, + "network_mode": mode, + "scenario": "D", + "computation_time_s": time.time() - t0, + } + } + return result + + except Exception as e: + return {"status": "error", "message": f"Network routing failed: {e}"} + + def _route_A_wilderness_to_network( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario A: Off-network start → on-network end. + Wilderness pathfinding from start to entry point, then Valhalla to end. + """ + t0 = time.time() + + # Ensure entry point index exists + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + # Get valid highway classes for this mode + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + + # Find entry points near start, filtered by mode + MAX_ENTRY_POINTS = 10 + entry_points = self.entry_index.query_radius( + start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + + if not entry_points: + entry_points = self.entry_index.query_radius( + start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points: + if mode == "vehicle": + msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try a different mode." + elif mode in ("mtb", "atv"): + msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try foot mode." + else: + msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start." + return {"status": "error", "message": msg} + + entry_points = entry_points[:MAX_ENTRY_POINTS] + + # Run wilderness pathfinding + wilderness_result = self._pathfind_wilderness( + start_lat, start_lon, end_lat, end_lon, + entry_points, boundary_mode, "start" + ) + + if wilderness_result.get("status") == "error": + return wilderness_result + + # Extract results + wilderness_coords = wilderness_result["coords"] + wilderness_stats = wilderness_result["stats"] + best_entry = wilderness_result["entry_point"] + + entry_lat = best_entry["lat"] + entry_lon = best_entry["lon"] + + # Call Valhalla from entry point to destination + network_result = self._valhalla_route(entry_lat, entry_lon, end_lat, end_lon, mode) + + # Build response + return self._build_response( + wilderness_start=wilderness_coords, + wilderness_start_stats=wilderness_stats, + network_segment=network_result.get("segment"), + wilderness_end=None, + wilderness_end_stats=None, + mode=mode, + boundary_mode=boundary_mode, + entry_start=best_entry, + entry_end=None, + scenario="A", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _route_C_network_to_wilderness( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario C: On-network start → off-network end. + Valhalla from start to entry point, then wilderness pathfinding to end. + """ + t0 = time.time() + + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + + # Find entry points near END (destination) + MAX_ENTRY_POINTS = 10 + entry_points = self.entry_index.query_radius( + end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + + if not entry_points: + entry_points = self.entry_index.query_radius( + end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points: + if mode == "vehicle": + msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try a different mode." + elif mode in ("mtb", "atv"): + msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try foot mode." + else: + msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of destination." + return {"status": "error", "message": msg} + + entry_points = entry_points[:MAX_ENTRY_POINTS] + + # Run wilderness pathfinding FROM END toward entry points + wilderness_result = self._pathfind_wilderness( + end_lat, end_lon, start_lat, start_lon, + entry_points, boundary_mode, "end" + ) + + if wilderness_result.get("status") == "error": + return wilderness_result + + # The path is from end→entry, reverse it for display (entry→end) + wilderness_coords = list(reversed(wilderness_result["coords"])) + wilderness_stats = wilderness_result["stats"] + best_entry = wilderness_result["entry_point"] + + entry_lat = best_entry["lat"] + entry_lon = best_entry["lon"] + + # Call Valhalla from start to entry point + network_result = self._valhalla_route(start_lat, start_lon, entry_lat, entry_lon, mode) + + # Build response (network first, then wilderness) + return self._build_response( + wilderness_start=None, + wilderness_start_stats=None, + network_segment=network_result.get("segment"), + wilderness_end=wilderness_coords, + wilderness_end_stats=wilderness_stats, + mode=mode, + boundary_mode=boundary_mode, + entry_start=None, + entry_end=best_entry, + scenario="C", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _route_B_wilderness_both( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str, boundary_mode: str + ) -> Dict: + """ + Scenario B: Off-network start → off-network end. + Wilderness from start to entry_A, Valhalla entry_A to entry_B, wilderness from entry_B to end. + """ + t0 = time.time() + + if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: + return { + "status": "error", + "message": "Trail entry point index not built. Run build_entry_index() first." + } + + valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) + MAX_ENTRY_POINTS = 10 + + # Find entry points near START + entry_points_start = self.entry_index.query_radius( + start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_start: + entry_points_start = self.entry_index.query_radius( + start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_start: + return {"status": "error", "message": f"No entry points found near start within {EXPANDED_SEARCH_RADIUS_KM}km."} + entry_points_start = entry_points_start[:MAX_ENTRY_POINTS] + + # Find entry points near END + entry_points_end = self.entry_index.query_radius( + end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_end: + entry_points_end = self.entry_index.query_radius( + end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways + ) + if not entry_points_end: + return {"status": "error", "message": f"No entry points found near destination within {EXPANDED_SEARCH_RADIUS_KM}km."} + entry_points_end = entry_points_end[:MAX_ENTRY_POINTS] + + # Phase 1: Wilderness pathfinding from START + wilderness_start_result = self._pathfind_wilderness( + start_lat, start_lon, end_lat, end_lon, + entry_points_start, boundary_mode, "start" + ) + + if wilderness_start_result.get("status") == "error": + return wilderness_start_result + + wilderness_start_coords = wilderness_start_result["coords"] + wilderness_start_stats = wilderness_start_result["stats"] + entry_A = wilderness_start_result["entry_point"] + + # Phase 2: Wilderness pathfinding from END (run after freeing phase 1 memory) + wilderness_end_result = self._pathfind_wilderness( + end_lat, end_lon, start_lat, start_lon, + entry_points_end, boundary_mode, "end" + ) + + if wilderness_end_result.get("status") == "error": + return wilderness_end_result + + # Reverse the end wilderness path (it's end→entry, we want entry→end for display) + wilderness_end_coords = list(reversed(wilderness_end_result["coords"])) + wilderness_end_stats = wilderness_end_result["stats"] + entry_B = wilderness_end_result["entry_point"] + + # Phase 3: Valhalla from entry_A to entry_B + network_result = self._valhalla_route( + entry_A["lat"], entry_A["lon"], + entry_B["lat"], entry_B["lon"], + mode + ) + + # Build response + return self._build_response( + wilderness_start=wilderness_start_coords, + wilderness_start_stats=wilderness_start_stats, + network_segment=network_result.get("segment"), + wilderness_end=wilderness_end_coords, + wilderness_end_stats=wilderness_end_stats, + mode=mode, + boundary_mode=boundary_mode, + entry_start=entry_A, + entry_end=entry_B, + scenario="B", + t0=t0, + valhalla_error=network_result.get("error") + ) + + def _pathfind_wilderness( + self, + origin_lat: float, origin_lon: float, + dest_lat: float, dest_lon: float, + entry_points: List[Dict], + boundary_mode: str, + label: str + ) -> Dict: + """ + Run MCP wilderness pathfinding from origin toward entry points. + + Args: + origin_lat, origin_lon: Starting point for pathfinding + dest_lat, dest_lon: Ultimate destination (for bbox calculation) + entry_points: List of candidate entry points + boundary_mode: How to handle barriers + label: "start" or "end" for error messages + + Returns: + {"status": "ok", "coords": [...], "stats": {...}, "entry_point": {...}} + or {"status": "error", "message": "..."} + """ + # Build bbox - only include origin and entry points, NOT distant destination + # The destination is handled by Valhalla, wilderness only needs to reach entry points + MAX_BBOX_DEGREES = 2.0 + all_lats = [origin_lat] + [p["lat"] for p in entry_points] + all_lons = [origin_lon] + [p["lon"] for p in entry_points] + + padding = 0.05 + bbox = { + "south": min(all_lats) - padding, + "north": max(all_lats) + padding, + "west": min(all_lons) - padding, + "east": max(all_lons) + padding, + } + + # Clamp bbox size, centering on origin + lat_span = bbox["north"] - bbox["south"] + lon_span = bbox["east"] - bbox["west"] + if lat_span > MAX_BBOX_DEGREES or lon_span > MAX_BBOX_DEGREES: + half_span = MAX_BBOX_DEGREES / 2 + bbox = { + "south": origin_lat - half_span, + "north": origin_lat + half_span, + "west": origin_lon - half_span, + "east": origin_lon + half_span, + } + + # Initialize readers + self._init_readers() + + # Load elevation + try: + elevation, meta = self.dem_reader.get_elevation_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + ) + except Exception as e: + return {"status": "error", "message": f"Failed to load elevation for {label}: {e}"} + + # Check memory + mem = check_memory_usage() + if mem > MEMORY_LIMIT_GB: + return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} + + # Load friction + friction_raw = self.friction_reader.get_friction_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + target_shape=elevation.shape + ) + friction_mult = friction_to_multiplier(friction_raw) + + # Load barriers + barriers = self.barrier_reader.get_barrier_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + target_shape=elevation.shape + ) + + # Load trails + trails = self.trail_reader.get_trails_grid( + south=bbox["south"], north=bbox["north"], + west=bbox["west"], east=bbox["east"], + target_shape=elevation.shape + ) + + # Compute cost grid (ALWAYS foot mode for wilderness) + cost = compute_cost_grid( + elevation, + cell_size_m=meta["cell_size_m"], + friction=friction_mult, + friction_raw=friction_raw, + trails=trails, + barriers=barriers, + wilderness=None, + mvum=None, + boundary_mode=boundary_mode, + mode="foot", + ) + + # Free intermediate arrays + del friction_mult, friction_raw + gc.collect() + + # Convert origin to pixel coordinates + origin_row, origin_col = self.dem_reader.latlon_to_pixel(origin_lat, origin_lon, meta) + + rows, cols = elevation.shape + if not (0 <= origin_row < rows and 0 <= origin_col < cols): + return {"status": "error", "message": f"{label.capitalize()} point outside grid bounds"} + + # Map entry points to pixels + entry_pixels = [] + for ep in entry_points: + row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) + if 0 <= row < rows and 0 <= col < cols: + entry_pixels.append({"row": row, "col": col, "entry_point": ep}) + + if not entry_pixels: + return {"status": "error", "message": f"No entry points map to grid bounds for {label}"} + + # Run MCP + mcp = MCP_Geometric(cost, fully_connected=True) + cumulative_costs, traceback = mcp.find_costs([(origin_row, origin_col)]) + + # Find nearest reachable entry point + best_entry = None + best_cost = np.inf + + for ep in entry_pixels: + ep_cost = cumulative_costs[ep["row"], ep["col"]] + if ep_cost < best_cost: + best_cost = ep_cost + best_entry = ep + + if best_entry is None or np.isinf(best_cost): + return { + "status": "error", + "message": f"No path found from {label} to any entry point (blocked by impassable terrain)" + } + + # Traceback path + path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) + + # Convert to coordinates and collect stats + coords = [] + elevations = [] + trail_values = [] + barrier_crossings = 0 + + for row, col in path_indices: + lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) + coords.append([lon, lat]) + elevations.append(elevation[row, col]) + trail_values.append(trails[row, col]) + if barriers[row, col] == 255: + barrier_crossings += 1 + + # Calculate distance + distance_m = 0 + for i in range(1, len(coords)): + lon1, lat1 = coords[i-1] + lon2, lat2 = coords[i] + distance_m += haversine_distance(lat1, lon1, lat2, lon2) + + # Elevation stats + elev_arr = np.array(elevations) + elev_diff = np.diff(elev_arr) + elev_gain = float(np.sum(elev_diff[elev_diff > 0])) + elev_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) + + # Trail stats + trail_arr = np.array(trail_values) + on_trail_cells = np.sum(trail_arr > 0) + total_cells = len(trail_arr) + on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 + + # Free memory + del mcp, cumulative_costs, traceback, cost, trails, barriers, elevation + gc.collect() + + return { + "status": "ok", + "coords": coords, + "stats": { + "distance_km": distance_m / 1000, + "effort_minutes": best_cost / 60, + "elevation_gain_m": elev_gain, + "elevation_loss_m": elev_loss, + "on_trail_pct": on_trail_pct, + "barrier_crossings": barrier_crossings, + "cell_count": total_cells, + }, + "entry_point": best_entry["entry_point"] + } + + def _valhalla_route( + self, + start_lat: float, start_lon: float, + end_lat: float, end_lon: float, + mode: str + ) -> Dict: + """ + Call Valhalla for network routing. + + Returns: + {"segment": {...}, "error": None} on success + {"segment": None, "error": "..."} on failure + """ + costing = MODE_TO_COSTING.get(mode, "pedestrian") + + valhalla_request = { + "locations": [ + {"lat": start_lat, "lon": start_lon}, + {"lat": end_lat, "lon": end_lon} + ], + "costing": costing, + "directions_options": {"units": "kilometers"} + } + + try: + resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) + + if resp.status_code == 200: + valhalla_data = resp.json() + trip = valhalla_data.get("trip", {}) + legs = trip.get("legs", []) + + if legs: + leg = legs[0] + shape = leg.get("shape", "") + coords = self._decode_polyline(shape) + + maneuvers = [] + for m in leg.get("maneuvers", []): + maneuvers.append({ + "instruction": m.get("instruction", ""), + "type": m.get("type", 0), + "distance_km": m.get("length", 0), + "time_seconds": m.get("time", 0), + "street_names": m.get("street_names", []), + }) + + summary = trip.get("summary", {}) + return { + "segment": { + "coordinates": coords, + "distance_km": summary.get("length", 0), + "duration_minutes": summary.get("time", 0) / 60, + "maneuvers": maneuvers, + }, + "error": None + } + + return {"segment": None, "error": f"Valhalla returned {resp.status_code}: {resp.text[:200]}"} + + except Exception as e: + return {"segment": None, "error": f"Valhalla request failed: {e}"} + + def _build_response( + self, + wilderness_start: Optional[List], + wilderness_start_stats: Optional[Dict], + network_segment: Optional[Dict], + wilderness_end: Optional[List], + wilderness_end_stats: Optional[Dict], + mode: str, + boundary_mode: str, + entry_start: Optional[Dict], + entry_end: Optional[Dict], + scenario: str, + t0: float, + valhalla_error: Optional[str] + ) -> Dict: + """Build the final GeoJSON response.""" + features = [] + + # Wilderness start segment + if wilderness_start and wilderness_start_stats: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "wilderness", + "segment_position": "start", + "effort_minutes": float(wilderness_start_stats["effort_minutes"]), + "distance_km": float(wilderness_start_stats["distance_km"]), + "elevation_gain_m": wilderness_start_stats["elevation_gain_m"], + "elevation_loss_m": wilderness_start_stats["elevation_loss_m"], + "boundary_mode": boundary_mode, + "on_trail_pct": wilderness_start_stats["on_trail_pct"], + "barrier_crossings": wilderness_start_stats["barrier_crossings"], + "wilderness_mode": "foot", + }, + "geometry": {"type": "LineString", "coordinates": wilderness_start} + }) + + # Network segment + if network_segment: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "network", + "distance_km": network_segment["distance_km"], + "duration_minutes": network_segment["duration_minutes"], + "maneuvers": network_segment["maneuvers"], + "network_mode": mode, + }, + "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} + }) + + # Wilderness end segment + if wilderness_end and wilderness_end_stats: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "wilderness", + "segment_position": "end", + "effort_minutes": float(wilderness_end_stats["effort_minutes"]), + "distance_km": float(wilderness_end_stats["distance_km"]), + "elevation_gain_m": wilderness_end_stats["elevation_gain_m"], + "elevation_loss_m": wilderness_end_stats["elevation_loss_m"], + "boundary_mode": boundary_mode, + "on_trail_pct": wilderness_end_stats["on_trail_pct"], + "barrier_crossings": wilderness_end_stats["barrier_crossings"], + "wilderness_mode": "foot", + }, + "geometry": {"type": "LineString", "coordinates": wilderness_end} + }) + + # Combined path + combined_coords = [] + if wilderness_start: + combined_coords.extend(wilderness_start) + if network_segment: + # Skip first coord if we already have wilderness_start (avoid duplicate) + start_idx = 1 if wilderness_start else 0 + combined_coords.extend(network_segment["coordinates"][start_idx:]) + if wilderness_end: + # Skip first coord (avoid duplicate with network end) + start_idx = 1 if (wilderness_start or network_segment) else 0 + combined_coords.extend(wilderness_end[start_idx:]) + + if combined_coords: + features.append({ + "type": "Feature", + "properties": { + "segment_type": "combined", + "wilderness_mode": "foot", + "network_mode": mode, + "boundary_mode": boundary_mode, + "scenario": scenario, + }, + "geometry": {"type": "LineString", "coordinates": combined_coords} + }) + + geojson = {"type": "FeatureCollection", "features": features} + + # Calculate totals + total_distance_km = 0.0 + total_effort_minutes = 0.0 + wilderness_distance_km = 0.0 + wilderness_effort_minutes = 0.0 + network_distance_km = 0.0 + network_duration_minutes = 0.0 + barrier_crossings = 0 + on_trail_pct = 0.0 + + if wilderness_start_stats: + wilderness_distance_km += wilderness_start_stats["distance_km"] + wilderness_effort_minutes += wilderness_start_stats["effort_minutes"] + barrier_crossings += wilderness_start_stats["barrier_crossings"] + on_trail_pct = wilderness_start_stats["on_trail_pct"] + + if wilderness_end_stats: + wilderness_distance_km += wilderness_end_stats["distance_km"] + wilderness_effort_minutes += wilderness_end_stats["effort_minutes"] + barrier_crossings += wilderness_end_stats["barrier_crossings"] + # Average on-trail percentage if we have both + if wilderness_start_stats: + on_trail_pct = (on_trail_pct + wilderness_end_stats["on_trail_pct"]) / 2 + else: + on_trail_pct = wilderness_end_stats["on_trail_pct"] + + if network_segment: + network_distance_km = network_segment["distance_km"] + network_duration_minutes = network_segment["duration_minutes"] + + total_distance_km = wilderness_distance_km + network_distance_km + total_effort_minutes = wilderness_effort_minutes + network_duration_minutes + + summary = { + "total_distance_km": float(total_distance_km), + "total_effort_minutes": float(total_effort_minutes), + "wilderness_distance_km": float(wilderness_distance_km), + "wilderness_effort_minutes": float(wilderness_effort_minutes), + "network_distance_km": float(network_distance_km), + "network_duration_minutes": float(network_duration_minutes), + "on_trail_pct": float(on_trail_pct), + "barrier_crossings": barrier_crossings, + "boundary_mode": boundary_mode, + "wilderness_mode": "foot", + "network_mode": mode, + "scenario": scenario, + "computation_time_s": time.time() - t0, + } + + if entry_start: + summary["entry_point_start"] = { + "lat": entry_start["lat"], + "lon": entry_start["lon"], + "highway_class": entry_start["highway_class"], + "name": entry_start.get("name", ""), + } + + if entry_end: + summary["entry_point_end"] = { + "lat": entry_end["lat"], + "lon": entry_end["lon"], + "highway_class": entry_end["highway_class"], + "name": entry_end.get("name", ""), + } + + result = {"status": "ok", "route": geojson, "summary": summary} + + if valhalla_error: + result["warning"] = f"Network segment incomplete: {valhalla_error}" + + return result + + def _decode_polyline(self, encoded: str, precision: int = 6) -> List[List[float]]: + """Decode a polyline string into coordinates [lon, lat].""" + coords = [] + index = 0 + lat = 0 + lon = 0 + + while index < len(encoded): + shift = 0 + result = 0 + while True: + b = ord(encoded[index]) - 63 + index += 1 + result |= (b & 0x1f) << shift + shift += 5 + if b < 0x20: + break + dlat = ~(result >> 1) if result & 1 else result >> 1 + lat += dlat + + shift = 0 + result = 0 + while True: + b = ord(encoded[index]) - 63 + index += 1 + result |= (b & 0x1f) << shift + shift += 5 + if b < 0x20: + break + dlon = ~(result >> 1) if result & 1 else result >> 1 + lon += dlon + + coords.append([lon / (10 ** precision), lat / (10 ** precision)]) + + return coords + + def close(self): + """Close all readers.""" + if self.dem_reader: + self.dem_reader.close() + if self.friction_reader: + self.friction_reader.close() + if self.barrier_reader: + self.barrier_reader.close() + if self.wilderness_reader: + self.wilderness_reader.close() + if self.trail_reader: + self.trail_reader.close() + self.entry_index.close() + + +def build_entry_index(): + """Build the trail entry point index.""" + index = EntryPointIndex() + stats = index.build_index() + index.close() + return stats + + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "build": + print("Building trail entry point index...") + stats = build_entry_index() + print(f"\nDone. Total entry points: {stats['total']}") + + elif len(sys.argv) > 1 and sys.argv[1] == "test": + print("Testing router (all scenarios)...") + print("=" * 60) + + router = OffrouteRouter() + + # Test points + wilderness_start = (44.0543, -115.4237) # Off-network + wilderness_end = (45.2, -115.5) # Deep wilderness (Frank Church) + road_start = (43.6150, -116.2023) # Boise downtown (on-network) + road_end = (43.5867, -116.5625) # Nampa (on-network) + + tests = [ + ("A: wilderness→road", wilderness_start, (44.0814, -115.5021)), + ("B: wilderness→wilderness", wilderness_start, wilderness_end), + ("C: road→wilderness", road_start, wilderness_start), + ("D: road→road", road_start, road_end), + ] + + for label, (slat, slon), (elat, elon) in tests: + print(f"\n{label}") + print("-" * 40) + + result = router.route( + start_lat=slat, start_lon=slon, + end_lat=elat, end_lon=elon, + mode="foot", boundary_mode="pragmatic" + ) + + if result["status"] == "ok": + s = result["summary"] + print(f" Scenario: {s.get('scenario', '?')}") + print(f" Total: {s['total_distance_km']:.2f} km, {s['total_effort_minutes']:.1f} min") + print(f" Wilderness: {s['wilderness_distance_km']:.2f} km") + print(f" Network: {s['network_distance_km']:.2f} km") + if s.get('entry_point_start'): + ep = s['entry_point_start'] + print(f" Entry (start): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") + if s.get('entry_point_end'): + ep = s['entry_point_end'] + print(f" Entry (end): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") + else: + print(f" ERROR: {result['message']}") + + router.close() + + else: + print("Usage:") + print(" python router.py build # Build entry point index") + print(" python router.py test # Test all scenarios") From d8f84ab55aa8f6e910ed6f2a52aae2032a70c91b Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 9 May 2026 03:34:37 +0000 Subject: [PATCH 48/72] offroute: revert off-network threshold to 10m Co-Authored-By: Claude Opus 4.5 --- lib/offroute/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index 4b988ab..bdb7894 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -61,7 +61,7 @@ EXPANDED_SEARCH_RADIUS_KM = 100 MEMORY_LIMIT_GB = 12 # Off-network detection threshold (meters) -OFF_NETWORK_THRESHOLD_M = 50 +OFF_NETWORK_THRESHOLD_M = 10 # Mode to Valhalla costing mapping MODE_TO_COSTING = { From a04c10ad556ba5e5d915c402bd003a6e5d6db609 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 9 May 2026 05:05:00 +0000 Subject: [PATCH 49/72] offroute: wilderness maneuvers with bearing, elevation, grade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Segment breaks on: bearing change >30°, grade category change, distance >0.5mi - Grade categories: flat (0-2°), gentle (2-5°), moderate (5-10°), steep (10-15°), very steep (15°+) - Distance formatting: feet with commas <1mi, miles with decimal ≥1mi - Instruction format: Head {cardinal}, gaining/descending X ft ({grade} uphill/downhill) — {dist} Co-Authored-By: Claude --- lib/offroute/router.py | 209 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) diff --git a/lib/offroute/router.py b/lib/offroute/router.py index bdb7894..bd3d379 100644 --- a/lib/offroute/router.py +++ b/lib/offroute/router.py @@ -728,6 +728,7 @@ class OffrouteRouter: # Extract results wilderness_coords = wilderness_result["coords"] wilderness_stats = wilderness_result["stats"] + wilderness_elevations = wilderness_result.get("elevations", []) best_entry = wilderness_result["entry_point"] entry_lat = best_entry["lat"] @@ -740,9 +741,11 @@ class OffrouteRouter: return self._build_response( wilderness_start=wilderness_coords, wilderness_start_stats=wilderness_stats, + wilderness_start_elevations=wilderness_elevations, network_segment=network_result.get("segment"), wilderness_end=None, wilderness_end_stats=None, + wilderness_end_elevations=None, mode=mode, boundary_mode=boundary_mode, entry_start=best_entry, @@ -805,6 +808,7 @@ class OffrouteRouter: # The path is from end→entry, reverse it for display (entry→end) wilderness_coords = list(reversed(wilderness_result["coords"])) wilderness_stats = wilderness_result["stats"] + wilderness_elevations = list(reversed(wilderness_result.get("elevations", []))) best_entry = wilderness_result["entry_point"] entry_lat = best_entry["lat"] @@ -817,9 +821,11 @@ class OffrouteRouter: return self._build_response( wilderness_start=None, wilderness_start_stats=None, + wilderness_start_elevations=None, network_segment=network_result.get("segment"), wilderness_end=wilderness_coords, wilderness_end_stats=wilderness_stats, + wilderness_end_elevations=wilderness_elevations, mode=mode, boundary_mode=boundary_mode, entry_start=None, @@ -885,6 +891,7 @@ class OffrouteRouter: wilderness_start_coords = wilderness_start_result["coords"] wilderness_start_stats = wilderness_start_result["stats"] + wilderness_start_elevations = wilderness_start_result.get("elevations", []) entry_A = wilderness_start_result["entry_point"] # Phase 2: Wilderness pathfinding from END (run after freeing phase 1 memory) @@ -899,6 +906,7 @@ class OffrouteRouter: # Reverse the end wilderness path (it's end→entry, we want entry→end for display) wilderness_end_coords = list(reversed(wilderness_end_result["coords"])) wilderness_end_stats = wilderness_end_result["stats"] + wilderness_end_elevations = list(reversed(wilderness_end_result.get("elevations", []))) entry_B = wilderness_end_result["entry_point"] # Phase 3: Valhalla from entry_A to entry_B @@ -912,9 +920,11 @@ class OffrouteRouter: return self._build_response( wilderness_start=wilderness_start_coords, wilderness_start_stats=wilderness_start_stats, + wilderness_start_elevations=wilderness_start_elevations, network_segment=network_result.get("segment"), wilderness_end=wilderness_end_coords, wilderness_end_stats=wilderness_end_stats, + wilderness_end_elevations=wilderness_end_elevations, mode=mode, boundary_mode=boundary_mode, entry_start=entry_A, @@ -1109,6 +1119,7 @@ class OffrouteRouter: return { "status": "ok", "coords": coords, + "elevations": elevations, # Raw elevation values for maneuver generation "stats": { "distance_km": distance_m / 1000, "effort_minutes": best_cost / 60, @@ -1184,13 +1195,199 @@ class OffrouteRouter: except Exception as e: return {"segment": None, "error": f"Valhalla request failed: {e}"} + def _generate_wilderness_maneuvers( + self, + coords: List[List[float]], + elevations: List[float], + position: str = "start" + ) -> List[Dict]: + """ + Generate turn-by-turn maneuvers for a wilderness segment. + + Segment breaks occur when: + - Bearing changes more than 30° from segment start + - Grade category changes (flat→steep etc) + - Distance exceeds 0.5 miles without a break + + Args: + coords: [[lon, lat], ...] coordinate list + elevations: Elevation values (meters) for each coord + position: "start" or "end" for labeling + + Returns: + List of maneuver dicts with instruction, distance, elevation, grade, bearing + """ + if not coords or len(coords) < 2: + return [] + + # Constants + COMPASS = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", + "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"] + MAX_SEGMENT_M = 804.672 # 0.5 miles in meters + BEARING_THRESHOLD = 30 # degrees + M_TO_FT = 3.28084 + M_TO_MI = 0.000621371 + + def get_bearing(lat1, lon1, lat2, lon2): + """Calculate bearing between two points (degrees 0-360).""" + dlon = math.radians(lon2 - lon1) + lat1_r, lat2_r = math.radians(lat1), math.radians(lat2) + x = math.sin(dlon) * math.cos(lat2_r) + y = math.cos(lat1_r) * math.sin(lat2_r) - math.sin(lat1_r) * math.cos(lat2_r) * math.cos(dlon) + return (math.degrees(math.atan2(x, y)) + 360) % 360 + + def bearing_to_cardinal(bearing): + """Convert bearing to 16-point compass direction.""" + return COMPASS[round(bearing / 22.5) % 16] + + def get_grade_category(grade_deg): + """Categorize grade angle: flat (0-2°), gentle (2-5°), moderate (5-10°), steep (10-15°), very steep (15°+).""" + grade_abs = abs(grade_deg) + if grade_abs < 2: + return "flat" + elif grade_abs < 5: + return "gentle" + elif grade_abs < 10: + return "moderate" + elif grade_abs < 15: + return "steep" + else: + return "very steep" + + def format_distance(meters): + """Format distance: feet with commas if under 1 mile, miles with one decimal if over.""" + miles = meters * M_TO_MI + if miles < 1.0: + feet = round(meters * M_TO_FT) + return f"{feet:,} ft" + else: + return f"{miles:.1f} mi" + + def build_instruction(cardinal, gain_ft, loss_ft, grade_cat, distance_m): + """Build instruction string per spec.""" + dist_str = format_distance(distance_m) + if grade_cat == "flat": + return f"Head {cardinal} on level ground — {dist_str}" + elif gain_ft > loss_ft: + return f"Head {cardinal}, gaining {gain_ft:,} ft ({grade_cat} uphill) — {dist_str}" + else: + return f"Head {cardinal}, descending {loss_ft:,} ft ({grade_cat} downhill) — {dist_str}" + + maneuvers = [] + i = 0 + + while i < len(coords) - 1: + seg_start_idx = i + seg_start_lon, seg_start_lat = coords[i] + seg_start_elev = elevations[i] if i < len(elevations) else 0 + + # Initial bearing for this segment + next_lon, next_lat = coords[i + 1] + seg_bearing = get_bearing(seg_start_lat, seg_start_lon, next_lat, next_lon) + + # Accumulate elevation changes within segment + seg_distance_m = 0 + seg_elev_gain = 0 + seg_elev_loss = 0 + prev_elev = seg_start_elev + + # Calculate initial grade category + step_dist = haversine_distance(seg_start_lat, seg_start_lon, next_lat, next_lon) + step_elev_change = (elevations[i + 1] if i + 1 < len(elevations) else seg_start_elev) - seg_start_elev + initial_grade = math.degrees(math.atan(step_elev_change / step_dist)) if step_dist > 0 else 0 + seg_grade_cat = get_grade_category(initial_grade) + + j = i + while j < len(coords) - 1: + lon1, lat1 = coords[j] + lon2, lat2 = coords[j + 1] + elev1 = elevations[j] if j < len(elevations) else prev_elev + elev2 = elevations[j + 1] if j + 1 < len(elevations) else elev1 + + step_dist = haversine_distance(lat1, lon1, lat2, lon2) + step_bearing = get_bearing(lat1, lon1, lat2, lon2) + step_elev_change = elev2 - elev1 + step_grade = math.degrees(math.atan(step_elev_change / step_dist)) if step_dist > 0 else 0 + step_grade_cat = get_grade_category(step_grade) + + # Check break conditions + bearing_diff = abs(step_bearing - seg_bearing) + if bearing_diff > 180: + bearing_diff = 360 - bearing_diff + + # Break if: bearing changed >30°, grade category changed, or distance >0.5mi + if seg_distance_m > 0: # Don't break on first step + if bearing_diff > BEARING_THRESHOLD: + break + if step_grade_cat != seg_grade_cat: + break + if seg_distance_m >= MAX_SEGMENT_M: + break + + # Accumulate + seg_distance_m += step_dist + if step_elev_change > 0: + seg_elev_gain += step_elev_change + else: + seg_elev_loss += abs(step_elev_change) + prev_elev = elev2 + j += 1 + + # Compute segment stats + seg_end_idx = j + gain_ft = round(seg_elev_gain * M_TO_FT) + loss_ft = round(seg_elev_loss * M_TO_FT) + + # Net elevation change for grade calculation + net_elev_change = seg_elev_gain - seg_elev_loss + grade_deg = math.degrees(math.atan(net_elev_change / seg_distance_m)) if seg_distance_m > 0 else 0 + grade_cat = get_grade_category(grade_deg) + + cardinal = bearing_to_cardinal(seg_bearing) + instruction = build_instruction(cardinal, gain_ft, loss_ft, grade_cat, seg_distance_m) + + maneuvers.append({ + "instruction": instruction, + "type": "wilderness", + "distance_m": round(seg_distance_m, 1), + "elevation_gain_ft": gain_ft, + "elevation_loss_ft": loss_ft, + "grade_degrees": round(grade_deg, 1), + "grade_category": grade_cat, + "bearing": round(seg_bearing, 1), + "cardinal": cardinal, + }) + + i = seg_end_idx + + # Add arrival maneuver + arrival_text = "Arrive at trail/road" if position == "start" else "Arrive at destination" + last_bearing = maneuvers[-1]["bearing"] if maneuvers else 0 + last_cardinal = maneuvers[-1]["cardinal"] if maneuvers else "N" + + maneuvers.append({ + "instruction": arrival_text, + "type": "arrival", + "distance_m": 0, + "elevation_gain_ft": 0, + "elevation_loss_ft": 0, + "grade_degrees": 0, + "grade_category": "flat", + "bearing": last_bearing, + "cardinal": last_cardinal, + }) + + return maneuvers + def _build_response( self, wilderness_start: Optional[List], wilderness_start_stats: Optional[Dict], + wilderness_start_elevations: Optional[List], network_segment: Optional[Dict], wilderness_end: Optional[List], wilderness_end_stats: Optional[Dict], + wilderness_end_elevations: Optional[List], mode: str, boundary_mode: str, entry_start: Optional[Dict], @@ -1204,6 +1401,11 @@ class OffrouteRouter: # Wilderness start segment if wilderness_start and wilderness_start_stats: + wild_start_maneuvers = [] + if wilderness_start_elevations: + wild_start_maneuvers = self._generate_wilderness_maneuvers( + wilderness_start, wilderness_start_elevations, position="start" + ) features.append({ "type": "Feature", "properties": { @@ -1217,6 +1419,7 @@ class OffrouteRouter: "on_trail_pct": wilderness_start_stats["on_trail_pct"], "barrier_crossings": wilderness_start_stats["barrier_crossings"], "wilderness_mode": "foot", + "maneuvers": wild_start_maneuvers, }, "geometry": {"type": "LineString", "coordinates": wilderness_start} }) @@ -1237,6 +1440,11 @@ class OffrouteRouter: # Wilderness end segment if wilderness_end and wilderness_end_stats: + wild_end_maneuvers = [] + if wilderness_end_elevations: + wild_end_maneuvers = self._generate_wilderness_maneuvers( + wilderness_end, wilderness_end_elevations, position="end" + ) features.append({ "type": "Feature", "properties": { @@ -1250,6 +1458,7 @@ class OffrouteRouter: "on_trail_pct": wilderness_end_stats["on_trail_pct"], "barrier_crossings": wilderness_end_stats["barrier_crossings"], "wilderness_mode": "foot", + "maneuvers": wild_end_maneuvers, }, "geometry": {"type": "LineString", "coordinates": wilderness_end} }) From f276b957532117c73e9efbbb2bee3f2c6d9380bb Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 20 May 2026 05:33:45 +0000 Subject: [PATCH 50/72] Add /api/reverse// localhost-sourced enrichment bundle New geocode_bp sibling to the existing /api/reverse?lat=&lon= route (which is unchanged). Returns a flat 9-field bundle for the Central enrichment framework: name, city, county, state, country, postal_code (Photon), timezone (timezones.sqlite via R-tree + shapely), landclass (in-process lookup_landclass), elevation_m (Valhalla /height). - Each component lookup is independent and wrapped in try/except: a failure logs a warning and yields null, never a 5xx. 400 only on unparseable / out-of-range coordinates. - lat/lon parsed manually rather than via Flask , which rejects negative and integer coordinates and would 404 instead of 400. - 10k-entry / 24h TTLCache keyed on coords rounded to 4 decimals. - Tests mock Photon/Valhalla/landclass; one test exercises the real timezones.sqlite. cachetools pinned in requirements.txt. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/netsyms_api.py | 161 +++++++++++++++++++++++++++++++++++++ lib/reverse_bundle_test.py | 136 +++++++++++++++++++++++++++++++ requirements.txt | 1 + 3 files changed, 298 insertions(+) create mode 100644 lib/reverse_bundle_test.py diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 4a0847f..e530d15 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -4,13 +4,19 @@ RECON Netsyms API + Geocode — Flask Blueprints. GET /api/netsyms/lookup?q=&country= GET /api/netsyms/health GET /api/geocode?q=&limit= (Photon-first search with ranked results) +GET /api/reverse// (localhost-sourced enrichment bundle for Central) """ +import sqlite3 +import threading + +from cachetools import TTLCache from flask import Blueprint, request, jsonify from . import netsyms from . import address_book from . import nav_tools +from .geocode import PHOTON_URL from .utils import setup_logging logger = setup_logging('recon.netsyms_api') @@ -124,3 +130,158 @@ def api_reverse(): results = _parse_photon_features(features, source='photon_reverse') return jsonify({'query': query_str, 'results': results, 'count': len(results)}) + + +# ───────────────────────────────────────────────────────────────────────── +# /api/reverse// — localhost-sourced enrichment bundle (Central) +# +# Sibling to the query-string /api/reverse above; that route is unchanged. +# Every component is sourced from localhost only (Photon, timezones.sqlite, +# in-process landclass/PostGIS, Valhalla). Each lookup is independent: a +# component failure logs a warning and yields null — never a 5xx. +# ───────────────────────────────────────────────────────────────────────── + +_TZ_DB_PATH = "/mnt/nav/sources/timezones.sqlite" +_VALHALLA_HEIGHT_URL = "http://localhost:8002/height" + +# Full bundle cache: key=(round(lat,4), round(lon,4)) -> dict. ~10k entries, 24h TTL. +_REVERSE_BUNDLE_CACHE = TTLCache(maxsize=10_000, ttl=86_400) +_REVERSE_BUNDLE_LOCK = threading.Lock() + +_BUNDLE_KEYS = ('name', 'city', 'county', 'state', 'country', + 'postal_code', 'timezone', 'landclass', 'elevation_m') + + +def _spatialite_blob_to_wkb(blob): + """Recover standard WKB from a SpatiaLite geometry BLOB. + + Layout: [00][endian][srid:4][mbr:32][7C][WKB body][FE]. The body omits the + leading byte-order marker, so we re-prepend it and drop the trailing 0xFE. + """ + return bytes([blob[1]]) + blob[39:-1] + + +def _reverse_photon(lat, lon): + """Nearest-feature admin fields from local Photon. Returns the six address + fields (any value may be None). Mirrors the existing /api/reverse call.""" + import requests as http_requests + resp = http_requests.get( + f"{PHOTON_URL}/reverse", + params={"lat": lat, "lon": lon, "limit": 1}, + timeout=10, + ) + resp.raise_for_status() + features = resp.json().get("features", []) + if not features: + return {} + props = features[0].get("properties", {}) + return { + "name": props.get("name"), + "city": props.get("city"), + "county": props.get("county"), + "state": props.get("state"), + "country": props.get("country"), + "postal_code": props.get("postcode"), + } + + +def _reverse_timezone(lat, lon): + """IANA tzid for the point from local timezones.sqlite (SpatiaLite tz_world). + + Uses the table's R-tree index for an MBR prefilter, then shapely + point-in-polygon on the few candidates. Returns None if unresolved. + """ + from shapely import wkb + from shapely.geometry import Point + con = sqlite3.connect(f"file:{_TZ_DB_PATH}?mode=ro", uri=True) + try: + cur = con.cursor() + cur.execute( + "SELECT pkid FROM idx_tz_world_geom " + "WHERE xmin<=? AND xmax>=? AND ymin<=? AND ymax>=?", + (lon, lon, lat, lat), + ) + candidates = [r[0] for r in cur.fetchall()] + if not candidates: + return None + pt = Point(lon, lat) + for pk in candidates: + row = cur.execute( + "SELECT tzid, geom FROM tz_world WHERE pk_uid=?", (pk,) + ).fetchone() + if row and wkb.loads(_spatialite_blob_to_wkb(row[1])).contains(pt): + return row[0] + return None + finally: + con.close() + + +def _reverse_landclass(lat, lon): + """Most-specific PAD-US land class for the point, looked up in-process. + Returns None when there is no coverage or landclass is unavailable.""" + from .landclass import lookup_landclass, format_summary + return format_summary(lookup_landclass(lat, lon)) + + +def _reverse_elevation(lat, lon): + """Elevation in metres from local Valhalla /height. None on failure.""" + import requests as http_requests + resp = http_requests.post( + _VALHALLA_HEIGHT_URL, + json={"shape": [{"lat": lat, "lon": lon}]}, + timeout=10, + ) + resp.raise_for_status() + heights = resp.json().get("height", []) + return heights[0] if heights else None + + +@geocode_bp.route('/api/reverse//') +def api_reverse_bundle(lat, lon): + """Localhost-sourced reverse-geocode enrichment bundle for Central. + + GET /api/reverse// + + Always returns 200 with EXACTLY these keys (any may be null): + name, city, county, state, country, postal_code, timezone, landclass, elevation_m + + lat/lon are parsed manually (not via Flask's converter, which + rejects negative and integer coordinates) so out-of-range or unparseable + input yields 400 per contract; 503 is reserved for catastrophic failure. + """ + try: + lat = float(lat) + lon = float(lon) + except (ValueError, TypeError): + return jsonify({'error': 'lat and lon must be numbers'}), 400 + if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): + return jsonify({'error': 'lat must be -90..90, lon must be -180..180'}), 400 + + key = (round(lat, 4), round(lon, 4)) + with _REVERSE_BUNDLE_LOCK: + cached = _REVERSE_BUNDLE_CACHE.get(key) + if cached is not None: + return jsonify(cached) + + bundle = {k: None for k in _BUNDLE_KEYS} + + try: + bundle.update(_reverse_photon(lat, lon)) + except Exception: + logger.warning("reverse-bundle: Photon lookup failed for %s,%s", lat, lon) + try: + bundle['timezone'] = _reverse_timezone(lat, lon) + except Exception: + logger.warning("reverse-bundle: timezone lookup failed for %s,%s", lat, lon) + try: + bundle['landclass'] = _reverse_landclass(lat, lon) + except Exception: + logger.warning("reverse-bundle: landclass lookup failed for %s,%s", lat, lon) + try: + bundle['elevation_m'] = _reverse_elevation(lat, lon) + except Exception: + logger.warning("reverse-bundle: elevation lookup failed for %s,%s", lat, lon) + + with _REVERSE_BUNDLE_LOCK: + _REVERSE_BUNDLE_CACHE[key] = bundle + return jsonify(bundle) diff --git a/lib/reverse_bundle_test.py b/lib/reverse_bundle_test.py new file mode 100644 index 0000000..d825b71 --- /dev/null +++ b/lib/reverse_bundle_test.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""Tests for the /api/reverse// enrichment bundle (lib.netsyms_api). + +Photon/Valhalla/landclass are mocked so the suite runs without live services; +one timezone test exercises the real SpatiaLite DB when it is present. Plain +asserts + a __main__ runner, matching the rest of lib/*_test.py. +""" + +import os +import sys +from unittest import mock + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from flask import Flask +from lib import netsyms_api + +EXPECTED_KEYS = {'name', 'city', 'county', 'state', 'country', + 'postal_code', 'timezone', 'landclass', 'elevation_m'} + + +def _client(): + app = Flask(__name__) + app.register_blueprint(netsyms_api.geocode_bp) + return app.test_client() + + +def _clear_cache(): + netsyms_api._REVERSE_BUNDLE_CACHE.clear() + + +def test_happy_path(): + _clear_cache() + with mock.patch.object(netsyms_api, '_reverse_photon', return_value={ + 'name': 'Where you are', 'city': 'Boise', 'county': 'Ada', + 'state': 'Idaho', 'country': 'United States', 'postal_code': '83701'}), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value='America/Boise'), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value='Boise National Forest'), \ + mock.patch.object(netsyms_api, '_reverse_elevation', return_value=824): + resp = _client().get('/api/reverse/43.6150/-116.2023') + assert resp.status_code == 200, resp.status_code + data = resp.get_json() + assert set(data.keys()) == EXPECTED_KEYS, data.keys() + assert data['city'] == 'Boise' and data['timezone'] == 'America/Boise' + assert data['landclass'] == 'Boise National Forest' and data['elevation_m'] == 824 + print(" PASS: happy path — all 9 fields populated, exact key set") + + +def test_negative_and_integer_coords_parse(): + # Regression: Flask's converter would 404 these; manual parse must not. + _clear_cache() + with mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_elevation', return_value=None): + for path in ('/api/reverse/43.6/-116.2', '/api/reverse/43/-116'): + resp = _client().get(path) + assert resp.status_code == 200, f"{path} -> {resp.status_code}" + assert set(resp.get_json().keys()) == EXPECTED_KEYS + print(" PASS: negative and integer coordinates parse (200, not 404)") + + +def test_partial_failure_returns_200_with_nulls(): + _clear_cache() + with mock.patch.object(netsyms_api, '_reverse_photon', + side_effect=RuntimeError('photon down')), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value='America/Boise'), \ + mock.patch.object(netsyms_api, '_reverse_landclass', + side_effect=RuntimeError('postgis down')), \ + mock.patch.object(netsyms_api, '_reverse_elevation', return_value=824): + resp = _client().get('/api/reverse/43.6150/-116.2023') + assert resp.status_code == 200, resp.status_code + data = resp.get_json() + assert set(data.keys()) == EXPECTED_KEYS + assert data['name'] is None and data['city'] is None # photon failed -> nulls + assert data['landclass'] is None # landclass failed -> null + assert data['timezone'] == 'America/Boise' and data['elevation_m'] == 824 + print(" PASS: per-component failure -> 200 with nulls, no 5xx") + + +def test_ocean_point_mostly_null(): + _clear_cache() + with mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value='Etc/GMT+2'), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_elevation', return_value=0): + resp = _client().get('/api/reverse/0.0/-30.0') + assert resp.status_code == 200, resp.status_code + data = resp.get_json() + assert set(data.keys()) == EXPECTED_KEYS + assert data['city'] is None and data['country'] is None and data['landclass'] is None + print(" PASS: ocean point -> 200, mostly null") + + +def test_invalid_input_400(): + _clear_cache() + client = _client() + for path in ('/api/reverse/9999/0', '/api/reverse/0/9999', '/api/reverse/abc/0'): + resp = client.get(path) + assert resp.status_code == 400, f"{path} -> {resp.status_code}" + print(" PASS: out-of-range / unparseable input -> 400") + + +def test_cache_hit_serves_without_recompute(): + _clear_cache() + with mock.patch.object(netsyms_api, '_reverse_photon', + return_value={'name': 'X'}) as m_photon, \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_elevation', return_value=None): + client = _client() + client.get('/api/reverse/12.3456/-65.4321') + client.get('/api/reverse/12.3456/-65.4321') # same key (rounded) -> cached + assert m_photon.call_count == 1, f"expected 1 compute, got {m_photon.call_count}" + print(" PASS: second identical request served from cache (no recompute)") + + +def test_real_timezone_db(): + if not os.path.exists(netsyms_api._TZ_DB_PATH): + print(" SKIP: real timezone test (timezones.sqlite not present)") + return + assert netsyms_api._reverse_timezone(43.6150, -116.2023) == 'America/Boise' + assert netsyms_api._reverse_timezone(40.7128, -74.0060) == 'America/New_York' + print(" PASS: real timezones.sqlite point-in-polygon") + + +if __name__ == '__main__': + print("Running reverse-bundle tests...") + test_happy_path() + test_negative_and_integer_coords_parse() + test_partial_failure_returns_200_with_nulls() + test_ocean_point_mostly_null() + test_invalid_input_400() + test_cache_hit_serves_without_recompute() + test_real_timezone_db() + print("All tests passed.") diff --git a/requirements.txt b/requirements.txt index f643cd8..1da21bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ anyio==4.12.1 babel==2.18.0 beautifulsoup4==4.14.3 blinker==1.9.0 +cachetools==7.1.3 certifi==2026.1.4 cffi==2.0.0 charset-normalizer==3.4.4 From 3d2d69cd56504af44ea54062e6dd3ac048e09345 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 20 May 2026 15:20:35 +0000 Subject: [PATCH 51/72] Switch /api/reverse// elevation source from Valhalla to planet-DEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per OFFROUTE-ARCHITECTURE.md §9 ("planet-dem.pmtiles as single elevation source"). The bundle endpoint previously called Valhalla /height, which only has 48 Idaho HGT tiles; it now reads the planet-scale Terrarium PMTiles that already back the frontend hillshade and contours. - dem.py: add DEMReader.sample_point(lat, lon) — one z12 tile (LRU-cached), Web-Mercator pixel index, None outside the +/-85.05 pole cap or when untiled. - netsyms_api.py: module-level DEMReader singleton (lazy mmap, None if init fails); _reverse_elevation now calls _DEM.sample_point; drop the Valhalla HTTP call and _VALHALLA_HEIGHT_URL. - tests: DEM-mock and DEM-unavailable cases; EXPECTED_KEYS derives from _BUNDLE_KEYS. All 9 tests pass. Verified live: Boise 824m, London 8m, Tokyo 35m, Yosemite 2804m, pole -> None. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/netsyms_api.py | 30 ++++++++++++++++------------ lib/offroute/dem.py | 23 ++++++++++++++++++++- lib/reverse_bundle_test.py | 41 +++++++++++++++++++++++++++++++++++--- 3 files changed, 77 insertions(+), 17 deletions(-) diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index e530d15..d217eb0 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -17,6 +17,7 @@ from . import netsyms from . import address_book from . import nav_tools from .geocode import PHOTON_URL +from .offroute.dem import DEMReader from .utils import setup_logging logger = setup_logging('recon.netsyms_api') @@ -137,12 +138,11 @@ def api_reverse(): # # Sibling to the query-string /api/reverse above; that route is unchanged. # Every component is sourced from localhost only (Photon, timezones.sqlite, -# in-process landclass/PostGIS, Valhalla). Each lookup is independent: a -# component failure logs a warning and yields null — never a 5xx. +# in-process landclass/PostGIS, planet-DEM PMTiles). Each lookup is +# independent: a component failure logs a warning and yields null — never 5xx. # ───────────────────────────────────────────────────────────────────────── _TZ_DB_PATH = "/mnt/nav/sources/timezones.sqlite" -_VALHALLA_HEIGHT_URL = "http://localhost:8002/height" # Full bundle cache: key=(round(lat,4), round(lon,4)) -> dict. ~10k entries, 24h TTL. _REVERSE_BUNDLE_CACHE = TTLCache(maxsize=10_000, ttl=86_400) @@ -151,6 +151,14 @@ _REVERSE_BUNDLE_LOCK = threading.Lock() _BUNDLE_KEYS = ('name', 'city', 'county', 'state', 'country', 'postal_code', 'timezone', 'landclass', 'elevation_m') +# planet-DEM elevation source (single PMTiles, replaces Valhalla /height). +# Instantiated once at import; the underlying mmap is lazy. None if unavailable. +try: + _DEM = DEMReader() +except Exception as e: # pragma: no cover - depends on PMTiles availability + logger.warning("DEMReader unavailable, elevation will be null: %s", e) + _DEM = None + def _spatialite_blob_to_wkb(blob): """Recover standard WKB from a SpatiaLite geometry BLOB. @@ -224,16 +232,12 @@ def _reverse_landclass(lat, lon): def _reverse_elevation(lat, lon): - """Elevation in metres from local Valhalla /height. None on failure.""" - import requests as http_requests - resp = http_requests.post( - _VALHALLA_HEIGHT_URL, - json={"shape": [{"lat": lat, "lon": lon}]}, - timeout=10, - ) - resp.raise_for_status() - heights = resp.json().get("height", []) - return heights[0] if heights else None + """Elevation in metres from the planet-DEM PMTiles — the single elevation + source per OFFROUTE-ARCHITECTURE.md §9. None on failure, on untiled points + (e.g. true ocean), or if DEMReader could not be initialized at startup.""" + if _DEM is None: + return None + return _DEM.sample_point(lat, lon) @geocode_bp.route('/api/reverse//') diff --git a/lib/offroute/dem.py b/lib/offroute/dem.py index f715611..06cfcea 100644 --- a/lib/offroute/dem.py +++ b/lib/offroute/dem.py @@ -158,7 +158,28 @@ class DEMReader: } return elevation, metadata - + + def sample_point(self, lat: float, lon: float) -> Optional[float]: + """Return elevation in meters at a single point, or None if untiled. + + Reads one z12 Terrarium tile (LRU-cached) and indexes the matching + pixel. Sub-ms warm, ~15 ms cold per tile via NFS. Returns None when the + tile is absent (e.g. true ocean nodata) or lat is outside the + Web-Mercator pole cap (~+/-85.05 deg). + """ + if not -85.05112878 <= lat <= 85.05112878: + return None + n = 2 ** ZOOM_LEVEL + fx = (lon + 180.0) / 360.0 * n + fy = (1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi) / 2.0 * n + tx, ty = int(fx), int(fy) + tile = self._decode_tile(ZOOM_LEVEL, tx, ty) + if tile is None: + return None + row = min(TILE_SIZE - 1, int((fy - ty) * TILE_SIZE)) + col = min(TILE_SIZE - 1, int((fx - tx) * TILE_SIZE)) + return float(tile[row, col]) + def pixel_to_latlon(self, row: int, col: int, metadata: dict) -> Tuple[float, float]: """Convert pixel coordinates to lat/lon.""" lat = metadata["origin_lat"] + row * metadata["pixel_size_lat"] diff --git a/lib/reverse_bundle_test.py b/lib/reverse_bundle_test.py index d825b71..6defd9e 100644 --- a/lib/reverse_bundle_test.py +++ b/lib/reverse_bundle_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the /api/reverse// enrichment bundle (lib.netsyms_api). -Photon/Valhalla/landclass are mocked so the suite runs without live services; +Photon/DEM/landclass are mocked so the suite runs without live services; one timezone test exercises the real SpatiaLite DB when it is present. Plain asserts + a __main__ runner, matching the rest of lib/*_test.py. """ @@ -15,8 +15,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from flask import Flask from lib import netsyms_api -EXPECTED_KEYS = {'name', 'city', 'county', 'state', 'country', - 'postal_code', 'timezone', 'landclass', 'elevation_m'} +EXPECTED_KEYS = set(netsyms_api._BUNDLE_KEYS) def _client(): @@ -124,6 +123,40 @@ def test_real_timezone_db(): print(" PASS: real timezones.sqlite point-in-polygon") +def test_elevation_from_dem_reader_mock(): + # elevation_m comes from DEMReader.sample_point (not Valhalla); other + # components stubbed to null so the bundle is hermetic. + _clear_cache() + fake_dem = mock.Mock() + fake_dem.sample_point.return_value = 824 + with mock.patch.object(netsyms_api, '_DEM', fake_dem), \ + mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None): + resp = _client().get('/api/reverse/43.6150/-116.2023') + assert resp.status_code == 200, resp.status_code + data = resp.get_json() + assert set(data.keys()) == EXPECTED_KEYS + assert data['elevation_m'] == 824, data['elevation_m'] + fake_dem.sample_point.assert_called_once() + print(" PASS: elevation_m sourced from DEMReader.sample_point") + + +def test_elevation_dem_unavailable(): + # DEMReader failed to init at startup (_DEM is None) -> elevation_m null, 200. + _clear_cache() + with mock.patch.object(netsyms_api, '_DEM', None), \ + mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ + mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ + mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None): + resp = _client().get('/api/reverse/43.6150/-116.2023') + assert resp.status_code == 200, resp.status_code + data = resp.get_json() + assert set(data.keys()) == EXPECTED_KEYS + assert data['elevation_m'] is None + print(" PASS: DEMReader unavailable -> elevation_m null, still 200") + + if __name__ == '__main__': print("Running reverse-bundle tests...") test_happy_path() @@ -133,4 +166,6 @@ if __name__ == '__main__': test_invalid_input_400() test_cache_hit_serves_without_recompute() test_real_timezone_db() + test_elevation_from_dem_reader_mock() + test_elevation_dem_unavailable() print("All tests passed.") From 484dfbd1e0da7e31fc7a5dcc5e1d8d7952e1ab23 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 20 May 2026 16:34:14 +0000 Subject: [PATCH 52/72] landclass: filter antimeridian-wrapping PAD-US records 47 PAD-US units (Aleutian/Bering-Sea BOEM marine features, all is_valid=False) are stored as antimeridian-wrapping polygons whose bbox spans ~360 deg of longitude. Their invalid planar geometry forms latitude bands that ST_Intersects false-matches for non-US points (e.g. London/Germany at ~51N matched "Rat Islands" ogc_fid 3974). Fix: add `AND (ST_XMax(geom) - ST_XMin(geom)) < 60` to the lookup_landclass SELECT. No DB writes; two cheap ST_XMax/XMin evals on the already spatial-index-filtered result set. Verified live: total 651088 rows, filtered 651041 (exactly 47 excluded); Yosemite/Grand Canyon retained, London/Germany now empty. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/landclass.py | 3 +++ lib/landclass_test.py | 44 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 lib/landclass_test.py diff --git a/lib/landclass.py b/lib/landclass.py index f581994..7760cce 100644 --- a/lib/landclass.py +++ b/lib/landclass.py @@ -214,6 +214,9 @@ def lookup_landclass(lat, lon): des_tp, gap_sts, pub_access, category, gis_acres, state_nm FROM pad_units WHERE ST_Intersects(geom, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) + -- exclude antimeridian-wrapping polygons: 47 BOEM marine artifacts + -- span ~360 deg longitude and false-match non-US points at their lat band + AND (ST_XMax(geom) - ST_XMin(geom)) < 60 ORDER BY gis_acres ASC LIMIT 10""", (lon, lat) diff --git a/lib/landclass_test.py b/lib/landclass_test.py new file mode 100644 index 0000000..cba8ca7 --- /dev/null +++ b/lib/landclass_test.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +"""Tests for lib.landclass PAD-US lookups. + +Live-PostgreSQL regression test using the skip-if-not-available pattern +(matching test_real_timezone_db in reverse_bundle_test.py). Plain asserts + +a __main__ runner, matching the rest of lib/*_test.py. + +Note: lookup_landclass swallows DB errors and returns [] (it never raises), +so PG availability is probed via a known US point (Boise) rather than by +catching an exception. +""" + +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib import landclass + + +def test_landclass_no_antimeridian_false_match(): + # Yosemite doubles as the liveness probe: a point on real US public land. + # (lookup_landclass returns [] when PG is unreachable AND when the point is + # off public land, so the probe must be a known-public-land point — e.g. + # downtown Boise is private and would yield [] even with PG up.) + yosemite = landclass.lookup_landclass(37.85, -119.55) + if not yosemite: + print(" SKIP: live PG not available (Yosemite returned no rows)") + return + # Filter must NOT drop legitimate (non-wrapping) US units. + assert len(yosemite) >= 1, f"Yosemite should match >=1 PAD-US unit, got {len(yosemite)}" + + # London (51.5074 N) previously false-matched the antimeridian-wrapping + # 'Rat Islands' record (ogc_fid 3974, ~360 deg lon span). The < 60 deg + # filter must now drop it -> empty result. + london = landclass.lookup_landclass(51.5074, -0.1278) + assert london == [], f"London should match no PAD-US unit, got {[r.get('unit_name') for r in london]}" + print(" PASS: antimeridian filter drops London false-match, keeps Yosemite coverage") + + +if __name__ == '__main__': + print("Running landclass tests...") + test_landclass_no_antimeridian_false_match() + print("All tests passed.") From f67f4ec9e3796caa3cde75c0e9571e3350ed6e64 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 21 May 2026 21:47:52 +0000 Subject: [PATCH 53/72] Add wiki_index enrichment for place details Enriches place API responses with wiki_summary, wiki_url, wiki_population, and wikivoyage_url from wiki_index.db. Lookups by wikidata_id first, then falls back to name + country_code. Called from Nominatim, Overpass, and Wikidata endpoints. Gated by has_kiwix_wiki feature flag. Co-Authored-By: Claude Opus 4.5 --- lib/place_detail.py | 85 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/lib/place_detail.py b/lib/place_detail.py index e85ee54..e2515b5 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -286,6 +286,88 @@ def _apply_google_data(result, google_data, gaps): _WIKI_TAGS = ('wikipedia', 'wikidata', 'wikivoyage', 'appropedia') + +# ── Wiki Index enrichment ─────────────────────────────────────────────── + +_wiki_index_conn = None + +def _get_wiki_index_db(): + global _wiki_index_conn + if _wiki_index_conn is not None: + return _wiki_index_conn + + db_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "wiki_index.db") + if not os.path.exists(db_path): + logger.debug(f"wiki_index.db not found at {db_path}") + return None + + _wiki_index_conn = sqlite3.connect(db_path, check_same_thread=False) + _wiki_index_conn.row_factory = sqlite3.Row + logger.info(f"Wiki index DB ready at {db_path}") + return _wiki_index_conn + + +def _enrich_with_wiki_index(result): + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get("features", {}) + if not features.get("has_kiwix_wiki", False): + return result + except Exception: + return result + + db = _get_wiki_index_db() + if not db: + return result + + try: + cur = db.cursor() + row = None + + extratags = result.get("extratags", {}) + wikidata_id = result.get("wikidata_id") or extratags.get("wikidata") + if wikidata_id: + if isinstance(wikidata_id, str) and wikidata_id.startswith("http"): + wikidata_id = wikidata_id.split("/")[-1] + cur.execute( + "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE wikidata_id = ?", + (wikidata_id,) + ) + row = cur.fetchone() + + if not row: + name = result.get("name") + address = result.get("address") or {} + country_code = address.get("country_code") or result.get("country_code") + if name and country_code: + cur.execute( + "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE place_name = ? AND country_code = ? LIMIT 1", + (name, country_code.lower()) + ) + row = cur.fetchone() + + if row: + if row["summary"]: + result["wiki_summary"] = row["summary"] + if row["wiki_population"]: + try: + result["wiki_population"] = int(row["wiki_population"]) + except (ValueError, TypeError): + result["wiki_population"] = row["wiki_population"] + if row["wikipedia_title"]: + title = row["wikipedia_title"].replace(" ", "_") + result["wiki_url"] = f"https://en.wikipedia.org/wiki/{title}" + if row["wikivoyage_title"]: + title = row["wikivoyage_title"].replace(" ", "_") + result["wikivoyage_url"] = f"https://en.wikivoyage.org/wiki/{title}" + logger.debug(f"Wiki index enrichment hit for {result.get(name)}") + + except Exception as e: + logger.debug(f"Wiki index enrichment error: {e}") + + return result + def _enrich_wiki_links(result): """ Rewrite wiki-related extratags to local Kiwix URLs where available. @@ -625,6 +707,7 @@ def get_place_detail(osm_type, osm_id): nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_wiki_links(nominatim_result) + nominatim_result = _enrich_with_wiki_index(nominatim_result) cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') return nominatim_result, 200 @@ -658,6 +741,7 @@ def get_place_detail(osm_type, osm_id): overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) overpass_result = _enrich_wiki_links(overpass_result) + overpass_result = _enrich_with_wiki_index(overpass_result) cache_put(osm_type, osm_id, overpass_result, 'overpass') return overpass_result, 200 @@ -809,6 +893,7 @@ def get_place_by_wikidata(wikidata_id): result["boundary"] = boundary + result = _enrich_with_wiki_index(result) logger.debug(f"Wikidata hit: {wikidata_id} -> {name}") return result, 200 From dcd4ddd3581cd466c48f886c7aa59b24993c9410 Mon Sep 17 00:00:00 2001 From: malice Date: Thu, 21 May 2026 16:07:54 -0600 Subject: [PATCH 54/72] Migrate TomTom flow proxy from classic to Orbis Maps API --- lib/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/api.py b/lib/api.py index 949a0cc..d56c519 100644 --- a/lib/api.py +++ b/lib/api.py @@ -1215,7 +1215,8 @@ def api_traffic_flow(z, x, y): key = os.environ.get('TOMTOM_API_KEY') if not key: return 'Traffic service not configured', 503 - url = f'https://api.tomtom.com/traffic/map/4/tile/flow/relative/{z}/{x}/{y}.png?key={key}' + # Orbis Maps Traffic API (migrated from classic) + url = f'https://api.tomtom.com/maps/orbis/traffic/tile/flow/{z}/{x}/{y}.png?key={key}&apiVersion=1&style=light' try: resp = http_requests.get(url, timeout=10) if resp.status_code != 200: From 75664c7d02c5b11e3fe7f2a022be0b118df1aa0c Mon Sep 17 00:00:00 2001 From: malice Date: Fri, 22 May 2026 01:12:20 -0600 Subject: [PATCH 55/72] recon: remove /api/traffic/flow handler (now served by navi-traffic, extraction #1) The /api/traffic/flow///.png handler is dead code in recon. As of extraction #1 of the recon<->Navi decoupling, this path is served by the standalone navi-traffic service. Live request flow is now: Caddy (CT 101, navi.echo6.co @authed_api, forward_auth) -> nginx :8440 (location ^~ /api/traffic/ -> proxy_cache traffic_cache) -> navi-traffic gunicorn :8421 (services/navi_traffic) Cutover verified live: authenticated browser fetch to https://navi.echo6.co/api/traffic/flow/... returns 200 image/png with X-Cache-Status MISS then HIT (120s cache), Server: gunicorn. navi-backend (github.com/zvx-echo6/navi-backend): - dae54f3 Initial scaffold: navi-backend + navi-traffic - 311cb8f nginx: use ^~ prefix on /api/traffic/ to beat .png regex catch-all Caddy cutover (@authed_api upstream 8420 -> nginx 8440) applied on Utility CT 101. Also drops the now-unused make_response flask import (no other uses in lib/api.py). os and http_requests remain (used elsewhere). Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/api.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/lib/api.py b/lib/api.py index d56c519..7aae00f 100644 --- a/lib/api.py +++ b/lib/api.py @@ -17,7 +17,7 @@ import shutil import tempfile import requests as http_requests -from flask import Flask, request, jsonify, redirect, render_template, make_response +from flask import Flask, request, jsonify, redirect, render_template from qdrant_client import QdrantClient from qdrant_client.models import Filter, FieldCondition, MatchValue from werkzeug.utils import secure_filename @@ -1208,27 +1208,6 @@ def api_knowledge_stats(): return jsonify(_cache['knowledge_stats']) - -@app.route('/api/traffic/flow///.png') -def api_traffic_flow(z, x, y): - """Proxy TomTom traffic flow tiles to hide API key from frontend.""" - key = os.environ.get('TOMTOM_API_KEY') - if not key: - return 'Traffic service not configured', 503 - # Orbis Maps Traffic API (migrated from classic) - url = f'https://api.tomtom.com/maps/orbis/traffic/tile/flow/{z}/{x}/{y}.png?key={key}&apiVersion=1&style=light' - try: - resp = http_requests.get(url, timeout=10) - if resp.status_code != 200: - return 'Upstream error', 502 - r = make_response(resp.content) - r.headers['Content-Type'] = 'image/png' - r.headers['Cache-Control'] = 'public, max-age=120' - return r - except Exception: - return 'Upstream timeout', 504 - - @app.route('/api/place//') def api_place_detail(osm_type, osm_id): """Proxy place details from local Nominatim or Overpass API.""" From bb220b7ba3d890aecc100a2540f3d143c5eb2dc5 Mon Sep 17 00:00:00 2001 From: malice Date: Fri, 22 May 2026 08:10:33 -0600 Subject: [PATCH 56/72] recon: add auth.login_url/logout_url to deployment profiles (extraction #2) Additive prep for the Navi Panel.jsx login/logout cutover. Adds an `auth` block (login_url, logout_url) to each deployment profile, placed after the existing `services` block: - home.yaml login=/outpost.goauthentik.io/start?rd=%2F logout=auth.echo6.co invalidation flow, next=navi.echo6.co - minimal_pi.yaml same, with TODO(matt) to confirm logout next= host - regional_pi.yaml same, with TODO(matt) to confirm logout next= host No Python change. /api/config returns the whole profile dict, so these keys flow through automatically; existing consumers ignore unknown keys, making this backward-safe (the frontend fallback path is simply never needed once this is live). Next steps (separate PRs): the navi-config service (:8422) mirroring this handler, and the Panel.jsx fix to read cfg.auth.login_url/logout_url with the current literals as fallback. Co-Authored-By: Claude Opus 4.7 (1M context) --- config/profiles/home.yaml | 4 ++++ config/profiles/minimal_pi.yaml | 5 +++++ config/profiles/regional_pi.yaml | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index 474ffb2..de704d9 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -31,6 +31,10 @@ services: address_book: "/api/address_book" valhalla: "/valhalla" +auth: + login_url: "/outpost.goauthentik.io/start?rd=%2F" + logout_url: "https://auth.echo6.co/if/flow/default-invalidation-flow/?next=https://navi.echo6.co/" + features: has_nominatim_details: true has_kiwix_wiki: true diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index e3ae0fd..c2fd90a 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -26,6 +26,11 @@ services: address_book: "/api/address_book" valhalla: "/valhalla" +# TODO(matt): confirm logout next= host for this profile +auth: + login_url: "/outpost.goauthentik.io/start?rd=%2F" + logout_url: "https://auth.echo6.co/if/flow/default-invalidation-flow/?next=https://navi.echo6.co/" + features: has_nominatim_details: false has_kiwix_wiki: false diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml index 8e70cd6..b6f2cad 100644 --- a/config/profiles/regional_pi.yaml +++ b/config/profiles/regional_pi.yaml @@ -31,6 +31,11 @@ services: address_book: "/api/address_book" valhalla: "/valhalla" +# TODO(matt): confirm logout next= host for this profile +auth: + login_url: "/outpost.goauthentik.io/start?rd=%2F" + logout_url: "https://auth.echo6.co/if/flow/default-invalidation-flow/?next=https://navi.echo6.co/" + features: has_nominatim_details: true has_kiwix_wiki: false From f42b1fef3ba07bd5a7f3fa4b63adeb8c8ec38180 Mon Sep 17 00:00:00 2001 From: malice Date: Fri, 22 May 2026 13:23:08 -0600 Subject: [PATCH 57/72] recon: add /api/wiki-enrich endpoint (extraction #5 prep, additive) (#8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HTTP wrapper over the wiki_index lookup so the (future) navi-places service can fetch wiki enrichment over HTTP instead of reading recon's 2.1 GB data/wiki_index.db directly (Phase A option B — HTTP coupling). GET /api/wiki-enrich?wikidata= (primary key) GET /api/wiki-enrich?name=&country= (fallback key) -> 200 {wiki_summary?, wiki_population?, wiki_url?, wikivoyage_url?} -> 400 if no usable key; 404 on no match. Public (no auth, like /api/place/*). Route keys are wikidata_id / name+country — NOT osm_type/osm_id — because that is how wiki_index is actually queried (the in-process _enrich_with_wiki_index looks up by result['wikidata_id'] then name+country_code, never by OSM id; see extraction-5-wiki-enrich-investigation.md). An osm-keyed route would have forced a redundant in-recon place lookup. Changes (additive only): - lib/place_detail.py: new standalone lookup_wiki_index(wikidata_id, name, country_code) doing the same two SELECTs + field/URL mapping as the in-process path, returning a dict or None. Pure DB read, never raises. `_enrich_with_wiki_index` is LEFT UNTOUCHED — it can be DRY-refactored to delegate to this in a later PR; the in-process enrichment path is unchanged. - lib/wiki_enrich_api.py: new wiki_enrich_bp blueprint with the route. - lib/api.py: register the blueprint (one block). - lib/wiki_enrich_api_test.py: 4 tests (hit-by-wikidata + decoded fields, no-match -> 404, name+country fallback, no-key -> 400) over an in-memory fixture DB; plain-assert style + __main__ runner (recon venv has no pytest). Verified green against recon's venv (flask 3.1.2). Does NOT remove the in-process _enrich_with_wiki_index call from place_detail — that happens in a later PR once navi-places is live and serving. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 4 ++ lib/place_detail.py | 64 ++++++++++++++++++++++++++++++ lib/wiki_enrich_api.py | 31 +++++++++++++++ lib/wiki_enrich_api_test.py | 77 +++++++++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+) create mode 100644 lib/wiki_enrich_api.py create mode 100644 lib/wiki_enrich_api_test.py diff --git a/lib/api.py b/lib/api.py index 7aae00f..c9f991e 100644 --- a/lib/api.py +++ b/lib/api.py @@ -73,6 +73,10 @@ from .netsyms_api import netsyms_bp, geocode_bp app.register_blueprint(netsyms_bp) app.register_blueprint(geocode_bp) +# ── Wiki-enrich Blueprint (extraction #5 prep — HTTP wrapper over wiki_index) ── +from .wiki_enrich_api import wiki_enrich_bp +app.register_blueprint(wiki_enrich_bp) + # ── Navigation Constants ── diff --git a/lib/place_detail.py b/lib/place_detail.py index e2515b5..46aa8b0 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -307,6 +307,70 @@ def _get_wiki_index_db(): return _wiki_index_conn +def lookup_wiki_index(wikidata_id=None, name=None, country_code=None): + """Standalone wiki_index lookup, extracted for the /api/wiki-enrich endpoint + (extraction #5: navi-places HTTP-fetches wiki enrichment instead of reading + the 2.1 GB wiki_index.db directly). + + Mirrors the lookup that `_enrich_with_wiki_index` performs in-process: + by wikidata_id first, then a name + country_code fallback. Returns a dict of + wiki enrichment fields (only those present), or None if there is no match or + the wiki_index DB is unavailable. Pure DB read — no feature-flag gating + (callers decide whether to call) and never raises. + + NOTE: additive only — `_enrich_with_wiki_index` is intentionally left + untouched here; it can be DRY-refactored to delegate to this in a later PR. + """ + db = _get_wiki_index_db() + if not db: + return None + + try: + cur = db.cursor() + row = None + + if wikidata_id: + wid = wikidata_id + if isinstance(wid, str) and wid.startswith("http"): + wid = wid.split("/")[-1] + cur.execute( + "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE wikidata_id = ?", + (wid,) + ) + row = cur.fetchone() + + if not row and name and country_code: + cur.execute( + "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE place_name = ? AND country_code = ? LIMIT 1", + (name, country_code.lower()) + ) + row = cur.fetchone() + + if not row: + return None + + out = {} + if row["summary"]: + out["wiki_summary"] = row["summary"] + if row["wiki_population"]: + try: + out["wiki_population"] = int(row["wiki_population"]) + except (ValueError, TypeError): + out["wiki_population"] = row["wiki_population"] + if row["wikipedia_title"]: + title = row["wikipedia_title"].replace(" ", "_") + out["wiki_url"] = f"https://en.wikipedia.org/wiki/{title}" + if row["wikivoyage_title"]: + title = row["wikivoyage_title"].replace(" ", "_") + out["wikivoyage_url"] = f"https://en.wikivoyage.org/wiki/{title}" + + return out or None + + except Exception as e: + logger.debug(f"wiki_index lookup error: {e}") + return None + + def _enrich_with_wiki_index(result): try: from .deployment_config import get_deployment_config diff --git a/lib/wiki_enrich_api.py b/lib/wiki_enrich_api.py new file mode 100644 index 0000000..ff0f9c7 --- /dev/null +++ b/lib/wiki_enrich_api.py @@ -0,0 +1,31 @@ +"""Wiki-enrich API — read-only HTTP wrapper over the wiki_index lookup. + +Extraction #5 prep: lets the (future) navi-places service fetch wiki enrichment +over HTTP instead of reading recon's 2.1 GB data/wiki_index.db directly. Additive +only — does not change place_detail's in-process `_enrich_with_wiki_index` path. + + GET /api/wiki-enrich?wikidata= (primary key) + GET /api/wiki-enrich?name=&country= (fallback key) + +Public (no auth), matching /api/place/*. 400 if no usable key; 404 on no match. +""" +from flask import Blueprint, request, jsonify + +from .place_detail import lookup_wiki_index + +wiki_enrich_bp = Blueprint('wiki_enrich', __name__) + + +@wiki_enrich_bp.route('/api/wiki-enrich') +def api_wiki_enrich(): + wikidata = (request.args.get('wikidata') or '').strip() or None + name = (request.args.get('name') or '').strip() or None + country = (request.args.get('country') or '').strip() or None + + if not wikidata and not (name and country): + return jsonify({'error': 'provide ?wikidata= or ?name=&country='}), 400 + + result = lookup_wiki_index(wikidata_id=wikidata, name=name, country_code=country) + if result is None: + return jsonify({'error': 'no wiki match'}), 404 + return jsonify(result) diff --git a/lib/wiki_enrich_api_test.py b/lib/wiki_enrich_api_test.py new file mode 100644 index 0000000..681e5cb --- /dev/null +++ b/lib/wiki_enrich_api_test.py @@ -0,0 +1,77 @@ +"""Tests for the /api/wiki-enrich endpoint (extraction #5 prep). + +Plain-assert style (matching the other lib *_test.py; recon's venv has no +pytest). Builds a minimal Flask app with only wiki_enrich_bp registered (avoids +importing the full recon app) and points place_detail's lazy wiki_index +connection at an in-memory fixture DB. Run with pytest, or directly: + python -m lib.wiki_enrich_api_test +""" +import sqlite3 + +from flask import Flask + +from lib import place_detail +from lib.wiki_enrich_api import wiki_enrich_bp + + +def _client(): + """Fresh in-memory wiki_index fixture + a minimal app with just the route.""" + conn = sqlite3.connect(":memory:", check_same_thread=False) + conn.row_factory = sqlite3.Row + conn.execute( + "CREATE TABLE wiki_places (wikidata_id TEXT, place_name TEXT, country_code TEXT, " + "summary TEXT, wiki_population TEXT, wikipedia_title TEXT, wikivoyage_title TEXT)" + ) + conn.execute( + "INSERT INTO wiki_places VALUES (?,?,?,?,?,?,?)", + ("Q830149", "Filer", "us", "A city in Idaho.", "2508", "Filer, Idaho", "Filer"), + ) + conn.commit() + # Point the lazy module-level connection at the fixture so + # _get_wiki_index_db()/lookup_wiki_index() use it (bypasses the file path). + place_detail._wiki_index_conn = conn + app = Flask(__name__) + app.register_blueprint(wiki_enrich_bp) + return app.test_client() + + +def test_wiki_enrich_hit_by_wikidata(): + resp = _client().get("/api/wiki-enrich?wikidata=Q830149") + assert resp.status_code == 200, resp.status_code + d = resp.get_json() + assert d["wiki_summary"] == "A city in Idaho." + assert d["wiki_population"] == 2508 # cast to int + assert d["wiki_url"] == "https://en.wikipedia.org/wiki/Filer,_Idaho" + assert d["wikivoyage_url"] == "https://en.wikivoyage.org/wiki/Filer" + + +def test_wiki_enrich_no_match_404(): + resp = _client().get("/api/wiki-enrich?wikidata=Q9999999") + assert resp.status_code == 404, resp.status_code + + +def test_wiki_enrich_name_country_fallback(): + resp = _client().get("/api/wiki-enrich?name=Filer&country=US") + assert resp.status_code == 200, resp.status_code + assert resp.get_json()["wiki_summary"] == "A city in Idaho." + + +def test_wiki_enrich_no_key_400(): + c = _client() + assert c.get("/api/wiki-enrich").status_code == 400 + # name without country is not a usable key + assert c.get("/api/wiki-enrich?name=Filer").status_code == 400 + + +if __name__ == "__main__": + failures = 0 + for _name, _fn in sorted(globals().items()): + if _name.startswith("test_") and callable(_fn): + try: + _fn() + print(f"PASS {_name}") + except Exception as exc: # noqa: BLE001 + failures += 1 + print(f"FAIL {_name}: {exc!r}") + print("OK" if failures == 0 else f"{failures} FAILED") + raise SystemExit(1 if failures else 0) From 14ad2cd34ac6a53587103f44949bec0f052bb957 Mon Sep 17 00:00:00 2001 From: malice Date: Fri, 22 May 2026 14:08:18 -0600 Subject: [PATCH 58/72] recon: add /api/wiki-rewrite endpoint (extraction #5 prep, additive) (#9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-tag HTTP wrapper over wiki_rewrite.rewrite_wiki_link so the (future) navi-places service can rewrite OSM wiki tags to local Kiwix URLs over HTTP instead of importing recon's wiki_rewrite module (which talks to Kiwix on localhost:8430 and the wiki_cache table in /opt/recon/data/place_cache.db). Companion to PR #8 (/api/wiki-enrich) — Matt picked option B (HTTP-couple the Kiwix offline-wiki rewriting too, since it matters in prod). GET /api/wiki-rewrite?tag=&value= -> 200 {url, status} where status is "local" | "public" | "original" -> 400 on missing value or unknown tag -> no 404 (unclassifiable value echoes back with status "original", mirroring rewrite_wiki_link) Public (no auth), like /api/place/* and /api/wiki-enrich. Changes (additive only): - lib/wiki_rewrite_api.py: new wiki_rewrite_bp blueprint. Thin route directly over the existing rewrite_wiki_link(tag, value) — no extraction needed (it's already a clean standalone function, unlike wiki-enrich's lookup). - lib/api.py: register the blueprint (one block). - lib/wiki_rewrite_api_test.py: 5 tests (local Kiwix hit, public fallback, unclassifiable -> original, missing value -> 400, unknown tag -> 400), stubbing check_kiwix_has_article (no Kiwix/DB), plain-assert + __main__ runner. Verified green against recon's venv (flask 3.1.2). Does NOT touch place_detail's in-process _enrich_wiki_links — that gets removed in a later PR once navi-places is live (same as PR #8). wiki_cache stays in recon's own place_cache.db post-cutover (harmless positive-cache duplication). Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 4 ++ lib/wiki_rewrite_api.py | 34 +++++++++++++++++ lib/wiki_rewrite_api_test.py | 73 ++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 lib/wiki_rewrite_api.py create mode 100644 lib/wiki_rewrite_api_test.py diff --git a/lib/api.py b/lib/api.py index c9f991e..a245706 100644 --- a/lib/api.py +++ b/lib/api.py @@ -77,6 +77,10 @@ app.register_blueprint(geocode_bp) from .wiki_enrich_api import wiki_enrich_bp app.register_blueprint(wiki_enrich_bp) +# ── Wiki-rewrite Blueprint (extraction #5 prep — HTTP wrapper over rewrite_wiki_link) ── +from .wiki_rewrite_api import wiki_rewrite_bp +app.register_blueprint(wiki_rewrite_bp) + # ── Navigation Constants ── diff --git a/lib/wiki_rewrite_api.py b/lib/wiki_rewrite_api.py new file mode 100644 index 0000000..ae1d52e --- /dev/null +++ b/lib/wiki_rewrite_api.py @@ -0,0 +1,34 @@ +"""Wiki-rewrite API — read-only HTTP wrapper over wiki_rewrite.rewrite_wiki_link. + +Extraction #5 prep: lets the (future) navi-places service rewrite OSM wiki tags +to local Kiwix URLs over HTTP instead of importing recon's wiki_rewrite module +(which talks to Kiwix and the wiki_cache table in /opt/recon/data/place_cache.db). +Additive only — does not change place_detail's in-process `_enrich_wiki_links`. + + GET /api/wiki-rewrite?tag=&value= + +Public (no auth), matching /api/place/* and /api/wiki-enrich. 400 on missing +value or unknown tag. No 404 — an unclassifiable value returns the original +value with status "original" (mirrors rewrite_wiki_link). +""" +from flask import Blueprint, request, jsonify + +from .wiki_rewrite import rewrite_wiki_link + +wiki_rewrite_bp = Blueprint('wiki_rewrite', __name__) + +_KNOWN_TAGS = {'wikipedia', 'wikidata', 'wikivoyage', 'appropedia'} + + +@wiki_rewrite_bp.route('/api/wiki-rewrite') +def api_wiki_rewrite(): + tag = (request.args.get('tag') or '').strip().lower() + value = (request.args.get('value') or '').strip() + + if not value: + return jsonify({'error': 'value is required'}), 400 + if tag not in _KNOWN_TAGS: + return jsonify({'error': f"tag must be one of {sorted(_KNOWN_TAGS)}"}), 400 + + url, status = rewrite_wiki_link(tag, value) + return jsonify({'url': url, 'status': status}) diff --git a/lib/wiki_rewrite_api_test.py b/lib/wiki_rewrite_api_test.py new file mode 100644 index 0000000..2bc50f4 --- /dev/null +++ b/lib/wiki_rewrite_api_test.py @@ -0,0 +1,73 @@ +"""Tests for the /api/wiki-rewrite endpoint (extraction #5 prep). + +Plain-assert style (recon's venv has no pytest). Builds a minimal Flask app +with only wiki_rewrite_bp registered. Mocks `wiki_rewrite.check_kiwix_has_article` +to control the local-Kiwix-hit vs. fallback paths without touching Kiwix or the +wiki_cache DB. classify_wiki_link (pure regex) runs for real. Run with pytest, +or directly: python -m lib.wiki_rewrite_api_test +""" +from flask import Flask + +from lib import wiki_rewrite +from lib.wiki_rewrite_api import wiki_rewrite_bp + + +def _client(kiwix_hit): + """kiwix_hit: (found_bool, url) returned by a stubbed check_kiwix_has_article.""" + wiki_rewrite.check_kiwix_has_article = lambda source_type, article_id: kiwix_hit + app = Flask(__name__) + app.register_blueprint(wiki_rewrite_bp) + return app.test_client() + + +def test_local_kiwix_hit(): + url = "https://wiki.echo6.co/content/wikipedia/Filer,_Idaho" + c = _client((True, url)) + resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer, Idaho") + assert resp.status_code == 200, resp.status_code + d = resp.get_json() + assert d["status"] == "local" + assert d["url"] == url + + +def test_public_fallback_when_not_in_kiwix(): + c = _client((False, None)) # not in Kiwix -> canonical public URL + resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer") + assert resp.status_code == 200, resp.status_code + d = resp.get_json() + assert d["status"] == "public" + assert d["url"] == "https://en.wikipedia.org/wiki/Filer" + + +def test_unclassifiable_returns_original(): + # 'wikidata' requires a Q-id; a non-matching value -> classify None -> original. + c = _client((False, None)) + resp = c.get("/api/wiki-rewrite?tag=wikidata&value=not-a-qid") + assert resp.status_code == 200, resp.status_code + d = resp.get_json() + assert d["status"] == "original" + assert d["url"] == "not-a-qid" + + +def test_missing_value_400(): + c = _client((False, None)) + assert c.get("/api/wiki-rewrite?tag=wikipedia").status_code == 400 + + +def test_unknown_tag_400(): + c = _client((False, None)) + assert c.get("/api/wiki-rewrite?tag=facebook&value=x").status_code == 400 + + +if __name__ == "__main__": + failures = 0 + for _name, _fn in sorted(globals().items()): + if _name.startswith("test_") and callable(_fn): + try: + _fn() + print(f"PASS {_name}") + except Exception as exc: # noqa: BLE001 + failures += 1 + print(f"FAIL {_name}: {exc!r}") + print("OK" if failures == 0 else f"{failures} FAILED") + raise SystemExit(1 if failures else 0) From ed36eec85e6f68265695a6516248aac77581c65e Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 00:08:48 -0600 Subject: [PATCH 59/72] cleanup: remove /api/config handler (extraction #2 shadow) (#10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cleanup: remove /api/config handler (extraction #2 shadow) recon's /api/config Flask handler (lib/api.py) is edge-shadowed since extraction #2 — navi-config (:8422) serves the route via nginx on navi.echo6.co. The recon-side handler is dead at the edge; remove it. lib/deployment_config.py is KEPT: get_deployment_config() still has many in-process consumers (lib/api.py:1237 /api/landclass has_landclass gate, google_places.py, place_detail.py x4, offroute/router.py). Only the /api/config HTTP handler is removed; the import at api.py:27 stays. Co-Authored-By: Claude Opus 4.7 (1M context) * cleanup: refresh deployment_config docstring (drop /api/config reference) The module docstring still said get_deployment_config() was "for use by the /api/config endpoint" — that handler was removed in the parent commit. Rewrite to reflect the actual 5 in-process consumers (landclass gate, google_places, place_detail ×4, offroute/router.py profile.offroute.*). Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: zvx-echo6 Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 9 --------- lib/deployment_config.py | 10 +++++++++- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/api.py b/lib/api.py index a245706..1fbde76 100644 --- a/lib/api.py +++ b/lib/api.py @@ -1263,15 +1263,6 @@ def api_landclass(): }) -@app.route('/api/config') -def api_config(): - """Return deployment profile config for frontend consumption.""" - config = get_deployment_config() - resp = jsonify(config) - resp.headers['Cache-Control'] = 'public, max-age=300' - return resp - - @app.route('/api/health') def api_health(): """Health check endpoint for monitoring.""" diff --git a/lib/deployment_config.py b/lib/deployment_config.py index 978b8a0..83cc864 100644 --- a/lib/deployment_config.py +++ b/lib/deployment_config.py @@ -3,7 +3,15 @@ Deployment profile loader. Reads RECON_PROFILE env var (default: "home"), loads the matching YAML from config/profiles/.yaml, and caches the parsed dict in memory. -Provides get_deployment_config() for use by the /api/config endpoint. + +Provides get_deployment_config() for in-process consumers of the profile: + - lib/api.py:api_landclass — the has_landclass feature-flag gate + - lib/google_places.py — Google Places enrichment config + - lib/place_detail.py — place-detail enrichment config (×4 call sites) + - lib/offroute/router.py — profile.offroute.* (osm_pbf_path / postgis_dsn / + densify_interval_m) +(The former /api/config HTTP endpoint that served this dict to the frontend was +removed once navi-config (:8422) took over that route.) """ import os import yaml From c968497b943ec5065139f93b5838d4906b87f605 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 00:27:44 -0600 Subject: [PATCH 60/72] cleanup: remove /api/place handlers (extraction #5 shadow) (#11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /api/place// and /api/place/wikidata/ are edge-shadowed since extraction #5 — navi-places (:8425) serves both via nginx. Removes the two recon-side handlers + the now-unused `from .place_detail import get_place_detail, get_place_by_wikidata` import. NO modules deleted. place_detail.py is KEPT — wiki_enrich_api.py (the /api/wiki-enrich endpoint, which stays; navi-places HTTP-consumes it) imports `lookup_wiki_index` from it. That transitively keeps its deps google_places.py, overture.py, osm_categories.py (all imported only by place_detail). This corrects Phase A #5 §3's "only lib/api.py imports place_detail" — the wiki-enrich endpoint (added post-#5) is a second consumer. Co-authored-by: zvx-echo6 Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/lib/api.py b/lib/api.py index 1fbde76..d14068c 100644 --- a/lib/api.py +++ b/lib/api.py @@ -25,7 +25,6 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB from .deployment_config import get_deployment_config -from .place_detail import get_place_detail, get_place_by_wikidata from .landclass import lookup_landclass, format_summary logger = setup_logging('recon.api') @@ -1216,21 +1215,6 @@ def api_knowledge_stats(): return jsonify(_cache['knowledge_stats']) -@app.route('/api/place//') -def api_place_detail(osm_type, osm_id): - """Proxy place details from local Nominatim or Overpass API.""" - result, status = get_place_detail(osm_type, osm_id) - return jsonify(result), status - - -@app.route("/api/place/wikidata/") -def api_place_wikidata(wikidata_id): - """Fetch place details from Wikidata entity.""" - result, status = get_place_by_wikidata(wikidata_id) - return jsonify(result), status - - - @app.route('/api/landclass') def api_landclass(): """PAD-US land classification lookup for a point.""" From d56b1d5f922e5d54ba3c95794833bc05da9e4096 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 03:34:22 -0600 Subject: [PATCH 61/72] cleanup: remove /api/contacts + /api/address_book handlers + pull entire /nav-i/* subtree (extraction #3 shadow) (#12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cleanup: remove /api/address_book handlers (extraction #3 shadow) Removes address_book_bp (lib/address_book_api.py: /api/address_book/lookup + /api/address_book/list) + its registration in lib/api.py. Edge-shadowed since extraction #3 — navi-contacts (:8423) serves /api/address_book/* on navi.echo6.co; no recon-side consumer (no template/JS reference). lib/address_book.py is KEPT — geocode.py (nickname short-circuit + annotation) and netsyms_api.py import it. NOT removed this PR: contacts_bp. The recon dashboard at /deleted-contacts (recon-product, stays) calls /api/contacts//{restore,restore-as,purge} via XHR, and recon.echo6.co proxies straight to recon:8420 (verified the Caddy block — no navi-contacts routing there). Removing contacts_bp would break those dashboard actions. Flagged for a decision; lib/contacts.py also stays (dashboard ContactsDB reads). See PR body. Co-Authored-By: Claude Opus 4.7 (1M context) * cleanup: deprecate /nav-i + /deleted-contacts; remove contacts_bp + lib/contacts.py Probe found recon's /deleted-contacts dashboard reads /opt/recon/data/contacts.db — frozen since extraction #3 moved write ownership to navi-contacts (/var/lib/navi-backend/contacts.db). The page has been silently rendering ~25-day stale data, and its restore/restore-as/purge XHRs hit recon's contacts_bp (the recon.echo6.co Caddy block proxies straight to recon:8420 — no navi-contacts routing there). Per Matt's decision, deprecate the pages entirely; they'll be re-surfaced later as a proper admin page consuming navi-contacts via API. Removed: - contacts_bp (lib/contacts_api.py, all 10 /api/contacts* routes) + its registration in lib/api.py — edge-shadowed by navi-contacts :8423 since #3, and now free of recon-product consumers once the dashboard goes. - /nav-i (navi_landing_page) + /deleted-contacts (deleted_contacts_page) route handlers; templates/navi/landing.html + templates/navi/deleted_contacts.html. - lib/contacts.py (ContactsDB) — the dashboard was its only non-contacts_bp consumer; both gone. - The two dead NAVI_SUBNAV entries (Overview→/nav-i, Deleted Contacts→ /deleted-contacts). Kept / adapted: - /nav-i/api-keys page (recon-product key management) stays. NAVI_SUBNAV reduced to just its API Keys entry; the base.html top-nav "Nav-I" link repointed /nav-i -> /nav-i/api-keys so the surviving section page stays reachable (minimal href change, not a nav restructure — flagged in PR). - lib/address_book.py — geocode.py + netsyms_api.py still consume it (untouched). Out-of-band follow-up after merge: delete the stale /opt/recon/data/contacts.db (frozen 2026-04-28; data, not code). Co-Authored-By: Claude Opus 4.7 (1M context) * cleanup: pull the entire /nav-i/* subtree (api-keys page is a weaker dup of /settings/keys) Completes the contacts cleanup by removing the rest of /nav-i/. The /nav-i/api-keys page was (a) a weaker duplicate of /settings/keys for Gemini (it lacked remove + reload-from-.env), and (b) a write-only-to-dead-files surface for TomTom + Google Places: it wrote /opt/recon/.env, but the live navi-traffic (:8421) and navi-places (:8425) services read their own /etc/navi-backend/.env and have ignored recon's copy since extractions #1 + #5. End state: no /nav-i/* URLs in recon. Removed: - /nav-i/api-keys route + template (templates/navi/api_keys.html) - all /api/nav-i/api-keys/* endpoints (list/update/test/restart-recon) - lib/api_keys_admin.py (its only importers were those 4 endpoints; _KEY_DEFS/ _read_env/_write_env were private to it) - the now-orphaned NAVI_SUBNAV - the "Nav-I" top-nav entry in base.html (reverses the /nav-i->/nav-i/api-keys repoint from the previous commit, now that the page itself is gone) Kept (Gemini's real home, recon-product): - /settings/keys + /api/keys/* + lib/key_manager.py (KeyManager) — they import key_manager directly, never api_keys_admin, so untouched. Note: TOMTOM_API_KEY now has zero recon .py references. GOOGLE_PLACES_API_KEY still has one (lib/google_places.py), kept in the prior /api/place cleanup as place_detail's dep; its only caller (_enrich_with_google) is unreachable since the /api/place handlers were removed — left in place pending /api/wiki-enrich retirement (out of scope here). Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: zvx-echo6 Co-authored-by: Claude Opus 4.7 (1M context) --- lib/address_book_api.py | 31 --- lib/api.py | 98 -------- lib/api_keys_admin.py | 358 --------------------------- lib/contacts.py | 230 ----------------- lib/contacts_api.py | 132 ---------- templates/base.html | 1 - templates/navi/api_keys.html | 269 -------------------- templates/navi/deleted_contacts.html | 116 --------- templates/navi/landing.html | 22 -- 9 files changed, 1257 deletions(-) delete mode 100644 lib/address_book_api.py delete mode 100644 lib/api_keys_admin.py delete mode 100644 lib/contacts.py delete mode 100644 lib/contacts_api.py delete mode 100644 templates/navi/api_keys.html delete mode 100644 templates/navi/deleted_contacts.html delete mode 100644 templates/navi/landing.html diff --git a/lib/address_book_api.py b/lib/address_book_api.py deleted file mode 100644 index 020828b..0000000 --- a/lib/address_book_api.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -RECON Address Book API — Flask Blueprint. - -GET /api/address_book/lookup?q= — best match or 404 -GET /api/address_book/list — all entries -""" - -from flask import Blueprint, request, jsonify - -from . import address_book - -address_book_bp = Blueprint('address_book', __name__) - - -@address_book_bp.route('/api/address_book/lookup') -def api_address_book_lookup(): - q = request.args.get('q', '').strip() - if not q: - return jsonify({'error': 'Missing q parameter'}), 400 - - result = address_book.lookup(q) - if result is None: - return '', 404 - - return jsonify(result) - - -@address_book_bp.route('/api/address_book/list') -def api_address_book_list(): - entries = address_book.list_all() - return jsonify(entries) diff --git a/lib/api.py b/lib/api.py index d14068c..61f25f2 100644 --- a/lib/api.py +++ b/lib/api.py @@ -59,14 +59,6 @@ class _LargeZimRequest(_FlaskRequest): return super()._get_file_stream(total_content_length, content_type, filename, content_length) app.request_class = _LargeZimRequest -# ── Address Book Blueprint ── -from .address_book_api import address_book_bp -app.register_blueprint(address_book_bp) - -# ── Contacts Blueprint ── -from .contacts_api import contacts_bp -app.register_blueprint(contacts_bp) - # ── Netsyms + Geocode Blueprints ── from .netsyms_api import netsyms_bp, geocode_bp app.register_blueprint(netsyms_bp) @@ -109,12 +101,6 @@ SETTINGS_SUBNAV = [ {'href': '/settings/health', 'label': 'Service Health'}, ] -NAVI_SUBNAV = [ - {'href': '/nav-i', 'label': 'Overview'}, - {'href': '/deleted-contacts', 'label': 'Deleted Contacts'}, - {'href': '/nav-i/api-keys', 'label': 'API Keys'}, -] - def _format_source_citation(payload): """Format a human-readable citation from a search result payload.""" @@ -341,36 +327,6 @@ def failures_page(): failures=failures) -@app.route("/deleted-contacts") -def deleted_contacts_page(): - from .auth import get_user_id - from .contacts import ContactsDB - user_id = get_user_id() or "anonymous" - db = ContactsDB() - contacts = db.list_deleted(user_id) - return render_template("navi/deleted_contacts.html", - domain="navi", subnav=NAVI_SUBNAV, active_page="/deleted-contacts", - contacts=contacts) - - -@app.route("/nav-i") -def navi_landing_page(): - from .auth import get_user_id - from .contacts import ContactsDB - user_id = get_user_id() or "anonymous" - db = ContactsDB() - deleted_count = len(db.list_deleted(user_id)) - return render_template("navi/landing.html", - domain="navi", subnav=NAVI_SUBNAV, active_page="/nav-i", - deleted_count=deleted_count) - - -@app.route("/nav-i/api-keys") -def navi_api_keys_page(): - return render_template("navi/api_keys.html", - domain="navi", subnav=NAVI_SUBNAV, active_page="/nav-i/api-keys") - - @app.route('/peertube') def peertube_dashboard(): return render_template('peertube/dashboard.html', @@ -1408,60 +1364,6 @@ def api_keys_reload(): -# ── Nav-I API Key Admin ── - -@app.route('/api/nav-i/api-keys/list', methods=['GET']) -def navi_api_keys_list(): - from .api_keys_admin import list_keys - return jsonify({'keys': list_keys()}) - - -@app.route('/api/nav-i/api-keys/update', methods=['POST']) -def navi_api_keys_update(): - from .auth import require_auth - from .api_keys_admin import update_key, update_gemini_key - data = request.get_json(force=True) - name = data.get('name', '') - new_value = data.get('new_value', '') - index = data.get('index') # optional, for Gemini key replacement - if not name or not new_value: - return jsonify({'error': 'name and new_value required'}), 400 - if name == 'GEMINI_KEY' and index is not None: - result = update_gemini_key(int(index), new_value) - else: - result = update_key(name, new_value) - if result.get('success'): - return jsonify(result) - return jsonify(result), 400 - - -@app.route('/api/nav-i/api-keys/test', methods=['POST']) -def navi_api_keys_test(): - from .api_keys_admin import test_key - data = request.get_json(force=True) - name = data.get('name', '') - index = data.get('index') # optional, for testing specific Gemini key - if not name: - return jsonify({'error': 'name required'}), 400 - result = test_key(name, index=int(index) if index is not None else None) - return jsonify(result) - - -@app.route('/api/nav-i/api-keys/restart-recon', methods=['POST']) -def navi_api_keys_restart(): - import subprocess - try: - result = subprocess.run( - ['sudo', 'systemctl', 'restart', 'recon'], - capture_output=True, text=True, timeout=30 - ) - if result.returncode == 0: - return jsonify({'success': True, 'note': 'RECON service restarted'}) - return jsonify({'success': False, 'error': result.stderr.strip()}), 500 - except subprocess.TimeoutExpired: - return jsonify({'success': False, 'error': 'Restart timed out'}), 500 - except Exception as e: - return jsonify({'success': False, 'error': str(e)}), 500 # ── YouTube Cookie Management ── diff --git a/lib/api_keys_admin.py b/lib/api_keys_admin.py deleted file mode 100644 index 3c63565..0000000 --- a/lib/api_keys_admin.py +++ /dev/null @@ -1,358 +0,0 @@ -""" -Nav-I API Keys Admin — unified view/update/test for third-party API keys. - -Manages three provider categories: - - Gemini (multiple keys via KeyManager singleton) - - TomTom (single key in .env) - - Google Places (single key in .env) - -All key values are masked in responses. Full values never leave the server -except as user-supplied input on update. -""" -import os -import re -import shutil -import tempfile -import time - -import requests as http_requests - -from .utils import setup_logging - -logger = setup_logging('recon.api_keys_admin') - -ENV_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env') - -# Key definitions: env_name → display metadata -_KEY_DEFS = { - 'TOMTOM_API_KEY': { - 'display_name': 'TomTom', - 'provider': 'tomtom', - }, - 'GOOGLE_PLACES_API_KEY': { - 'display_name': 'Google Places', - 'provider': 'google_places', - }, -} - - -# ── .env read/write helpers ───────────────────────────────────────────── - -def _read_env(): - """Read .env file into a dict of key=value pairs, preserving order.""" - entries = [] # list of (key, value, raw_line) — preserves order and comments - if not os.path.exists(ENV_PATH): - return entries - with open(ENV_PATH, 'r') as f: - for line in f: - raw = line.rstrip('\n') - stripped = raw.strip() - if not stripped or stripped.startswith('#'): - entries.append((None, None, raw)) - continue - m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*)=(.*)$', stripped) - if m: - entries.append((m.group(1), m.group(2).strip().strip('"').strip("'"), raw)) - else: - entries.append((None, None, raw)) - return entries - - -def _write_env(entries): - """Atomically write .env from entries list. Backs up to .env.bak first.""" - # Backup current .env - if os.path.exists(ENV_PATH): - bak_path = ENV_PATH + '.bak' - shutil.copy2(ENV_PATH, bak_path) - - # Write to temp file, then rename (atomic on same filesystem) - fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(ENV_PATH), prefix='.env.', suffix='.tmp') - try: - with os.fdopen(fd, 'w') as f: - for key, value, raw in entries: - if key is not None: - f.write(f'{key}={value}\n') - else: - f.write(raw + '\n') - os.rename(tmp_path, ENV_PATH) - except Exception: - # Clean up temp file on failure - try: - os.unlink(tmp_path) - except OSError: - pass - raise - - logger.info(f"Wrote .env atomically ({len([e for e in entries if e[0]])} keys)") - - -def _get_env_value(name): - """Get a single value from .env by key name.""" - for key, value, _ in _read_env(): - if key == name: - return value - return None - - -def _set_env_value(name, new_value): - """Set a single value in .env. Adds if not present.""" - entries = _read_env() - found = False - for i, (key, value, raw) in enumerate(entries): - if key == name: - entries[i] = (name, new_value, f'{name}={new_value}') - found = True - break - if not found: - entries.append((name, new_value, f'{name}={new_value}')) - _write_env(entries) - - -# ── Masking ───────────────────────────────────────────────────────────── - -def _mask_key(value): - """Mask a key: first 4 chars + '...' + last 4 chars. Never return full value.""" - if not value: - return None - if len(value) <= 8: - return '****' - return value[:4] + '...' + value[-4:] - - -# ── List ──────────────────────────────────────────────────────────────── - -def list_keys(): - """ - Return masked status of all managed API keys. - - Returns list of dicts with: name, display_name, provider, masked_value, - is_set, count (for multi-key providers like Gemini). - """ - result = [] - env_mtime = None - if os.path.exists(ENV_PATH): - env_mtime = time.strftime('%Y-%m-%dT%H:%M:%SZ', - time.gmtime(os.path.getmtime(ENV_PATH))) - - # Gemini keys (via KeyManager) - from .key_manager import get_key_manager - km = get_key_manager() - gemini_keys = km.get_masked_keys() - gemini_count = len(gemini_keys) - # Show a single summary entry for Gemini with count - first_masked = gemini_keys[0]['masked'] if gemini_keys else None - result.append({ - 'name': 'GEMINI_KEY', - 'display_name': 'Gemini', - 'provider': 'gemini', - 'masked_value': first_masked, - 'is_set': gemini_count > 0, - 'count': gemini_count, - 'last_modified': env_mtime, - 'keys': gemini_keys, # full list with per-key stats - }) - - # Single-value keys - for env_name, meta in _KEY_DEFS.items(): - value = _get_env_value(env_name) - result.append({ - 'name': env_name, - 'display_name': meta['display_name'], - 'provider': meta['provider'], - 'masked_value': _mask_key(value), - 'is_set': bool(value), - 'count': 1 if value else 0, - 'last_modified': env_mtime, - }) - - return result - - -# ── Update ────────────────────────────────────────────────────────────── - -def update_key(name, new_value): - """ - Update a key value. For Gemini, name should be 'GEMINI_KEY' with an - optional 'index' for replacing a specific key, or use the KeyManager API. - For TomTom/Google Places, writes directly to .env. - - Returns dict with success status and masked value. - """ - new_value = new_value.strip() - if not new_value: - return {'success': False, 'error': 'Key value cannot be empty'} - - if name == 'GEMINI_KEY': - # Use KeyManager for Gemini - from .key_manager import get_key_manager - km = get_key_manager() - try: - idx = km.add_gemini_key(new_value) - return { - 'success': True, - 'name': name, - 'masked_value': _mask_key(new_value), - 'action': 'added', - 'index': idx, - } - except ValueError as e: - return {'success': False, 'error': str(e)} - - if name in _KEY_DEFS: - _set_env_value(name, new_value) - return { - 'success': True, - 'name': name, - 'masked_value': _mask_key(new_value), - 'action': 'updated', - } - - return {'success': False, 'error': f'Unknown key: {name}'} - - -def update_gemini_key(index, new_value): - """Replace a specific Gemini key by index.""" - new_value = new_value.strip() - if not new_value: - return {'success': False, 'error': 'Key value cannot be empty'} - - from .key_manager import get_key_manager - km = get_key_manager() - try: - km.replace_gemini_key(index, new_value) - return { - 'success': True, - 'name': 'GEMINI_KEY', - 'index': index, - 'masked_value': _mask_key(new_value), - 'action': 'replaced', - } - except (ValueError, IndexError) as e: - return {'success': False, 'error': str(e)} - - -# ── Test ──────────────────────────────────────────────────────────────── - -def test_key(name, index=None): - """ - Test a key against its provider API using the current .env value. - - Returns dict with: success, latency_ms, error, note. - """ - if name == 'GEMINI_KEY': - return _test_gemini(index) - elif name == 'TOMTOM_API_KEY': - return _test_tomtom() - elif name == 'GOOGLE_PLACES_API_KEY': - return _test_google_places() - else: - return {'success': False, 'error': f'Unknown key: {name}', 'latency_ms': 0} - - -def _test_gemini(index=None): - """Test Gemini key by listing models.""" - from .key_manager import get_key_manager - km = get_key_manager() - - if index is not None: - key = km.get_gemini_key(index) - if not key: - return {'success': False, 'error': f'Gemini key index {index} not found', 'latency_ms': 0} - else: - key = km.get_gemini_key(0) - if not key: - return {'success': False, 'error': 'No Gemini keys configured', 'latency_ms': 0} - - t0 = time.time() - try: - resp = http_requests.get( - f"https://generativelanguage.googleapis.com/v1beta/models?key={key}", - timeout=10 - ) - latency = int((time.time() - t0) * 1000) - - if resp.status_code == 200 and 'models' in resp.text: - return {'success': True, 'latency_ms': latency, 'error': None, - 'note': 'Models list returned successfully'} - elif resp.status_code == 403: - return {'success': False, 'latency_ms': latency, - 'error': 'Key disabled or quota exhausted'} - elif resp.status_code == 429: - return {'success': True, 'latency_ms': latency, 'error': None, - 'note': 'Valid key — currently rate-limited'} - else: - return {'success': False, 'latency_ms': latency, - 'error': f'HTTP {resp.status_code}'} - except Exception as e: - latency = int((time.time() - t0) * 1000) - return {'success': False, 'latency_ms': latency, 'error': str(e)} - - -def _test_tomtom(): - """Test TomTom key with a minimal geocode request.""" - key = _get_env_value('TOMTOM_API_KEY') - if not key: - return {'success': False, 'error': 'TOMTOM_API_KEY not set', 'latency_ms': 0} - - t0 = time.time() - try: - resp = http_requests.get( - f"https://api.tomtom.com/search/2/geocode/Boise.json", - params={'key': key, 'limit': 1}, - timeout=10 - ) - latency = int((time.time() - t0) * 1000) - - if resp.status_code == 200: - data = resp.json() - count = data.get('summary', {}).get('totalResults', 0) - return {'success': True, 'latency_ms': latency, 'error': None, - 'note': f'Geocode returned {count} result(s)'} - elif resp.status_code == 403: - return {'success': False, 'latency_ms': latency, - 'error': 'Invalid or expired key'} - else: - return {'success': False, 'latency_ms': latency, - 'error': f'HTTP {resp.status_code}'} - except Exception as e: - latency = int((time.time() - t0) * 1000) - return {'success': False, 'latency_ms': latency, 'error': str(e)} - - -def _test_google_places(): - """Test Google Places (New) API key with a minimal searchText request.""" - key = _get_env_value('GOOGLE_PLACES_API_KEY') - if not key: - return {'success': False, 'error': 'GOOGLE_PLACES_API_KEY not set', 'latency_ms': 0} - - t0 = time.time() - try: - resp = http_requests.post( - "https://places.googleapis.com/v1/places:searchText", - json={'textQuery': 'Boise Idaho', 'maxResultCount': 1}, - headers={ - 'X-Goog-Api-Key': key, - 'X-Goog-FieldMask': 'places.displayName', - }, - timeout=10 - ) - latency = int((time.time() - t0) * 1000) - - if resp.status_code == 200: - data = resp.json() - count = len(data.get('places', [])) - return {'success': True, 'latency_ms': latency, 'error': None, - 'note': f'searchText returned {count} place(s)'} - elif resp.status_code == 403: - return {'success': False, 'latency_ms': latency, - 'error': 'Key not authorized for Places API (New)'} - elif resp.status_code == 429: - return {'success': True, 'latency_ms': latency, 'error': None, - 'note': 'Valid key — quota exceeded'} - else: - body = resp.text[:200] - return {'success': False, 'latency_ms': latency, - 'error': f'HTTP {resp.status_code}: {body}'} - except Exception as e: - latency = int((time.time() - t0) * 1000) - return {'success': False, 'latency_ms': latency, 'error': str(e)} diff --git a/lib/contacts.py b/lib/contacts.py deleted file mode 100644 index f2782db..0000000 --- a/lib/contacts.py +++ /dev/null @@ -1,230 +0,0 @@ -""" -RECON Contacts Database — per-user phone book with soft delete and proximity queries. - -Separate DB at data/contacts.db. Thread-local connections with WAL mode (StatusDB pattern). -""" -import math -import os -import sqlite3 -import threading -from datetime import datetime, timezone - -_local = threading.local() - -_SCHEMA = """ -CREATE TABLE IF NOT EXISTS contacts ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id TEXT NOT NULL, - label TEXT NOT NULL, - name TEXT, - call_sign TEXT, - phone TEXT, - email TEXT, - category TEXT, - notes TEXT, - lat REAL, - lon REAL, - osm_type TEXT, - osm_id INTEGER, - address TEXT, - show_proximity INTEGER DEFAULT 0, - created_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')), - updated_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')), - deleted_at TEXT, - deleted_by TEXT -); - -CREATE INDEX IF NOT EXISTS idx_contacts_user ON contacts(user_id); -CREATE INDEX IF NOT EXISTS idx_contacts_user_category ON contacts(user_id, category); -CREATE INDEX IF NOT EXISTS idx_contacts_user_deleted ON contacts(user_id, deleted_at); -CREATE INDEX IF NOT EXISTS idx_contacts_geo ON contacts(lat, lon); -CREATE UNIQUE INDEX IF NOT EXISTS idx_contacts_home_work - ON contacts(user_id, label) - WHERE label IN ('Home', 'Work') AND deleted_at IS NULL; -""" - - -def _haversine_m(lat1, lon1, lat2, lon2): - """Haversine distance in meters.""" - R = 6_371_000 - rlat1, rlat2 = math.radians(lat1), math.radians(lat2) - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 - return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) - - -def _row_to_dict(row): - """Convert sqlite3.Row to dict, casting show_proximity to bool.""" - d = dict(row) - d['show_proximity'] = bool(d.get('show_proximity', 0)) - return d - - -class ContactsDB: - def __init__(self, db_path=None): - if db_path is None: - db_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'contacts.db') - self.db_path = db_path - os.makedirs(os.path.dirname(db_path), exist_ok=True) - self._init_db() - - def _get_conn(self): - if not hasattr(_local, 'contacts_conn') or _local.contacts_conn is None: - _local.contacts_conn = sqlite3.connect(self.db_path, timeout=30) - _local.contacts_conn.row_factory = sqlite3.Row - _local.contacts_conn.execute("PRAGMA journal_mode=WAL") - _local.contacts_conn.execute("PRAGMA busy_timeout=5000") - return _local.contacts_conn - - def _init_db(self): - conn = self._get_conn() - conn.executescript(_SCHEMA) - conn.commit() - - def list_all(self, user_id, category=None, search=None): - conn = self._get_conn() - sql = "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NULL" - params = [user_id] - if category: - sql += " AND category = ?" - params.append(category) - if search: - sql += " AND (label LIKE ? OR name LIKE ? OR call_sign LIKE ? OR phone LIKE ?)" - like = f"%{search}%" - params.extend([like, like, like, like]) - sql += " ORDER BY label" - return [_row_to_dict(r) for r in conn.execute(sql, params).fetchall()] - - def list_deleted(self, user_id): - conn = self._get_conn() - rows = conn.execute( - "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NOT NULL ORDER BY deleted_at DESC", - (user_id,) - ).fetchall() - return [_row_to_dict(r) for r in rows] - - def get(self, user_id, contact_id, include_deleted=False): - conn = self._get_conn() - sql = "SELECT * FROM contacts WHERE id = ? AND user_id = ?" - if not include_deleted: - sql += " AND deleted_at IS NULL" - row = conn.execute(sql, (contact_id, user_id)).fetchone() - return _row_to_dict(row) if row else None - - def create(self, user_id, **fields): - conn = self._get_conn() - fields.pop('id', None) - fields.pop('user_id', None) - fields.pop('created_at', None) - fields.pop('updated_at', None) - fields.pop('deleted_at', None) - fields.pop('deleted_by', None) - if 'show_proximity' in fields: - fields['show_proximity'] = 1 if fields['show_proximity'] else 0 - columns = ['user_id'] + list(fields.keys()) - placeholders = ', '.join(['?'] * len(columns)) - col_str = ', '.join(columns) - values = [user_id] + list(fields.values()) - try: - cur = conn.execute(f"INSERT INTO contacts ({col_str}) VALUES ({placeholders})", values) - conn.commit() - return self.get(user_id, cur.lastrowid), None - except sqlite3.IntegrityError: - return None, 'conflict' - - def update(self, user_id, contact_id, **fields): - conn = self._get_conn() - fields.pop('id', None) - fields.pop('user_id', None) - fields.pop('created_at', None) - fields.pop('deleted_at', None) - fields.pop('deleted_by', None) - if 'show_proximity' in fields: - fields['show_proximity'] = 1 if fields['show_proximity'] else 0 - fields['updated_at'] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ') - sets = ', '.join(f"{k} = ?" for k in fields) - values = list(fields.values()) + [contact_id, user_id] - conn.execute(f"UPDATE contacts SET {sets} WHERE id = ? AND user_id = ? AND deleted_at IS NULL", values) - conn.commit() - return self.get(user_id, contact_id) - - def soft_delete(self, user_id, contact_id): - conn = self._get_conn() - now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ') - conn.execute( - "UPDATE contacts SET deleted_at = ?, deleted_by = ? WHERE id = ? AND user_id = ? AND deleted_at IS NULL", - (now, user_id, contact_id, user_id) - ) - conn.commit() - return self.get(user_id, contact_id, include_deleted=True) - - def restore(self, user_id, contact_id): - conn = self._get_conn() - row = self.get(user_id, contact_id, include_deleted=True) - if not row or not row.get('deleted_at'): - return None, 'not_found' - if row.get('label') in ('Home', 'Work'): - existing = conn.execute( - "SELECT id FROM contacts WHERE user_id = ? AND label = ? AND deleted_at IS NULL AND id != ?", - (user_id, row['label'], contact_id) - ).fetchone() - if existing: - return None, 'conflict' - conn.execute( - "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL WHERE id = ? AND user_id = ?", - (contact_id, user_id) - ) - conn.commit() - return self.get(user_id, contact_id), None - - def restore_as(self, user_id, contact_id, new_label): - """Restore a soft-deleted contact with a new label (for Home/Work conflict resolution).""" - conn = self._get_conn() - row = self.get(user_id, contact_id, include_deleted=True) - if not row or not row.get('deleted_at'): - return None, 'not_found' - if not new_label or not new_label.strip(): - return None, 'invalid_label' - now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%fZ') - try: - conn.execute( - "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL, label = ?, updated_at = ? WHERE id = ? AND user_id = ?", - (new_label.strip(), now, contact_id, user_id) - ) - conn.commit() - except sqlite3.IntegrityError: - return None, 'conflict' - return self.get(user_id, contact_id), None - - def purge(self, user_id, contact_id): - conn = self._get_conn() - row = self.get(user_id, contact_id, include_deleted=True) - if not row: - return False, 'not_found' - if not row.get('deleted_at'): - return False, 'not_deleted' - conn.execute("DELETE FROM contacts WHERE id = ? AND user_id = ?", (contact_id, user_id)) - conn.commit() - return True, None - - def find_nearby(self, user_id, lat, lon, radius_m=75): - conn = self._get_conn() - # Bounding box pre-filter (~111km per degree lat) - dlat = radius_m / 111_000 - dlon = radius_m / (111_000 * math.cos(math.radians(lat))) - rows = conn.execute( - """SELECT * FROM contacts - WHERE user_id = ? AND deleted_at IS NULL AND show_proximity = 1 - AND lat BETWEEN ? AND ? AND lon BETWEEN ? AND ?""", - (user_id, lat - dlat, lat + dlat, lon - dlon, lon + dlon) - ).fetchall() - results = [] - for r in rows: - dist = _haversine_m(lat, lon, r['lat'], r['lon']) - if dist <= radius_m: - d = _row_to_dict(r) - d['distance_m'] = round(dist, 1) - results.append(d) - results.sort(key=lambda x: x['distance_m']) - return results diff --git a/lib/contacts_api.py b/lib/contacts_api.py deleted file mode 100644 index 0e4506b..0000000 --- a/lib/contacts_api.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -RECON Contacts API — Flask Blueprint. - -Per-user phone book with soft delete, restore, purge, and proximity queries. -All endpoints require Authentik forward-auth (X-Authentik-Username header). -""" -from flask import Blueprint, request, jsonify - -from .auth import require_auth -from .contacts import ContactsDB - -contacts_bp = Blueprint('contacts', __name__) - -_db = None - -def _get_db(): - global _db - if _db is None: - _db = ContactsDB() - return _db - - -@contacts_bp.route('/api/contacts', methods=['GET']) -@require_auth -def list_contacts(): - db = _get_db() - category = request.args.get('category') - search = request.args.get('search') - return jsonify(db.list_all(request.user_id, category=category, search=search)) - - -@contacts_bp.route('/api/contacts', methods=['POST']) -@require_auth -def create_contact(): - db = _get_db() - data = request.get_json(force=True) - contact, err = db.create(request.user_id, **data) - if err == 'conflict': - return jsonify({'error': 'You already have a Home/Work contact'}), 409 - return jsonify(contact), 201 - - -@contacts_bp.route('/api/contacts/nearby', methods=['GET']) -@require_auth -def nearby_contacts(): - db = _get_db() - lat = request.args.get('lat', type=float) - lon = request.args.get('lon', type=float) - radius_m = request.args.get('radius_m', 75, type=float) - if lat is None or lon is None: - return jsonify({'error': 'lat and lon required'}), 400 - return jsonify(db.find_nearby(request.user_id, lat, lon, radius_m)) - - -@contacts_bp.route('/api/contacts/deleted', methods=['GET']) -@require_auth -def list_deleted(): - db = _get_db() - return jsonify(db.list_deleted(request.user_id)) - - -@contacts_bp.route('/api/contacts/', methods=['GET']) -@require_auth -def get_contact(contact_id): - db = _get_db() - contact = db.get(request.user_id, contact_id) - if not contact: - return jsonify({'error': 'Not found'}), 404 - return jsonify(contact) - - -@contacts_bp.route('/api/contacts/', methods=['PATCH']) -@require_auth -def update_contact(contact_id): - db = _get_db() - data = request.get_json(force=True) - contact = db.update(request.user_id, contact_id, **data) - if not contact: - return jsonify({'error': 'Not found'}), 404 - return jsonify(contact) - - -@contacts_bp.route('/api/contacts/', methods=['DELETE']) -@require_auth -def delete_contact(contact_id): - db = _get_db() - contact = db.soft_delete(request.user_id, contact_id) - if not contact: - return jsonify({'error': 'Not found'}), 404 - return jsonify(contact) - - -@contacts_bp.route('/api/contacts//restore', methods=['POST']) -@require_auth -def restore_contact(contact_id): - db = _get_db() - contact, err = db.restore(request.user_id, contact_id) - if err == 'not_found': - return jsonify({'error': 'Not found'}), 404 - if err == 'conflict': - return jsonify({'error': 'You already have a Home/Work contact'}), 409 - return jsonify(contact) - - -@contacts_bp.route('/api/contacts//restore-as', methods=['POST']) -@require_auth -def restore_as_contact(contact_id): - db = _get_db() - data = request.get_json(force=True) - new_label = data.get('label', '').strip() - if not new_label: - return jsonify({'error': 'label is required'}), 400 - contact, err = db.restore_as(request.user_id, contact_id, new_label) - if err == 'not_found': - return jsonify({'error': 'Not found'}), 404 - if err == 'invalid_label': - return jsonify({'error': 'Invalid label'}), 400 - if err == 'conflict': - return jsonify({'error': 'Label conflict'}), 409 - return jsonify(contact) - - -@contacts_bp.route('/api/contacts//purge', methods=['DELETE']) -@require_auth -def purge_contact(contact_id): - db = _get_db() - ok, err = db.purge(request.user_id, contact_id) - if err == 'not_found': - return jsonify({'error': 'Not found'}), 404 - if err == 'not_deleted': - return jsonify({'error': 'Contact must be deleted before purging'}), 400 - return jsonify({'ok': True}) diff --git a/templates/base.html b/templates/base.html index 4c06892..49b1a21 100644 --- a/templates/base.html +++ b/templates/base.html @@ -21,7 +21,6 @@ PeerTube Kiwix Search - Nav-I Settings {% if subnav %} diff --git a/templates/navi/api_keys.html b/templates/navi/api_keys.html deleted file mode 100644 index abf2d16..0000000 --- a/templates/navi/api_keys.html +++ /dev/null @@ -1,269 +0,0 @@ -{% extends "base.html" %} -{% block content %} -

    API Keys

    - -
    -

    Updating keys does not restart RECON. After updates, click Restart RECON below or restart manually from terminal.

    -
    - -
    Loading keys...
    - - -
  • - - - - - - - - -
    - - -
    - - - -{% endblock %} - -{% block scripts %} - -{% endblock %} diff --git a/templates/navi/deleted_contacts.html b/templates/navi/deleted_contacts.html deleted file mode 100644 index 0847fab..0000000 --- a/templates/navi/deleted_contacts.html +++ /dev/null @@ -1,116 +0,0 @@ -{% extends "base.html" %} -{% block content %} -

    Deleted Contacts

    -{% if not contacts %} -

    No deleted contacts.

    -{% else %} - - - {% for c in contacts %} - - - - - - - - - {% endfor %} -
    LabelNameCategoryPhoneDeleted AtActions
    {{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} - - -
    -{% endif %} - - - -{% endblock %} -{% block scripts %} - -{% endblock %} diff --git a/templates/navi/landing.html b/templates/navi/landing.html deleted file mode 100644 index 131f3af..0000000 --- a/templates/navi/landing.html +++ /dev/null @@ -1,22 +0,0 @@ -{% extends "base.html" %} -{% block content %} -

    Nav-I

    -

    Navi frontend management — contacts, API keys, and configuration.

    - - -{% endblock %} From d7292c4cc7c834bd5e7f4166a19bc5a1aac5f2f6 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 04:04:45 -0600 Subject: [PATCH 62/72] cleanup: remove /api/geocode + /api/reverse handlers (extraction #6 shadow) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All three routes (/api/geocode, /api/reverse, /api/reverse//) are edge-shadowed since extraction #6 — navi-geo :8426 serves them via nginx. - netsyms_api.py: drop geocode_bp + its three handlers, the bundle-private helpers, and module state (TTLCache/lock/_TZ_DB_PATH/_DEM). netsyms_bp (/api/netsyms/lookup + /health) survives. - api.py: drop the geocode_bp import + register_blueprint line. - DELETE lib/geocode.py, lib/nav_tools.py (both orphaned once the handlers go). - DELETE reverse_bundle_test.py, geocode_test.py, nav_tools_test.py. Decouples netsyms_api.py from landclass.py and offroute/dem.py — prerequisite for cleanups #5 and #6. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 5 +- lib/geocode.py | 774 ------------------------------------- lib/geocode_test.py | 157 -------- lib/nav_tools.py | 168 -------- lib/nav_tools_test.py | 77 ---- lib/netsyms_api.py | 261 +------------ lib/reverse_bundle_test.py | 171 -------- 7 files changed, 3 insertions(+), 1610 deletions(-) delete mode 100644 lib/geocode.py delete mode 100644 lib/geocode_test.py delete mode 100644 lib/nav_tools.py delete mode 100644 lib/nav_tools_test.py delete mode 100644 lib/reverse_bundle_test.py diff --git a/lib/api.py b/lib/api.py index 61f25f2..e4a186f 100644 --- a/lib/api.py +++ b/lib/api.py @@ -59,10 +59,9 @@ class _LargeZimRequest(_FlaskRequest): return super()._get_file_stream(total_content_length, content_type, filename, content_length) app.request_class = _LargeZimRequest -# ── Netsyms + Geocode Blueprints ── -from .netsyms_api import netsyms_bp, geocode_bp +# ── Netsyms Blueprint ── +from .netsyms_api import netsyms_bp app.register_blueprint(netsyms_bp) -app.register_blueprint(geocode_bp) # ── Wiki-enrich Blueprint (extraction #5 prep — HTTP wrapper over wiki_index) ── from .wiki_enrich_api import wiki_enrich_bp diff --git a/lib/geocode.py b/lib/geocode.py deleted file mode 100644 index aabd37e..0000000 --- a/lib/geocode.py +++ /dev/null @@ -1,774 +0,0 @@ -""" -RECON geocode — structured preprocessing, multi-source retrieval, reranking. - -Replaces the naive Photon-only search with: - 1. usaddress parsing + intent classification (ADDRESS / POI / LOCALITY / COORD / POSTCODE) - 2. Multi-source retrieval: ADDRESS → Netsyms + Photon; POI/LOCALITY → Photon /api - 3. Python reranker with weighted signals - -Public entry point: geocode(query, limit) → {query, results, count} -""" - -import math -import re -import logging - -import requests -import usaddress -from rapidfuzz import fuzz - -from .utils import setup_logging - -logger = setup_logging('recon.geocode') - -# ── Trace logger for reranking audit ── -_trace_logger = logging.getLogger('recon.geocode.trace') -_trace_handler = logging.FileHandler('/tmp/geocode_rerank_trace.log') -_trace_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s')) -_trace_logger.addHandler(_trace_handler) -_trace_logger.setLevel(logging.DEBUG) - -# ── Config constants ── -PHOTON_URL = "http://localhost:2322" -GEOCODE_BIAS_LAT = 42.5736 -GEOCODE_BIAS_LON = -114.6066 -GEOCODE_BIAS_ZOOM = 10 -ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 - -# ── Reranker weights ── -# Derived from research analysis of failure modes: -# housenumber_exact is the strongest signal because Photon's soft-boost -# lets wrong-number results bubble up. street_name_fuzz and locality_fuzz -# handle abbreviation/case variation. source_authority gives Netsyms a -# boost for US addresses since it has USPS-verified data. -W_HOUSENUMBER_EXACT = 6.0 # exact housenumber match -W_HOUSENUMBER_MISMATCH = -5.0 # housenumber present but wrong -W_STREET_NAME_FUZZ = 3.0 # fuzzy street name similarity [0..1] * weight -W_TOKEN_COVERAGE = 2.0 # fraction of query tokens found in result -W_STREET_TYPE_MATCH = 1.5 # "st" matches "street", etc. -W_LOCALITY_FUZZ = 2.0 # city/state fuzzy match -W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses -W_LAYER_RANK = 1.0 # type-appropriate results ranked higher -W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position) -W_STATE_EXACT = 1.0 # exact state code match -W_POI_CLASS_BOOST = 3.0 # amenity/shop/etc boost for business-name queries -W_HIGHWAY_CLASS_PENALTY = -4.0 # highway/route penalty for business-name queries - -# ── US abbreviation expansions ── -# Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals. -_STREET_TYPE_ABBREVS = { - 'st': 'street', 'ave': 'avenue', 'blvd': 'boulevard', 'dr': 'drive', - 'rd': 'road', 'ln': 'lane', 'ct': 'court', 'cir': 'circle', - 'pl': 'place', 'way': 'way', 'pkwy': 'parkway', 'hwy': 'highway', - 'trl': 'trail', 'ter': 'terrace', 'sq': 'square', -} -_DIRECTIONAL_ABBREVS = { - 'n': 'north', 's': 'south', 'e': 'east', 'w': 'west', - 'ne': 'northeast', 'nw': 'northwest', 'se': 'southeast', 'sw': 'southwest', -} -_ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE) - -# ── Road keywords (for detecting when query is about a road vs a business) ── -_ROAD_KEYWORDS = ( - set(_STREET_TYPE_ABBREVS.keys()) - | set(_STREET_TYPE_ABBREVS.values()) - | {'route', 'rte', 'pass'} -) - -# ── US state codes ── -_STATE_CODES = { - 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', - 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', - 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', - 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', - 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC', -} - -# ── Full state name → code (for intent classifier) ── -_STATE_NAME_TO_CODE = { - 'alabama': 'AL', 'alaska': 'AK', 'arizona': 'AZ', 'arkansas': 'AR', - 'california': 'CA', 'colorado': 'CO', 'connecticut': 'CT', 'delaware': 'DE', - 'florida': 'FL', 'georgia': 'GA', 'hawaii': 'HI', 'idaho': 'ID', - 'illinois': 'IL', 'indiana': 'IN', 'iowa': 'IA', 'kansas': 'KS', - 'kentucky': 'KY', 'louisiana': 'LA', 'maine': 'ME', 'maryland': 'MD', - 'massachusetts': 'MA', 'michigan': 'MI', 'minnesota': 'MN', - 'mississippi': 'MS', 'missouri': 'MO', 'montana': 'MT', 'nebraska': 'NE', - 'nevada': 'NV', 'new hampshire': 'NH', 'new jersey': 'NJ', - 'new mexico': 'NM', 'new york': 'NY', 'north carolina': 'NC', - 'north dakota': 'ND', 'ohio': 'OH', 'oklahoma': 'OK', 'oregon': 'OR', - 'pennsylvania': 'PA', 'rhode island': 'RI', 'south carolina': 'SC', - 'south dakota': 'SD', 'tennessee': 'TN', 'texas': 'TX', 'utah': 'UT', - 'vermont': 'VT', 'virginia': 'VA', 'washington': 'WA', - 'west virginia': 'WV', 'wisconsin': 'WI', 'wyoming': 'WY', -} - -# Coordinate regex -_COORD_RE = re.compile(r'^\s*(-?\d+\.?\d*)\s*[,\s]\s*(-?\d+\.?\d*)\s*$') - - -# ═══════════════════════════════════════════════════════════════════ -# STEP 1: PREPROCESSING -# ═══════════════════════════════════════════════════════════════════ - -def _parse_coords(text): - """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" - m = _COORD_RE.match(text.strip()) - if not m: - return None - lat, lon = float(m.group(1)), float(m.group(2)) - if -90 <= lat <= 90 and -180 <= lon <= 180: - return lat, lon - return None - - -def _classify_and_parse(query): - """ - Parse query with usaddress, classify intent, expand abbreviations. - - Returns (intent, parsed_dict) where: - intent: 'ADDRESS' | 'POI' | 'LOCALITY' | 'POSTCODE' | 'COORD' | 'UNKNOWN' - parsed_dict: {number, street, city, state, zipcode, raw_query, expanded_query} - """ - q = query.strip() - parsed = { - 'number': None, 'street': None, 'street_raw': None, - 'city': None, 'state': None, - 'zipcode': None, 'raw_query': q, 'expanded_query': q, - } - - # Coordinate check first - if _parse_coords(q): - return 'COORD', parsed - - # Try usaddress - try: - tagged, addr_type = usaddress.tag(q) - except usaddress.RepeatedLabelError: - # Ambiguous input — fall back to free-text Photon - return 'UNKNOWN', parsed - - # Extract components - number = tagged.get('AddressNumber', '').strip() - street_name = tagged.get('StreetName', '').strip() - street_pre_dir = tagged.get('StreetNamePreDirectional', '').strip() - street_post_type = tagged.get('StreetNamePostType', '').strip() - place = tagged.get('PlaceName', '').strip() - state = tagged.get('StateName', '').strip() - zipcode = tagged.get('ZipCode', '').strip() - - # ── Fix usaddress edge case: "214 N St Filer" ── - # usaddress reads single-letter directional + "St" as PreDirectional + empty, - # mashing "St Filer" into StreetName. Detect: PreDirectional is single letter, - # StreetName has 2+ tokens where the first is a street type. - if (street_pre_dir and len(street_pre_dir) <= 2 - and not street_name.strip().startswith(street_pre_dir) - and ' ' in street_name): - name_tokens = street_name.split() - first_lower = name_tokens[0].lower() - if first_lower in _STREET_TYPE_ABBREVS or first_lower in _STREET_TYPE_ABBREVS.values(): - # "N" is actually the street name, "St" is the post-type - street_name = street_pre_dir - street_post_type = name_tokens[0] - if len(name_tokens) > 1: - place = ' '.join(name_tokens[1:]) - street_pre_dir = '' - - # ── Expand abbreviations (guard ordinals) ── - expanded_parts = [] - - if number: - parsed['number'] = number - expanded_parts.append(number) - - if street_pre_dir: - exp = _DIRECTIONAL_ABBREVS.get(street_pre_dir.lower(), street_pre_dir) - expanded_parts.append(exp) - - if street_name: - # Don't expand ordinals: "21st" stays "21st" - if _ORDINAL_RE.match(street_name): - expanded_parts.append(street_name) - else: - # Expand directional abbreviation if it IS the street name - exp = _DIRECTIONAL_ABBREVS.get(street_name.lower(), street_name) - expanded_parts.append(exp) - parsed['street'] = street_name - - if street_post_type: - if _ORDINAL_RE.match(street_post_type): - expanded_parts.append(street_post_type) - else: - exp = _STREET_TYPE_ABBREVS.get(street_post_type.lower(), street_post_type) - expanded_parts.append(exp) - - # Build raw street (original abbreviations, for Netsyms) and expanded (for Photon) - raw_street_parts = [] - if street_pre_dir: - raw_street_parts.append(street_pre_dir) - if street_name: - raw_street_parts.append(street_name) - if street_post_type: - raw_street_parts.append(street_post_type) - parsed['street_raw'] = ' '.join(raw_street_parts) - - # Build the full expanded street - if expanded_parts: - # The street is everything after the number - street_full = ' '.join(expanded_parts[1:] if number else expanded_parts) - parsed['street'] = street_full - - if place: - parsed['city'] = place - expanded_parts.append(place) - if state: - parsed['state'] = state.upper() - expanded_parts.append(state) - if zipcode: - parsed['zipcode'] = zipcode - expanded_parts.append(zipcode) - - parsed['expanded_query'] = ' '.join(expanded_parts) - - # ── Intent classification ── - if addr_type == 'Street Address' and number: - return 'ADDRESS', parsed - elif zipcode and not number and not street_name: - return 'POSTCODE', parsed - elif addr_type == 'Ambiguous': - # Check if it looks like a locality: last token(s) are a state code or name - tokens = q.replace(',', ' ').split() - if len(tokens) >= 2: - last_upper = tokens[-1].upper() - if last_upper in _STATE_CODES: - parsed['city'] = ' '.join(tokens[:-1]) - parsed['state'] = last_upper - return 'LOCALITY', parsed - # Check full state names (single-word like "idaho" or two-word like "new york") - last_lower = tokens[-1].lower() - if last_lower in _STATE_NAME_TO_CODE: - parsed['city'] = ' '.join(tokens[:-1]) - parsed['state'] = _STATE_NAME_TO_CODE[last_lower] - return 'LOCALITY', parsed - if len(tokens) >= 3: - two_word = f"{tokens[-2].lower()} {last_lower}" - if two_word in _STATE_NAME_TO_CODE: - parsed['city'] = ' '.join(tokens[:-2]) - parsed['state'] = _STATE_NAME_TO_CODE[two_word] - return 'LOCALITY', parsed - return 'UNKNOWN', parsed - else: - return 'UNKNOWN', parsed - - -# ═══════════════════════════════════════════════════════════════════ -# STEP 2: RETRIEVAL -# ═══════════════════════════════════════════════════════════════════ - -def _retrieve_netsyms(parsed, limit=10, lat=None, lon=None): - """Query Netsyms for structured address lookup. Returns list of candidate dicts.""" - try: - from . import netsyms - except Exception: - return [] - - results = [] - number = parsed.get('number', '') - street = parsed.get('street_raw') or parsed.get('street', '') - city = parsed.get('city', '') - state = parsed.get('state', '') - zipcode = parsed.get('zipcode', '') - - # When viewport provided, fetch more results to sort from - fetch_limit = 200 if (lat is not None and lon is not None) else limit - - if number and street: - rows = netsyms.lookup_by_street( - number, street, city=city, state=state, zipcode=zipcode, limit=fetch_limit - ) - elif zipcode: - rows = netsyms.lookup_by_zipcode(zipcode, limit=fetch_limit) - else: - return [] - - for row in rows: - addr_parts = [row['number'], row['street']] - if row.get('street2'): - addr_parts.append(row['street2']) - addr_parts.extend([row['city'], row['state'], row['zipcode']]) - display = ' '.join(p for p in addr_parts if p) - results.append({ - 'name': display, - 'lat': row['lat'], - 'lon': row['lon'], - 'source': 'netsyms', - 'type': 'street_address', - 'raw': row, - '_number': row.get('number', ''), - '_street': row.get('street', ''), - '_city': row.get('city', ''), - '_state': row.get('state', ''), - }) - # Sort by viewport distance if lat/lon provided, then limit - if lat is not None and lon is not None and results: - results.sort(key=lambda r: (r["lat"] - lat)**2 + (r["lon"] - lon)**2) - results = results[:limit] - return results - - -def _retrieve_photon_structured(parsed, limit=10): - """Query Photon /structured endpoint for address lookup.""" - params = {'limit': limit, 'countrycode': 'US'} - if parsed.get('street'): - params['street'] = parsed['street'] - if parsed.get('number'): - params['housenumber'] = parsed['number'] - if parsed.get('city'): - params['city'] = parsed['city'] - if parsed.get('state'): - params['state'] = parsed['state'] - - if 'street' not in params: - return [] - - try: - resp = requests.get(f"{PHOTON_URL}/structured", params=params, timeout=5) - resp.raise_for_status() - data = resp.json() - except Exception as e: - logger.debug("Photon /structured failed: %s", e) - return [] - - return _parse_photon_features(data.get('features', []), 'photon') - - -def _retrieve_photon_freetext(query, limit=10, lat=None, lon=None, zoom=None): - """Query Photon /api for free-text search with location bias.""" - try: - params = { - 'q': query, - 'limit': limit, - 'lat': lat if lat is not None else GEOCODE_BIAS_LAT, - 'lon': lon if lon is not None else GEOCODE_BIAS_LON, - 'zoom': int(zoom) if zoom is not None else GEOCODE_BIAS_ZOOM, - } - resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) - resp.raise_for_status() - data = resp.json() - except Exception as e: - return [] - - return _parse_photon_features(data.get('features', []), 'photon') - - -def _parse_photon_features(features, source): - """Convert Photon GeoJSON features to candidate dicts.""" - results = [] - for i, feature in enumerate(features): - props = feature.get('properties', {}) - coords = feature.get('geometry', {}).get('coordinates', [0, 0]) - - osm_key = props.get('osm_key', '') - osm_value = props.get('osm_value', '') - feat_type = props.get('type', '') - has_hn = bool(props.get('housenumber')) - - if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office'): - rtype = 'poi' - elif has_hn or osm_value in ('house', 'residential'): - rtype = 'street_address' - elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): - rtype = 'locality' - else: - rtype = 'poi' - - # Build display name - parts = [] - hn = props.get('housenumber') - street = props.get('street') - name = props.get('name', '') - if hn and street: - parts.append(f"{hn} {street}") - if name and name != street: - parts.append(name) - elif name: - parts.append(name) - elif street: - parts.append(street) - for key in ('city', 'county', 'state', 'country'): - v = props.get(key) - if v and (not parts or v != parts[-1]): - parts.append(v) - display = ', '.join(p for p in parts if p) or 'Unknown' - - results.append({ - 'name': display, - 'lat': coords[1], - 'lon': coords[0], - 'source': source, - 'type': rtype, - 'raw': props, - '_photon_rank': i, - '_number': props.get('housenumber', ''), - '_street': props.get('street', ''), - # For locality results, the name IS the city (Photon omits 'city' on city-type features) - '_city': props.get('city', '') or (props.get('name', '') if rtype == 'locality' else ''), - '_state': props.get('state', ''), - }) - return results - - -# ═══════════════════════════════════════════════════════════════════ -# STEP 3: RERANKER -# ═══════════════════════════════════════════════════════════════════ - -def _expand_street_type(s): - """Expand a street type abbreviation for comparison.""" - return _STREET_TYPE_ABBREVS.get(s.lower(), s.lower()) - - -def _score_candidate(candidate, parsed, intent): - """ - Score a candidate against the parsed query. - Returns (total_score, signal_breakdown_dict). - """ - signals = {} - total = 0.0 - - query_number = (parsed.get('number') or '').strip().upper() - query_street = (parsed.get('street') or '').strip().upper() - query_city = (parsed.get('city') or '').strip().upper() - query_state = (parsed.get('state') or '').strip().upper() - - cand_number = (candidate.get('_number') or '').strip().upper() - cand_street = (candidate.get('_street') or '').strip().upper() - cand_city = (candidate.get('_city') or '').strip().upper() - cand_state = (candidate.get('_state') or '').strip().upper() - - # ── Housenumber ── - if intent == 'ADDRESS' and query_number: - if cand_number == query_number: - signals['housenumber_exact'] = W_HOUSENUMBER_EXACT - total += W_HOUSENUMBER_EXACT - elif cand_number and cand_number != query_number: - signals['housenumber_mismatch'] = W_HOUSENUMBER_MISMATCH - total += W_HOUSENUMBER_MISMATCH - - # ── Street name fuzz ── - if query_street and cand_street: - # Expand both for comparison - q_expanded = ' '.join(_expand_street_type(t) for t in query_street.split()) - c_expanded = ' '.join(_expand_street_type(t) for t in cand_street.split()) - ratio = fuzz.token_sort_ratio(q_expanded, c_expanded) / 100.0 - score = ratio * W_STREET_NAME_FUZZ - signals['street_name_fuzz'] = round(score, 2) - total += score - - # ── Street type match ── - if query_street and cand_street: - q_tokens = set(_expand_street_type(t) for t in query_street.split()) - c_tokens = set(_expand_street_type(t) for t in cand_street.split()) - # Check if the street type words overlap - street_types = set(_STREET_TYPE_ABBREVS.values()) - q_types = q_tokens & street_types - c_types = c_tokens & street_types - if q_types and q_types & c_types: - signals['street_type_match'] = W_STREET_TYPE_MATCH - total += W_STREET_TYPE_MATCH - - # ── Token coverage ── - raw_q = parsed.get('raw_query', '').upper() - q_tokens = set(raw_q.replace(',', ' ').split()) - if q_tokens: - cand_text = candidate.get('name', '').upper() - matched = sum(1 for t in q_tokens if t in cand_text) - coverage = matched / len(q_tokens) - score = coverage * W_TOKEN_COVERAGE - signals['token_coverage'] = round(score, 2) - total += score - - # ── Locality fuzz ── - if query_city and cand_city: - ratio = fuzz.ratio(query_city, cand_city) / 100.0 - score = ratio * W_LOCALITY_FUZZ - signals['locality_fuzz'] = round(score, 2) - total += score - - # ── State exact ── - if query_state and cand_state: - if cand_state == query_state: - signals['state_exact'] = W_STATE_EXACT - total += W_STATE_EXACT - - # ── Source authority ── - if candidate.get('source') == 'netsyms' and intent == 'ADDRESS': - signals['source_authority'] = W_SOURCE_AUTHORITY - total += W_SOURCE_AUTHORITY - - # ── Layer rank (type-appropriate bonus) ── - cand_type = candidate.get('type', '') - if intent == 'ADDRESS' and cand_type == 'street_address': - signals['layer_rank'] = W_LAYER_RANK - total += W_LAYER_RANK - elif intent == 'LOCALITY' and cand_type == 'locality': - signals['layer_rank'] = W_LAYER_RANK - total += W_LAYER_RANK - elif intent == 'POI' and cand_type == 'poi': - signals['layer_rank'] = W_LAYER_RANK - total += W_LAYER_RANK - - # ── Photon position normalization ── - photon_rank = candidate.get('_photon_rank') - if photon_rank is not None: - # Top result gets full bonus, decays linearly - score = max(0, (1.0 - photon_rank / 10.0)) * W_PHOTON_POSITION_NORM - signals['photon_position'] = round(score, 2) - total += score - - # ── Business intent POI boost ── - # When the query has no road keywords (likely a business/POI search), - # boost amenity/shop/etc results and penalize highway/route results. - # Skipped for LOCALITY, POSTCODE, COORD queries where class is irrelevant. - if intent not in ('LOCALITY', 'POSTCODE', 'COORD'): - q_tokens_lower = set(parsed.get('raw_query', '').lower().replace(',', ' ').split()) - if not (q_tokens_lower & _ROAD_KEYWORDS): - osm_key = (candidate.get('raw') or {}).get('osm_key', '') - if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'): - signals['poi_class_boost'] = W_POI_CLASS_BOOST - total += W_POI_CLASS_BOOST - elif osm_key in ('highway', 'route'): - signals['highway_class_penalty'] = W_HIGHWAY_CLASS_PENALTY - total += W_HIGHWAY_CLASS_PENALTY - - return round(total, 2), signals - - -def _build_match_code(candidate, parsed, intent): - """Build a match_code dict indicating match quality for each field.""" - mc = {} - if intent == 'ADDRESS': - q_num = (parsed.get('number') or '').strip().upper() - c_num = (candidate.get('_number') or '').strip().upper() - if q_num and c_num == q_num: - mc['housenumber'] = 'matched' - elif q_num and c_num: - mc['housenumber'] = 'unmatched' - elif q_num and not c_num: - mc['housenumber'] = 'inferred' - - q_street = (parsed.get('street') or '').strip().upper() - c_street = (candidate.get('_street') or '').strip().upper() - if q_street and c_street: - q_exp = ' '.join(_expand_street_type(t) for t in q_street.split()) - c_exp = ' '.join(_expand_street_type(t) for t in c_street.split()) - ratio = fuzz.token_sort_ratio(q_exp, c_exp) / 100.0 - mc['street'] = 'matched' if ratio > 0.8 else 'unmatched' - elif q_street: - mc['street'] = 'inferred' - - q_city = (parsed.get('city') or '').strip().upper() - c_city = (candidate.get('_city') or '').strip().upper() - if q_city and c_city: - ratio = fuzz.ratio(q_city, c_city) / 100.0 - mc['city'] = 'matched' if ratio > 0.8 else 'unmatched' - elif q_city: - mc['city'] = 'inferred' - - return mc - - -def _rerank(candidates, parsed, intent, query, limit): - """Score, sort, and trim candidates. Trace-log top 3.""" - scored = [] - for c in candidates: - total, signals = _score_candidate(c, parsed, intent) - c['_score'] = total - c['_signals'] = signals - scored.append(c) - - scored.sort(key=lambda c: c['_score'], reverse=True) - - # Trace log for audit - _trace_logger.debug("─── Query: %r intent=%s ───", query, intent) - for i, c in enumerate(scored): - osm_key = (c.get('raw') or {}).get('osm_key', '—') - osm_val = (c.get('raw') or {}).get('osm_value', '—') - _trace_logger.debug( - " #%d score=%.2f src=%s key=%s/%s name=%s", - i, c['_score'], c.get('source', '?'), osm_key, osm_val, - c.get('name', '?')[:60] - ) - _trace_logger.debug(" signals=%s", c.get('_signals', {})) - - # Clean internal fields and add match_code - result = [] - for c in scored[:limit]: - mc = _build_match_code(c, parsed, intent) - - # Assign confidence from score - score = c.get('_score', 0) - if score >= 10: - confidence = 'exact' - elif score >= 5: - confidence = 'high' - elif score >= 2: - confidence = 'medium' - else: - confidence = 'low' - - entry = { - 'name': c['name'], - 'lat': c['lat'], - 'lon': c['lon'], - 'source': c['source'], - 'confidence': confidence, - 'type': c.get('type', 'poi'), - 'raw': c.get('raw'), - } - if mc: - entry['match_code'] = mc - result.append(entry) - - return result - - -# ═══════════════════════════════════════════════════════════════════ -# STEP 4: ANNOTATION -# ═══════════════════════════════════════════════════════════════════ - -def _haversine_m(lat1, lon1, lat2, lon2): - """Haversine distance in meters.""" - R = 6_371_000 - rlat1, rlat2 = math.radians(lat1), math.radians(lat2) - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 - return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) - - -def _annotate_with_address_book(results): - """Add labeled_as to results within radius of an address book entry.""" - try: - from . import address_book - entries = address_book.load() - except Exception: - return - for result in results: - rlat, rlon = result.get('lat'), result.get('lon') - if rlat is None or rlon is None: - continue - for entry in entries: - elat, elon = entry.get('lat'), entry.get('lon') - if elat is None or elon is None: - continue - if _haversine_m(rlat, rlon, elat, elon) <= ADDRESS_BOOK_ANNOTATION_RADIUS_M: - result['labeled_as'] = entry['name'] - break - - -# ═══════════════════════════════════════════════════════════════════ -# PUBLIC API -# ═══════════════════════════════════════════════════════════════════ - -def geocode(query, limit=10, lat=None, lon=None, zoom=None): - """ - Structured geocoding with multi-source retrieval and reranking. - - Returns {query, results: [...], count} — always 200-safe. - """ - limit = max(1, min(limit, 20)) - q = (query or '').strip() - empty = {'query': q, 'results': [], 'count': 0} - - if not q: - return empty - - # ── Coordinate detection ── - coords = _parse_coords(q) - if coords: - return { - 'query': q, - 'results': [{ - 'name': q, - 'lat': coords[0], - 'lon': coords[1], - 'source': 'coordinates', - 'confidence': 'exact', - 'type': 'coordinates', - 'raw': None, - }], - 'count': 1, - } - - # ── Address book nickname short-circuit ── - normalized_q = ' '.join(q.lower().replace(',', ' ').split()) - is_single_word = ' ' not in normalized_q - try: - from . import address_book - ab_match = address_book.lookup(q) - if (ab_match - and ab_match['confidence'] == 'exact' - and ab_match.get('lat') and ab_match.get('lon') - and is_single_word): - logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name']) - return { - 'query': q, - 'results': [{ - 'name': ab_match.get('address') or ab_match['name'], - 'lat': ab_match['lat'], - 'lon': ab_match['lon'], - 'source': 'address_book', - 'confidence': 'exact', - 'type': 'nickname', - 'raw': ab_match, - }], - 'count': 1, - } - except Exception as e: - logger.debug("geocode: address_book lookup failed: %s", e) - - # ── Classify intent + parse ── - intent, parsed = _classify_and_parse(q) - logger.debug("geocode: intent=%s parsed=%s", intent, parsed) - - # ── Retrieve candidates ── - candidates = [] - - if intent == 'ADDRESS': - # Parallel: Netsyms (structured) + Photon (freetext with expanded query) - netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon) - photon_results = _retrieve_photon_freetext( - parsed.get('expanded_query', q), limit=limit, lat=lat, lon=lon, zoom=zoom - ) - # Also try Photon /structured for addresses - photon_struct = _retrieve_photon_structured(parsed, limit=5) - candidates = netsyms_results + photon_results + photon_struct - - elif intent == 'POSTCODE': - netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon) - photon_results = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom) - candidates = netsyms_results + photon_results - - elif intent in ('LOCALITY', 'POI', 'UNKNOWN'): - candidates = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom) - - # ── Deduplicate by (lat, lon) proximity ── - deduped = [] - for c in candidates: - is_dup = False - for existing in deduped: - if (_haversine_m(c['lat'], c['lon'], existing['lat'], existing['lon']) < 50 - and c.get('source') == existing.get('source')): - is_dup = True - break - if not is_dup: - deduped.append(c) - candidates = deduped - - # ── Rerank ── - results = _rerank(candidates, parsed, intent, q, limit) - - # ── Address book annotation ── - _annotate_with_address_book(results) - - logger.info("geocode: %r → intent=%s, %d results", q, intent, len(results)) - return {'query': q, 'results': results, 'count': len(results)} diff --git a/lib/geocode_test.py b/lib/geocode_test.py deleted file mode 100644 index 4717b1e..0000000 --- a/lib/geocode_test.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for RECON Photon-first geocode chain.""" -import sys -import os -import json -import urllib.request -import urllib.parse - -BASE = "http://localhost:8420" - -TESTS = [ - { - "name": "home → nickname short-circuit", - "query": "home", - "check": lambda r: ( - r["count"] == 1 - and r["results"][0]["source"] == "address_book" - and r["results"][0]["confidence"] == "exact" - and r["results"][0]["type"] == "nickname" - ), - }, - { - "name": "214 north st filer → netsyms exact match (multi-word, not nickname)", - "query": "214 north st filer", - "check": lambda r: ( - r["count"] >= 1 - and r["results"][0]["source"] == "netsyms" - and r["results"][0]["confidence"] == "exact" - and r["results"][0]["type"] == "street_address" - ), - }, - { - "name": "214 North St, Filer, ID → netsyms (case/punctuation)", - "query": "214 North St, Filer, ID", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "netsyms", - }, - { - "name": "214 NORTH ST FILER ID → netsyms (uppercase)", - "query": "214 NORTH ST FILER ID", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "netsyms", - }, - { - "name": "1600 Pennsylvania Ave Washington DC → White House", - "query": "1600 Pennsylvania Ave Washington DC", - "check": lambda r: ( - r["count"] >= 1 - and r["results"][0]["source"] == "photon" - ), - }, - { - "name": "1600 pennsylvania ave washington dc → lowercase", - "query": "1600 pennsylvania ave washington dc", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", - }, - { - "name": "starbucks filer → POI result", - "query": "starbucks filer", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", - }, - { - "name": "filer idaho → locality", - "query": "filer idaho", - "check": lambda r: ( - r["count"] >= 1 - and r["results"][0]["source"] == "photon" - and r["results"][0]["type"] == "locality" - ), - }, - { - "name": "filer → partial query, at least 1 result", - "query": "filer", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", - }, - { - "name": "42.5736, -114.6066 → coordinates (with space)", - "query": "42.5736, -114.6066", - "check": lambda r: ( - r["count"] == 1 - and r["results"][0]["source"] == "coordinates" - and r["results"][0]["confidence"] == "exact" - and r["results"][0]["type"] == "coordinates" - ), - }, - { - "name": "42.5736,-114.6066 → coordinates (no space)", - "query": "42.5736,-114.6066", - "check": lambda r: ( - r["count"] == 1 - and r["results"][0]["source"] == "coordinates" - and r["results"][0]["confidence"] == "exact" - ), - }, - { - "name": "boise → at least 1 result", - "query": "boise", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", - }, - { - "name": "toronto → CA canary", - "query": "toronto", - "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", - }, - { - "name": "asdfghjklqwerty → empty results, 200 OK", - "query": "asdfghjklqwerty", - "check": lambda r: r["count"] == 0 and r["results"] == [], - }, - { - "name": "empty query → empty results", - "query": "", - "check": lambda r: r["count"] == 0 and r["results"] == [], - }, -] - -passed = 0 -failed = 0 - -for t in TESTS: - q = urllib.parse.urlencode({"q": t["query"]}) if t["query"] else "q=" - url = f"{BASE}/api/geocode?{q}" - try: - req = urllib.request.Request(url) - with urllib.request.urlopen(req, timeout=10) as resp: - status = resp.status - body = json.loads(resp.read()) - except urllib.error.HTTPError as e: - status = e.code - try: - body = json.loads(e.read()) - except Exception: - body = {} - except Exception as e: - status = 0 - body = {} - print(f" [FAIL] {t['name']}") - print(f" EXCEPTION: {e}") - failed += 1 - continue - - ok = status == 200 and t["check"](body) - tag = "PASS" if ok else "FAIL" - if ok: - passed += 1 - else: - failed += 1 - - top = body.get("results", [{}])[0] if body.get("results") else {} - top_summary = f"source={top.get('source','—')} type={top.get('type','—')} conf={top.get('confidence','—')} name={top.get('name','—')[:50]}" - print(f" [{tag}] {t['name']}") - if not ok: - print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}") - else: - labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else "" - print(f" → {top_summary}{labeled}") - -print(f"\n{passed} passed, {failed} failed") -sys.exit(0 if failed == 0 else 1) diff --git a/lib/nav_tools.py b/lib/nav_tools.py deleted file mode 100644 index d4bb1f7..0000000 --- a/lib/nav_tools.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Navigation tools: geocoding via Photon and routing via Valhalla.""" - -import math -import re -import requests - -from .utils import setup_logging - -logger = setup_logging('recon.nav_tools') - -PHOTON_URL = "http://localhost:2322" -VALHALLA_URL = "http://localhost:8002" - -# Regional bias for Photon searches (Idaho-centric for Matt's use case). -# Adjustable — Photon uses these to rank nearby results higher. -GEOCODE_BIAS_LAT = 42.5736 -GEOCODE_BIAS_LON = -114.6066 -GEOCODE_BIAS_ZOOM = 10 - -# Distance threshold (meters) for annotating Photon results with address -# book labels. 75m covers GPS jitter + geocoder imprecision. -ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 - -# Coordinate regex — handles comma-separated and space-separated forms. -_COORD_RE = re.compile( - r'^\s*(-?\d+\.\d+)\s*[,\s]\s*(-?\d+\.\d+)\s*$' -) - -VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"} - - -def _parse_coords(text: str): - """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" - m = _COORD_RE.match(text.strip()) - if not m: - return None - lat, lon = float(m.group(1)), float(m.group(2)) - if -90 <= lat <= 90 and -180 <= lon <= 180: - return lat, lon - return None - - -def _haversine_m(lat1, lon1, lat2, lon2): - """Haversine distance in meters between two (lat, lon) points.""" - R = 6_371_000 # Earth radius in meters - rlat1, rlat2 = math.radians(lat1), math.radians(lat2) - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 - return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) - - -def geocode(query: str, limit: int = 10, lat=None, lon=None, zoom=None): - """Delegate to the structured geocode module. See lib/geocode.py.""" - from . import geocode as geocode_mod - return geocode_mod.geocode(query, limit=limit, lat=lat, lon=lon, zoom=zoom) - - -def _geocode(query: str): - """Internal: returns (lat, lon, display_name) tuple for route().""" - result = geocode(query, limit=1) - results = result.get('results', []) - if not results: - raise ValueError(f"Could not find location: {query}") - top = results[0] - return top['lat'], top['lon'], top['name'] - - -def reverse_geocode(lat: float, lon: float) -> str: - """Reverse geocode coordinates via Photon. Returns formatted address string.""" - try: - resp = requests.get( - f"{PHOTON_URL}/reverse", - params={"lat": lat, "lon": lon, "limit": 1}, - timeout=10, - ) - resp.raise_for_status() - except requests.RequestException: - raise RuntimeError("Navigation service unavailable") - - data = resp.json() - features = data.get("features", []) - if not features: - return f"{lat}, {lon}" - - props = features[0]["properties"] - parts = [] - for key in ("name", "housenumber", "street", "city", "state", "country", "postcode"): - v = props.get(key) - if v: - parts.append(v) - return ", ".join(parts) if parts else f"{lat}, {lon}" - - -def route(origin: str, destination: str, mode: str = "auto") -> dict: - """ - Get a route between two locations. - - Args: - origin: Starting location — address, place name, or "lat,lon" - destination: Destination — address, place name, or "lat,lon" - mode: Travel mode — auto, pedestrian, bicycle, truck - - Returns: - dict with summary, maneuvers, origin/destination info, and raw shape - """ - if mode not in VALID_MODES: - mode = "auto" - - # Geocode both endpoints - orig_lat, orig_lon, orig_name = _geocode(origin) - dest_lat, dest_lon, dest_name = _geocode(destination) - - # Query Valhalla - valhalla_req = { - "locations": [ - {"lat": orig_lat, "lon": orig_lon}, - {"lat": dest_lat, "lon": dest_lon}, - ], - "costing": mode, - "directions_options": {"units": "miles"}, - } - - try: - resp = requests.post( - f"{VALHALLA_URL}/route", - json=valhalla_req, - timeout=30, - ) - except requests.RequestException: - raise RuntimeError("Navigation service unavailable") - - if resp.status_code != 200: - try: - err = resp.json() - msg = err.get("error", "Unknown routing error") - except Exception: - msg = f"Routing error (HTTP {resp.status_code})" - raise RuntimeError(f"No route found between locations: {msg}") - - data = resp.json() - trip = data["trip"] - summary = trip["summary"] - leg = trip["legs"][0] - - # Build maneuver list - maneuvers = [] - for m in leg["maneuvers"]: - streets = m.get("street_names", []) - maneuvers.append({ - "instruction": m["instruction"], - "distance_miles": round(m.get("length", 0), 2), - "street_name": streets[0] if streets else "", - "type": m.get("type", 0), - "verbal_succinct": m.get("verbal_succinct_transition_instruction", ""), - }) - - return { - "origin": {"name": orig_name, "lat": orig_lat, "lon": orig_lon}, - "destination": {"name": dest_name, "lat": dest_lat, "lon": dest_lon}, - "summary": { - "distance_miles": round(summary["length"], 1), - "time_minutes": round(summary["time"] / 60, 1), - "mode": mode, - }, - "maneuvers": maneuvers, - "shape": leg.get("shape", ""), - } diff --git a/lib/nav_tools_test.py b/lib/nav_tools_test.py deleted file mode 100644 index b987293..0000000 --- a/lib/nav_tools_test.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Tests for nav_tools — run against live Photon + Valhalla services.""" - -import sys -import json - -from nav_tools import route, reverse_geocode - - -def test_route_named(): - """route("Buhl Idaho", "Boise Idaho", "auto") returns maneuvers.""" - print("TEST 1: route('Buhl Idaho', 'Boise Idaho', 'auto')") - r = route("Buhl Idaho", "Boise Idaho", "auto") - assert r["summary"]["distance_miles"] > 50, f"Expected >50 mi, got {r['summary']['distance_miles']}" - assert r["summary"]["time_minutes"] > 60, f"Expected >60 min, got {r['summary']['time_minutes']}" - assert len(r["maneuvers"]) > 5, f"Expected >5 maneuvers, got {len(r['maneuvers'])}" - assert r["shape"], "Missing polyline shape" - print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min, {len(r['maneuvers'])} maneuvers") - print(f" Origin: {r['origin']['name']}") - print(f" Destination: {r['destination']['name']}") - print(f" First maneuver: {r['maneuvers'][0]['instruction']}") - - -def test_route_coords(): - """route with raw lat,lon coordinates.""" - print("\nTEST 2: route('42.5991,-114.7636', '43.615,-116.2023', 'auto')") - r = route("42.5991,-114.7636", "43.615,-116.2023", "auto") - assert r["summary"]["distance_miles"] > 100, f"Expected >100 mi, got {r['summary']['distance_miles']}" - assert len(r["maneuvers"]) > 3, f"Expected >3 maneuvers" - print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min") - - -def test_route_pedestrian(): - """route with pedestrian mode.""" - print("\nTEST 3: route('Buhl Idaho', 'Boise Idaho', 'pedestrian')") - r = route("Buhl Idaho", "Boise Idaho", "pedestrian") - assert r["summary"]["mode"] == "pedestrian" - assert r["summary"]["time_minutes"] > r["summary"]["distance_miles"], "Walking should take more min than miles" - print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min (pedestrian)") - - -def test_reverse_geocode(): - """reverse_geocode near Buhl, Idaho.""" - print("\nTEST 4: reverse_geocode(42.5991, -114.7636)") - result = reverse_geocode(42.5991, -114.7636) - assert "Buhl" in result or "Twin Falls" in result or "Idaho" in result, f"Expected Buhl/Idaho, got: {result}" - print(f" OK — {result}") - - -def test_route_bad_origin(): - """route with nonexistent place returns clean error.""" - print("\nTEST 5: route('nonexistent place xyz123abc', 'Boise Idaho')") - try: - r = route("nonexistent place xyz123abc", "Boise Idaho") - print(f" FAIL — expected error, got result: {r['summary']}") - return False - except ValueError as e: - print(f" OK — clean error: {e}") - except RuntimeError as e: - print(f" OK — runtime error: {e}") - - -if __name__ == "__main__": - passed = 0 - failed = 0 - tests = [test_route_named, test_route_coords, test_route_pedestrian, test_reverse_geocode, test_route_bad_origin] - - for test in tests: - try: - test() - passed += 1 - except Exception as e: - print(f" FAIL — {e}") - failed += 1 - - print(f"\n{'='*40}") - print(f"Results: {passed} passed, {failed} failed out of {len(tests)}") - sys.exit(1 if failed else 0) diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index d217eb0..2caf47c 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -1,29 +1,19 @@ """ -RECON Netsyms API + Geocode — Flask Blueprints. +RECON Netsyms API — Flask Blueprint. GET /api/netsyms/lookup?q=&country= GET /api/netsyms/health -GET /api/geocode?q=&limit= (Photon-first search with ranked results) -GET /api/reverse// (localhost-sourced enrichment bundle for Central) """ -import sqlite3 -import threading - -from cachetools import TTLCache from flask import Blueprint, request, jsonify from . import netsyms from . import address_book -from . import nav_tools -from .geocode import PHOTON_URL -from .offroute.dem import DEMReader from .utils import setup_logging logger = setup_logging('recon.netsyms_api') netsyms_bp = Blueprint('netsyms', __name__) -geocode_bp = Blueprint('geocode', __name__) @netsyms_bp.route('/api/netsyms/lookup') @@ -40,252 +30,3 @@ def api_netsyms_lookup(): @netsyms_bp.route('/api/netsyms/health') def api_netsyms_health(): return jsonify(netsyms.health()) - - - -def _safe_float(val, lo, hi): - """Parse val as float; return None if missing, non-numeric, or out of [lo, hi].""" - if val is None: - return None - try: - f = float(val) - if lo <= f <= hi: - return f - except (ValueError, TypeError): - pass - return None - -@geocode_bp.route('/api/geocode') -def api_geocode(): - """ - Photon-first geocoding with ranked candidates. - - GET /api/geocode?q=&limit= - - Always returns 200 OK with: - {query, results: [{name, lat, lon, source, confidence, type, raw, ...}], count} - - - source: "address_book" | "coordinates" | "photon" - - confidence: "exact" | "high" | "medium" | "low" - - type: "nickname" | "coordinates" | "street_address" | "poi" | "locality" - - labeled_as: present when result is within 75m of an address book entry - - Empty results array is valid (no match). No 404s. - """ - q = request.args.get('q', '').strip() - limit = request.args.get('limit', '10') - try: - limit = max(1, min(int(limit), 20)) - except (ValueError, TypeError): - limit = 10 - - # Viewport bias parameters (optional) - lat = _safe_float(request.args.get("lat"), -90, 90) - lon = _safe_float(request.args.get("lon"), -180, 180) - zoom = _safe_float(request.args.get("zoom"), 0, 22) - - result = nav_tools.geocode(q, limit=limit, lat=lat, lon=lon, zoom=zoom) - return jsonify(result) - - -@geocode_bp.route('/api/reverse') -def api_reverse(): - """ - Reverse geocode coordinates via Photon. - - GET /api/reverse?lat=X&lon=Y - - Returns same shape as /api/geocode: - {query: "lat,lon", results: [{name, lat, lon, source, type, raw, ...}], count} - - Returns 200 OK with empty results on no match. 400 on invalid coords. - """ - try: - lat = float(request.args.get('lat', '')) - lon = float(request.args.get('lon', '')) - except (ValueError, TypeError): - return jsonify({'error': 'Missing or invalid lat/lon parameters'}), 400 - - if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): - return jsonify({'error': 'Coordinates out of range'}), 400 - - query_str = f"{lat},{lon}" - - try: - import requests as http_requests - resp = http_requests.get( - "http://localhost:2322/reverse", - params={"lat": lat, "lon": lon, "limit": 1}, - timeout=10, - ) - resp.raise_for_status() - data = resp.json() - features = data.get("features", []) - except Exception: - logger.warning("Photon reverse geocode failed for %s", query_str) - return jsonify({'query': query_str, 'results': [], 'count': 0}) - - if not features: - return jsonify({'query': query_str, 'results': [], 'count': 0}) - - from .geocode import _parse_photon_features - results = _parse_photon_features(features, source='photon_reverse') - - return jsonify({'query': query_str, 'results': results, 'count': len(results)}) - - -# ───────────────────────────────────────────────────────────────────────── -# /api/reverse// — localhost-sourced enrichment bundle (Central) -# -# Sibling to the query-string /api/reverse above; that route is unchanged. -# Every component is sourced from localhost only (Photon, timezones.sqlite, -# in-process landclass/PostGIS, planet-DEM PMTiles). Each lookup is -# independent: a component failure logs a warning and yields null — never 5xx. -# ───────────────────────────────────────────────────────────────────────── - -_TZ_DB_PATH = "/mnt/nav/sources/timezones.sqlite" - -# Full bundle cache: key=(round(lat,4), round(lon,4)) -> dict. ~10k entries, 24h TTL. -_REVERSE_BUNDLE_CACHE = TTLCache(maxsize=10_000, ttl=86_400) -_REVERSE_BUNDLE_LOCK = threading.Lock() - -_BUNDLE_KEYS = ('name', 'city', 'county', 'state', 'country', - 'postal_code', 'timezone', 'landclass', 'elevation_m') - -# planet-DEM elevation source (single PMTiles, replaces Valhalla /height). -# Instantiated once at import; the underlying mmap is lazy. None if unavailable. -try: - _DEM = DEMReader() -except Exception as e: # pragma: no cover - depends on PMTiles availability - logger.warning("DEMReader unavailable, elevation will be null: %s", e) - _DEM = None - - -def _spatialite_blob_to_wkb(blob): - """Recover standard WKB from a SpatiaLite geometry BLOB. - - Layout: [00][endian][srid:4][mbr:32][7C][WKB body][FE]. The body omits the - leading byte-order marker, so we re-prepend it and drop the trailing 0xFE. - """ - return bytes([blob[1]]) + blob[39:-1] - - -def _reverse_photon(lat, lon): - """Nearest-feature admin fields from local Photon. Returns the six address - fields (any value may be None). Mirrors the existing /api/reverse call.""" - import requests as http_requests - resp = http_requests.get( - f"{PHOTON_URL}/reverse", - params={"lat": lat, "lon": lon, "limit": 1}, - timeout=10, - ) - resp.raise_for_status() - features = resp.json().get("features", []) - if not features: - return {} - props = features[0].get("properties", {}) - return { - "name": props.get("name"), - "city": props.get("city"), - "county": props.get("county"), - "state": props.get("state"), - "country": props.get("country"), - "postal_code": props.get("postcode"), - } - - -def _reverse_timezone(lat, lon): - """IANA tzid for the point from local timezones.sqlite (SpatiaLite tz_world). - - Uses the table's R-tree index for an MBR prefilter, then shapely - point-in-polygon on the few candidates. Returns None if unresolved. - """ - from shapely import wkb - from shapely.geometry import Point - con = sqlite3.connect(f"file:{_TZ_DB_PATH}?mode=ro", uri=True) - try: - cur = con.cursor() - cur.execute( - "SELECT pkid FROM idx_tz_world_geom " - "WHERE xmin<=? AND xmax>=? AND ymin<=? AND ymax>=?", - (lon, lon, lat, lat), - ) - candidates = [r[0] for r in cur.fetchall()] - if not candidates: - return None - pt = Point(lon, lat) - for pk in candidates: - row = cur.execute( - "SELECT tzid, geom FROM tz_world WHERE pk_uid=?", (pk,) - ).fetchone() - if row and wkb.loads(_spatialite_blob_to_wkb(row[1])).contains(pt): - return row[0] - return None - finally: - con.close() - - -def _reverse_landclass(lat, lon): - """Most-specific PAD-US land class for the point, looked up in-process. - Returns None when there is no coverage or landclass is unavailable.""" - from .landclass import lookup_landclass, format_summary - return format_summary(lookup_landclass(lat, lon)) - - -def _reverse_elevation(lat, lon): - """Elevation in metres from the planet-DEM PMTiles — the single elevation - source per OFFROUTE-ARCHITECTURE.md §9. None on failure, on untiled points - (e.g. true ocean), or if DEMReader could not be initialized at startup.""" - if _DEM is None: - return None - return _DEM.sample_point(lat, lon) - - -@geocode_bp.route('/api/reverse//') -def api_reverse_bundle(lat, lon): - """Localhost-sourced reverse-geocode enrichment bundle for Central. - - GET /api/reverse// - - Always returns 200 with EXACTLY these keys (any may be null): - name, city, county, state, country, postal_code, timezone, landclass, elevation_m - - lat/lon are parsed manually (not via Flask's converter, which - rejects negative and integer coordinates) so out-of-range or unparseable - input yields 400 per contract; 503 is reserved for catastrophic failure. - """ - try: - lat = float(lat) - lon = float(lon) - except (ValueError, TypeError): - return jsonify({'error': 'lat and lon must be numbers'}), 400 - if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): - return jsonify({'error': 'lat must be -90..90, lon must be -180..180'}), 400 - - key = (round(lat, 4), round(lon, 4)) - with _REVERSE_BUNDLE_LOCK: - cached = _REVERSE_BUNDLE_CACHE.get(key) - if cached is not None: - return jsonify(cached) - - bundle = {k: None for k in _BUNDLE_KEYS} - - try: - bundle.update(_reverse_photon(lat, lon)) - except Exception: - logger.warning("reverse-bundle: Photon lookup failed for %s,%s", lat, lon) - try: - bundle['timezone'] = _reverse_timezone(lat, lon) - except Exception: - logger.warning("reverse-bundle: timezone lookup failed for %s,%s", lat, lon) - try: - bundle['landclass'] = _reverse_landclass(lat, lon) - except Exception: - logger.warning("reverse-bundle: landclass lookup failed for %s,%s", lat, lon) - try: - bundle['elevation_m'] = _reverse_elevation(lat, lon) - except Exception: - logger.warning("reverse-bundle: elevation lookup failed for %s,%s", lat, lon) - - with _REVERSE_BUNDLE_LOCK: - _REVERSE_BUNDLE_CACHE[key] = bundle - return jsonify(bundle) diff --git a/lib/reverse_bundle_test.py b/lib/reverse_bundle_test.py deleted file mode 100644 index 6defd9e..0000000 --- a/lib/reverse_bundle_test.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for the /api/reverse// enrichment bundle (lib.netsyms_api). - -Photon/DEM/landclass are mocked so the suite runs without live services; -one timezone test exercises the real SpatiaLite DB when it is present. Plain -asserts + a __main__ runner, matching the rest of lib/*_test.py. -""" - -import os -import sys -from unittest import mock - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from flask import Flask -from lib import netsyms_api - -EXPECTED_KEYS = set(netsyms_api._BUNDLE_KEYS) - - -def _client(): - app = Flask(__name__) - app.register_blueprint(netsyms_api.geocode_bp) - return app.test_client() - - -def _clear_cache(): - netsyms_api._REVERSE_BUNDLE_CACHE.clear() - - -def test_happy_path(): - _clear_cache() - with mock.patch.object(netsyms_api, '_reverse_photon', return_value={ - 'name': 'Where you are', 'city': 'Boise', 'county': 'Ada', - 'state': 'Idaho', 'country': 'United States', 'postal_code': '83701'}), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value='America/Boise'), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value='Boise National Forest'), \ - mock.patch.object(netsyms_api, '_reverse_elevation', return_value=824): - resp = _client().get('/api/reverse/43.6150/-116.2023') - assert resp.status_code == 200, resp.status_code - data = resp.get_json() - assert set(data.keys()) == EXPECTED_KEYS, data.keys() - assert data['city'] == 'Boise' and data['timezone'] == 'America/Boise' - assert data['landclass'] == 'Boise National Forest' and data['elevation_m'] == 824 - print(" PASS: happy path — all 9 fields populated, exact key set") - - -def test_negative_and_integer_coords_parse(): - # Regression: Flask's converter would 404 these; manual parse must not. - _clear_cache() - with mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_elevation', return_value=None): - for path in ('/api/reverse/43.6/-116.2', '/api/reverse/43/-116'): - resp = _client().get(path) - assert resp.status_code == 200, f"{path} -> {resp.status_code}" - assert set(resp.get_json().keys()) == EXPECTED_KEYS - print(" PASS: negative and integer coordinates parse (200, not 404)") - - -def test_partial_failure_returns_200_with_nulls(): - _clear_cache() - with mock.patch.object(netsyms_api, '_reverse_photon', - side_effect=RuntimeError('photon down')), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value='America/Boise'), \ - mock.patch.object(netsyms_api, '_reverse_landclass', - side_effect=RuntimeError('postgis down')), \ - mock.patch.object(netsyms_api, '_reverse_elevation', return_value=824): - resp = _client().get('/api/reverse/43.6150/-116.2023') - assert resp.status_code == 200, resp.status_code - data = resp.get_json() - assert set(data.keys()) == EXPECTED_KEYS - assert data['name'] is None and data['city'] is None # photon failed -> nulls - assert data['landclass'] is None # landclass failed -> null - assert data['timezone'] == 'America/Boise' and data['elevation_m'] == 824 - print(" PASS: per-component failure -> 200 with nulls, no 5xx") - - -def test_ocean_point_mostly_null(): - _clear_cache() - with mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value='Etc/GMT+2'), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_elevation', return_value=0): - resp = _client().get('/api/reverse/0.0/-30.0') - assert resp.status_code == 200, resp.status_code - data = resp.get_json() - assert set(data.keys()) == EXPECTED_KEYS - assert data['city'] is None and data['country'] is None and data['landclass'] is None - print(" PASS: ocean point -> 200, mostly null") - - -def test_invalid_input_400(): - _clear_cache() - client = _client() - for path in ('/api/reverse/9999/0', '/api/reverse/0/9999', '/api/reverse/abc/0'): - resp = client.get(path) - assert resp.status_code == 400, f"{path} -> {resp.status_code}" - print(" PASS: out-of-range / unparseable input -> 400") - - -def test_cache_hit_serves_without_recompute(): - _clear_cache() - with mock.patch.object(netsyms_api, '_reverse_photon', - return_value={'name': 'X'}) as m_photon, \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_elevation', return_value=None): - client = _client() - client.get('/api/reverse/12.3456/-65.4321') - client.get('/api/reverse/12.3456/-65.4321') # same key (rounded) -> cached - assert m_photon.call_count == 1, f"expected 1 compute, got {m_photon.call_count}" - print(" PASS: second identical request served from cache (no recompute)") - - -def test_real_timezone_db(): - if not os.path.exists(netsyms_api._TZ_DB_PATH): - print(" SKIP: real timezone test (timezones.sqlite not present)") - return - assert netsyms_api._reverse_timezone(43.6150, -116.2023) == 'America/Boise' - assert netsyms_api._reverse_timezone(40.7128, -74.0060) == 'America/New_York' - print(" PASS: real timezones.sqlite point-in-polygon") - - -def test_elevation_from_dem_reader_mock(): - # elevation_m comes from DEMReader.sample_point (not Valhalla); other - # components stubbed to null so the bundle is hermetic. - _clear_cache() - fake_dem = mock.Mock() - fake_dem.sample_point.return_value = 824 - with mock.patch.object(netsyms_api, '_DEM', fake_dem), \ - mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None): - resp = _client().get('/api/reverse/43.6150/-116.2023') - assert resp.status_code == 200, resp.status_code - data = resp.get_json() - assert set(data.keys()) == EXPECTED_KEYS - assert data['elevation_m'] == 824, data['elevation_m'] - fake_dem.sample_point.assert_called_once() - print(" PASS: elevation_m sourced from DEMReader.sample_point") - - -def test_elevation_dem_unavailable(): - # DEMReader failed to init at startup (_DEM is None) -> elevation_m null, 200. - _clear_cache() - with mock.patch.object(netsyms_api, '_DEM', None), \ - mock.patch.object(netsyms_api, '_reverse_photon', return_value={}), \ - mock.patch.object(netsyms_api, '_reverse_timezone', return_value=None), \ - mock.patch.object(netsyms_api, '_reverse_landclass', return_value=None): - resp = _client().get('/api/reverse/43.6150/-116.2023') - assert resp.status_code == 200, resp.status_code - data = resp.get_json() - assert set(data.keys()) == EXPECTED_KEYS - assert data['elevation_m'] is None - print(" PASS: DEMReader unavailable -> elevation_m null, still 200") - - -if __name__ == '__main__': - print("Running reverse-bundle tests...") - test_happy_path() - test_negative_and_integer_coords_parse() - test_partial_failure_returns_200_with_nulls() - test_ocean_point_mostly_null() - test_invalid_input_400() - test_cache_hit_serves_without_recompute() - test_real_timezone_db() - test_elevation_from_dem_reader_mock() - test_elevation_dem_unavailable() - print("All tests passed.") From 1f05d4b4d6ce05f6682b9d43f5a025a7fc58e36e Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 04:14:47 -0600 Subject: [PATCH 63/72] cleanup: remove /api/landclass handler + lib/landclass.py (extraction #4 shadow) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /api/landclass is edge-shadowed since extraction #4 — navi-landclass :8424 serves the route via nginx. Cleanup #4 removed the last in-process consumer (netsyms_api._reverse_landclass), so lib/landclass.py is now fully orphaned. - api.py: drop the @app.route('/api/landclass') handler + the `from .landclass import lookup_landclass, format_summary` import. - DELETE lib/landclass.py (only consumer was the deleted handler). - DELETE lib/landclass_test.py (SUT gone). PADUS_DB_* vars in /opt/recon/.env are now dead in recon — flagged for an out-of-band post-merge cleanup, not touched here (data, not code). Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 33 ------ lib/landclass.py | 255 ------------------------------------------ lib/landclass_test.py | 44 -------- 3 files changed, 332 deletions(-) delete mode 100644 lib/landclass.py delete mode 100644 lib/landclass_test.py diff --git a/lib/api.py b/lib/api.py index e4a186f..9d57de1 100644 --- a/lib/api.py +++ b/lib/api.py @@ -25,7 +25,6 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB from .deployment_config import get_deployment_config -from .landclass import lookup_landclass, format_summary logger = setup_logging('recon.api') @@ -1170,38 +1169,6 @@ def api_knowledge_stats(): return jsonify(_cache['knowledge_stats']) -@app.route('/api/landclass') -def api_landclass(): - """PAD-US land classification lookup for a point.""" - config = get_deployment_config() - if not config.get('features', {}).get('has_landclass'): - return jsonify({'error': 'Land classification not available'}), 404 - - try: - lat = float(request.args.get('lat', '')) - lon = float(request.args.get('lon', '')) - except (ValueError, TypeError): - return jsonify({'error': 'lat and lon required as numbers'}), 400 - - if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): - return jsonify({'error': 'lat must be -90..90, lon must be -180..180'}), 400 - - classifications = lookup_landclass(lat, lon) - is_public = len(classifications) > 0 - is_private = len(classifications) == 0 - summary = format_summary(classifications) - - return jsonify({ - 'lat': lat, - 'lon': lon, - 'classifications': classifications, - 'count': len(classifications), - 'is_public': is_public, - 'is_private': is_private, - 'summary': summary, - }) - - @app.route('/api/health') def api_health(): """Health check endpoint for monitoring.""" diff --git a/lib/landclass.py b/lib/landclass.py deleted file mode 100644 index 7760cce..0000000 --- a/lib/landclass.py +++ /dev/null @@ -1,255 +0,0 @@ -""" -PAD-US land classification lookup. - -Provides point-in-polygon queries against the USGS Protected Areas Database -(PAD-US) stored in a local PostGIS database. Returns land ownership, -management, and public access information for any lat/lon coordinate. - -Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, -functions return empty results gracefully (feature degrades, doesn't crash). -""" -import os - -import psycopg2 -import psycopg2.pool - -from .utils import setup_logging - -logger = setup_logging('recon.landclass') - -_pool = None -_pool_failed = False - -# ── Label mappings from PAD-US domain tables ──────────────────────────── -# Extracted from PADUS4_0_Geodatabase.gdb domain lookup layers. -# ogr2ogr lowercases all column names. - -AGENCY_NAME_MAP = { - 'TVA': 'Tennessee Valley Authority', - 'BLM': 'Bureau of Land Management', - 'BOEM': 'Bureau of Ocean Energy Management', - 'USBR': 'Bureau of Reclamation', - 'FWS': 'U.S. Fish and Wildlife Service', - 'USFS': 'Forest Service', - 'DOD': 'Department of Defense', - 'USACE': 'Army Corps of Engineers', - 'DOE': 'Department of Energy', - 'NPS': 'National Park Service', - 'NRCS': 'Natural Resources Conservation Service', - 'ARS': 'Agricultural Research Service', - 'BIA': 'Bureau of Indian Affairs', - 'NOAA': 'National Oceanic and Atmospheric Administration', - 'BPA': 'Bonneville Power Administration', - 'OTHF': 'Other or Unknown Federal Land', - 'TRIB': 'American Indian Lands', - 'SPR': 'State Park and Recreation', - 'SDC': 'State Department of Conservation', - 'SLB': 'State Land Board', -} - -AGENCY_TYPE_MAP = { - 'FED': 'Federal', - 'TRIB': 'American Indian Lands', - 'STAT': 'State', - 'DIST': 'Regional Agency Special District', - 'LOC': 'Local Government', - 'NGO': 'Non-Governmental Organization', - 'PVT': 'Private', - 'JNT': 'Joint', - 'UNK': 'Unknown', - 'TERR': 'Territorial', - 'DESG': 'Designation', -} - -DESIGNATION_TYPE_MAP = { - 'NP': 'National Park', - 'NM': 'National Monument', - 'NCA': 'Conservation Area', - 'NF': 'National Forest', - 'NG': 'National Grassland', - 'PUB': 'National Public Lands', - 'NT': 'National Scenic or Historic Trail', - 'NWR': 'National Wildlife Refuge', - 'WA': 'Wilderness Area', - 'WSR': 'Wild and Scenic River', - 'WSA': 'Wilderness Study Area', - 'MPA': 'Marine Protected Area', - 'NRA': 'National Recreation Area', - 'NSBV': 'National Scenic, Botanical or Volcanic Area', - 'NLS': 'National Lakeshore or Seashore', - 'IRA': 'Inventoried Roadless Area', - 'ACEC': 'Area of Critical Environmental Concern', - 'RNA': 'Research Natural Area', - 'REC': 'Recreation Management Area', - 'RMA': 'Resource Management Area', - 'WPA': 'Watershed Protection Area', - 'REA': 'Research or Educational Area', - 'HCA': 'Historic or Cultural Area', - 'MIT': 'Mitigation Land or Bank', - 'MIL': 'Military Land', - 'ACC': 'Access Area', - 'SDA': 'Special Designation Area', - 'PROC': 'Approved or Proclamation Boundary', - 'FOTH': 'Federal Other or Unknown', - 'ND': 'Not Designated', -} - -PUBLIC_ACCESS_MAP = { - 'OA': 'Open Access', - 'RA': 'Restricted Access', - 'XA': 'Closed', - 'UK': 'Unknown', -} - -GAP_STATUS_MAP = { - '1': 'Managed for biodiversity (disturbance events proceed)', - '2': 'Managed for biodiversity (disturbance suppressed)', - '3': 'Multiple uses (extractive/OHV)', - '4': 'No known mandate for biodiversity protection', -} - -CATEGORY_MAP = { - 'Fee': 'Fee', - 'Easement': 'Easement', - 'Other': 'Other', - 'Unknown': 'Unknown', - 'Designation': 'Designation', - 'Marine': 'Marine Area', - 'Proclamation': 'Approved, Proclamation or Extent Boundary', -} - -STATE_MAP = { - 'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas', - 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware', - 'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', - 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', - 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', - 'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', - 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', - 'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', - 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', - 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', - 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', - 'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', - 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming', -} - - -def _decode(code, label_map): - """Decode a PAD-US code using a label map. Returns decoded label or the raw code.""" - if not code: - return '' - code = str(code).strip() - return label_map.get(code, code) - - -def _get_pool(): - """Lazy-init the connection pool. Returns None if Postgres is unreachable.""" - global _pool, _pool_failed - if _pool is not None: - return _pool - if _pool_failed: - return None - - try: - _pool = psycopg2.pool.SimpleConnectionPool( - minconn=1, - maxconn=3, - host=os.environ.get('PADUS_DB_HOST', 'localhost'), - port=int(os.environ.get('PADUS_DB_PORT', '5432')), - dbname=os.environ.get('PADUS_DB_NAME', 'padus'), - user=os.environ.get('PADUS_DB_USER', 'overture'), - password=os.environ.get('PADUS_DB_PASSWORD', ''), - connect_timeout=5, - ) - logger.info("PAD-US PostgreSQL connection pool initialized") - return _pool - except Exception as e: - _pool_failed = True - logger.warning(f"PAD-US PostgreSQL unavailable, land classification disabled: {e}") - return None - - -def _query_all(sql, params): - """Execute a query and return all rows as a list of dicts, or empty list.""" - pool = _get_pool() - if pool is None: - return [] - - conn = None - try: - conn = pool.getconn() - with conn.cursor() as cur: - cur.execute(sql, params) - rows = cur.fetchall() - if not rows: - return [] - cols = [desc[0] for desc in cur.description] - return [dict(zip(cols, row)) for row in rows] - except Exception as e: - logger.warning(f"PAD-US query error: {e}") - if conn: - try: - conn.rollback() - except Exception: - pass - return [] - finally: - if conn: - try: - pool.putconn(conn) - except Exception: - pass - - -def lookup_landclass(lat, lon): - """ - Look up PAD-US land classifications for a point. - - Returns a list of classification dicts, ordered by area ascending - (smallest/most specific first). Empty list on error or no results. - """ - rows = _query_all( - """SELECT unit_nm, mang_name, mang_type, own_name, own_type, - des_tp, gap_sts, pub_access, category, gis_acres, state_nm - FROM pad_units - WHERE ST_Intersects(geom, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) - -- exclude antimeridian-wrapping polygons: 47 BOEM marine artifacts - -- span ~360 deg longitude and false-match non-US points at their lat band - AND (ST_XMax(geom) - ST_XMin(geom)) < 60 - ORDER BY gis_acres ASC - LIMIT 10""", - (lon, lat) - ) - - results = [] - for row in rows: - pa_code = str(row.get('pub_access', '')).strip() - - results.append({ - 'unit_name': (row.get('unit_nm') or '').strip(), - 'manager_name': _decode(row.get('mang_name'), AGENCY_NAME_MAP), - 'manager_type': _decode(row.get('mang_type'), AGENCY_TYPE_MAP), - 'owner_type': _decode(row.get('own_type'), AGENCY_TYPE_MAP), - 'designation_type': _decode(row.get('des_tp'), DESIGNATION_TYPE_MAP), - 'gap_status': str(row.get('gap_sts', '')).strip(), - 'public_access': _decode(pa_code, PUBLIC_ACCESS_MAP), - 'public_access_code': pa_code, - 'category': _decode(row.get('category'), CATEGORY_MAP), - 'acres': row.get('gis_acres'), - 'state': _decode(row.get('state_nm'), STATE_MAP), - }) - - return results - - -def format_summary(classifications): - """ - Format a human-readable summary from classification results. - - Returns the most specific unit name, or None if no results. - """ - if not classifications: - return None - # First result is smallest/most specific (ordered by acres ASC) - return classifications[0].get('unit_name') or None diff --git a/lib/landclass_test.py b/lib/landclass_test.py deleted file mode 100644 index cba8ca7..0000000 --- a/lib/landclass_test.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for lib.landclass PAD-US lookups. - -Live-PostgreSQL regression test using the skip-if-not-available pattern -(matching test_real_timezone_db in reverse_bundle_test.py). Plain asserts + -a __main__ runner, matching the rest of lib/*_test.py. - -Note: lookup_landclass swallows DB errors and returns [] (it never raises), -so PG availability is probed via a known US point (Boise) rather than by -catching an exception. -""" - -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from lib import landclass - - -def test_landclass_no_antimeridian_false_match(): - # Yosemite doubles as the liveness probe: a point on real US public land. - # (lookup_landclass returns [] when PG is unreachable AND when the point is - # off public land, so the probe must be a known-public-land point — e.g. - # downtown Boise is private and would yield [] even with PG up.) - yosemite = landclass.lookup_landclass(37.85, -119.55) - if not yosemite: - print(" SKIP: live PG not available (Yosemite returned no rows)") - return - # Filter must NOT drop legitimate (non-wrapping) US units. - assert len(yosemite) >= 1, f"Yosemite should match >=1 PAD-US unit, got {len(yosemite)}" - - # London (51.5074 N) previously false-matched the antimeridian-wrapping - # 'Rat Islands' record (ogc_fid 3974, ~360 deg lon span). The < 60 deg - # filter must now drop it -> empty result. - london = landclass.lookup_landclass(51.5074, -0.1278) - assert london == [], f"London should match no PAD-US unit, got {[r.get('unit_name') for r in london]}" - print(" PASS: antimeridian filter drops London false-match, keeps Yosemite coverage") - - -if __name__ == '__main__': - print("Running landclass tests...") - test_landclass_no_antimeridian_false_match() - print("All tests passed.") From 86c902f7b5666890cf8024d2281780e572efebf2 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 04:25:54 -0600 Subject: [PATCH 64/72] cleanup: remove /api/offroute + /api/mvum handlers + lib/offroute/ package (extraction #8 shadow) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /api/offroute (POST) and /api/mvum (GET) are edge-shadowed since extraction #8 — navi-offroute :8428 serves both via nginx. Cleanup #4 removed the last in-process consumer of lib/offroute/dem.py (netsyms_api._reverse_elevation + the module-level _DEM = DEMReader()), so the entire 9-file lib/offroute/ package is now orphaned and goes with this PR. - api.py: drop both handlers (api_offroute, api_mvum) + their section comments. Both used in-function lazy imports of offroute, so no top-of-file import survives. - DELETE lib/offroute/ wholesale (__init__, router, mvum, cost, barriers, dem, friction, trails, prototype). prototype.py was already dead at runtime. Closes the recon->navi navi-shadow cleanup loop: recon now serves zero navi-* shadow routes. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 211 ----- lib/offroute/__init__.py | 1 - lib/offroute/barriers.py | 440 ---------- lib/offroute/cost.py | 494 ----------- lib/offroute/dem.py | 211 ----- lib/offroute/friction.py | 137 --- lib/offroute/mvum.py | 623 -------------- lib/offroute/prototype.py | 414 --------- lib/offroute/router.py | 1682 ------------------------------------- lib/offroute/trails.py | 174 ---- 10 files changed, 4387 deletions(-) delete mode 100644 lib/offroute/__init__.py delete mode 100644 lib/offroute/barriers.py delete mode 100644 lib/offroute/cost.py delete mode 100644 lib/offroute/dem.py delete mode 100644 lib/offroute/friction.py delete mode 100644 lib/offroute/mvum.py delete mode 100755 lib/offroute/prototype.py delete mode 100644 lib/offroute/router.py delete mode 100644 lib/offroute/trails.py diff --git a/lib/api.py b/lib/api.py index 9d57de1..3a7e5ca 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2553,214 +2553,3 @@ def api_auth_whoami(): 'authenticated': False, 'username': None, }) - - -# ── OFFROUTE API ── - -@app.route("/api/offroute", methods=["POST"]) -def api_offroute(): - """ - Off-network routing from wilderness to destination. - - Request body: - { - "start": [lat, lon], - "end": [lat, lon], - "mode": "foot" | "mtb" | "atv", (default: "foot") - "boundary_mode": "strict" | "pragmatic" | "emergency" (default: "pragmatic") - } - - Response: - { - "status": "ok", - "route": { GeoJSON FeatureCollection with wilderness + network segments }, - "summary": { total_distance_km, total_effort_minutes, ... } - } - """ - try: - data = request.get_json() - if not data: - return jsonify({"status": "error", "message": "No JSON body provided"}), 400 - - # Parse coordinates - start = data.get("start") - end = data.get("end") - - if not start or not end: - return jsonify({"status": "error", "message": "Missing start or end coordinates"}), 400 - - if not isinstance(start, (list, tuple)) or len(start) != 2: - return jsonify({"status": "error", "message": "start must be [lat, lon]"}), 400 - if not isinstance(end, (list, tuple)) or len(end) != 2: - return jsonify({"status": "error", "message": "end must be [lat, lon]"}), 400 - - start_lat, start_lon = float(start[0]), float(start[1]) - end_lat, end_lon = float(end[0]), float(end[1]) - - # Parse options - mode = data.get("mode", "foot") - if mode not in ("auto", "foot", "mtb", "atv", "vehicle"): - return jsonify({"status": "error", "message": "mode must be auto, foot, mtb, atv, or vehicle"}), 400 - - boundary_mode = data.get("boundary_mode", "pragmatic") - if boundary_mode not in ("strict", "pragmatic", "emergency"): - return jsonify({"status": "error", "message": "boundary_mode must be strict, pragmatic, or emergency"}), 400 - - # Import and run router - from .offroute.router import OffrouteRouter - - router = OffrouteRouter() - try: - result = router.route( - start_lat=start_lat, - start_lon=start_lon, - end_lat=end_lat, - end_lon=end_lon, - mode=mode, - boundary_mode=boundary_mode - ) - finally: - router.close() - - if result.get("status") == "error": - return jsonify(result), 400 - - return jsonify(result) - - except Exception as e: - logger.exception("Offroute error") - return jsonify({"status": "error", "message": str(e)}), 500 - - -# ── MVUM Places Panel API ── - -@app.route("/api/mvum", methods=["GET"]) -def api_mvum(): - """ - Query MVUM (Motor Vehicle Use Map) features near a point. - - Used by the Navi frontend places panel when a user taps near a road/trail. - - Query params: - lat: Latitude - lon: Longitude - radius: Search radius in meters (default: 50) - - Response: - { - "status": "ok", - "feature": { - "id": "FR 123", - "name": "Some Forest Road", - "forest": "Sawtooth National Forest", - "district": "Ketchum Ranger District", - "surface": "NAT", - "maintenance_level": 2, - "seasonal": "Seasonal", - "symbol": 2, - "access": { - "passenger_vehicle": { "status": "Open", "dates": "06/15-10/15" }, - "high_clearance": { "status": "Open", "dates": "06/15-10/15" }, - "atv": { "status": "Open", "dates": "06/15-10/15" }, - ... - }, - "geometry": { GeoJSON LineString } - } - } - - If no MVUM feature within radius: - { "status": "ok", "feature": null } - """ - try: - lat = request.args.get("lat", type=float) - lon = request.args.get("lon", type=float) - radius = request.args.get("radius", 50, type=float) - - if lat is None or lon is None: - return jsonify({"status": "error", "message": "lat and lon required"}), 400 - - from .offroute.mvum import MVUMReader - - reader = MVUMReader() - try: - # Try roads first, then trails - feature = reader.query_nearest(lat, lon, radius, "mvum_roads") - if feature is None: - feature = reader.query_nearest(lat, lon, radius, "mvum_trails") - - if feature is None: - return jsonify({"status": "ok", "feature": None}) - - # Format access info - access = { - "passenger_vehicle": { - "status": feature.get("passengervehicle"), - "dates": feature.get("passengervehicle_datesopen") - }, - "high_clearance": { - "status": feature.get("highclearancevehicle"), - "dates": feature.get("highclearancevehicle_datesopen") - }, - "atv": { - "status": feature.get("atv"), - "dates": feature.get("atv_datesopen") - }, - "motorcycle": { - "status": feature.get("motorcycle"), - "dates": feature.get("motorcycle_datesopen") - }, - "4wd_gt50": { - "status": feature.get("fourwd_gt50inches"), - "dates": feature.get("fourwd_gt50_datesopen") - }, - "2wd_gt50": { - "status": feature.get("twowd_gt50inches"), - "dates": feature.get("twowd_gt50_datesopen") - }, - "e_bike_class1": { - "status": feature.get("e_bike_class1"), - "dates": feature.get("e_bike_class1_dur") - }, - "e_bike_class2": { - "status": feature.get("e_bike_class2"), - "dates": feature.get("e_bike_class2_dur") - }, - "e_bike_class3": { - "status": feature.get("e_bike_class3"), - "dates": feature.get("e_bike_class3_dur") - }, - } - - # Parse maintenance level - maint_level = feature.get("operationalmaintlevel", "") - maint_num = None - if maint_level: - # Extract first digit: "2 - HIGH CLEARANCE VEHICLES" -> 2 - import re - match = re.match(r"(\d+)", maint_level) - if match: - maint_num = int(match.group(1)) - - result = { - "id": feature.get("id"), - "name": feature.get("name"), - "forest": feature.get("forestname"), - "district": feature.get("districtname"), - "surface": feature.get("surfacetype"), - "maintenance_level": maint_num, - "seasonal": feature.get("seasonal"), - "symbol": feature.get("symbol"), - "trail_class": feature.get("trailclass"), - "trail_system": feature.get("trailsystem"), - "access": access, - "geometry": feature.get("geojson") - } - - return jsonify({"status": "ok", "feature": result}) - - finally: - reader.close() - - except Exception as e: - logger.exception("MVUM query error") - return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/lib/offroute/__init__.py b/lib/offroute/__init__.py deleted file mode 100644 index b0536cd..0000000 --- a/lib/offroute/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""OFFROUTE: Off-network effort-based routing module.""" diff --git a/lib/offroute/barriers.py b/lib/offroute/barriers.py deleted file mode 100644 index f68e892..0000000 --- a/lib/offroute/barriers.py +++ /dev/null @@ -1,440 +0,0 @@ -""" -PAD-US barrier and wilderness layers for OFFROUTE. - -Provides access to: -1. Barrier raster (Pub_Access = 'XA' - closed/restricted areas) -2. Wilderness raster (Des_Tp = 'WA' - designated wilderness areas) - -Build functions rasterize PAD-US geodatabase to aligned GeoTIFFs. -Runtime functions read the rasters and resample to match elevation grids. -""" -import numpy as np -from pathlib import Path -from typing import Tuple, Optional -import subprocess -import tempfile -import os - -try: - import rasterio - from rasterio.windows import from_bounds - from rasterio.enums import Resampling -except ImportError: - raise ImportError("rasterio is required for barriers layer support") - -# Paths -DEFAULT_BARRIERS_PATH = Path("/mnt/nav/worldcover/padus_barriers.tif") -DEFAULT_WILDERNESS_PATH = Path("/mnt/nav/worldcover/wilderness.tif") -PADUS_GDB_PATH = Path("/mnt/nav/padus/PADUS4_0_Geodatabase.gdb") -PADUS_LAYER = "PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement" - -# CONUS bounding box in WGS84 -CONUS_BOUNDS = { - "west": -125.0, - "east": -66.0, - "south": 24.0, - "north": 50.0, -} - -# Resolution in degrees (~30m at mid-latitudes) -PIXEL_SIZE = 0.0003 # ~33m - - -class BarrierReader: - """Reader for PAD-US barrier raster (closed/restricted areas).""" - - def __init__(self, barrier_path: Path = DEFAULT_BARRIERS_PATH): - self.barrier_path = barrier_path - self._dataset = None - - def _open(self): - """Lazy open the dataset.""" - if self._dataset is None: - if not self.barrier_path.exists(): - raise FileNotFoundError( - f"Barrier raster not found at {self.barrier_path}. " - f"Run build_barriers_raster() first." - ) - self._dataset = rasterio.open(self.barrier_path) - return self._dataset - - def get_barrier_grid( - self, - south: float, - north: float, - west: float, - east: float, - target_shape: Tuple[int, int] - ) -> np.ndarray: - """ - Get barrier values for a bounding box, resampled to target shape. - - Args: - south, north, west, east: Bounding box coordinates (WGS84) - target_shape: (rows, cols) to resample to (matches elevation grid) - - Returns: - np.ndarray of uint8 barrier values: - 255 = closed/restricted (impassable when respect_boundaries=True) - 0 = public/accessible - """ - ds = self._open() - window = from_bounds(west, south, east, north, ds.transform) - barriers = ds.read( - 1, - window=window, - out_shape=target_shape, - resampling=Resampling.nearest - ) - return barriers - - def sample_point(self, lat: float, lon: float) -> int: - """Sample barrier value at a single point.""" - ds = self._open() - row, col = ds.index(lon, lat) - if row < 0 or row >= ds.height or col < 0 or col >= ds.width: - return 0 - window = rasterio.windows.Window(col, row, 1, 1) - value = ds.read(1, window=window) - return int(value[0, 0]) - - def close(self): - """Close the dataset.""" - if self._dataset is not None: - self._dataset.close() - self._dataset = None - - -class WildernessReader: - """Reader for PAD-US wilderness raster (designated wilderness areas).""" - - def __init__(self, wilderness_path: Path = DEFAULT_WILDERNESS_PATH): - self.wilderness_path = wilderness_path - self._dataset = None - - def _open(self): - """Lazy open the dataset.""" - if self._dataset is None: - if not self.wilderness_path.exists(): - raise FileNotFoundError( - f"Wilderness raster not found at {self.wilderness_path}. " - f"Run build_wilderness_raster() first." - ) - self._dataset = rasterio.open(self.wilderness_path) - return self._dataset - - def get_wilderness_grid( - self, - south: float, - north: float, - west: float, - east: float, - target_shape: Tuple[int, int] - ) -> np.ndarray: - """ - Get wilderness values for a bounding box, resampled to target shape. - - Args: - south, north, west, east: Bounding box coordinates (WGS84) - target_shape: (rows, cols) to resample to (matches elevation grid) - - Returns: - np.ndarray of uint8 wilderness values: - 255 = designated wilderness area - 0 = not wilderness - """ - ds = self._open() - window = from_bounds(west, south, east, north, ds.transform) - wilderness = ds.read( - 1, - window=window, - out_shape=target_shape, - resampling=Resampling.nearest - ) - return wilderness - - def sample_point(self, lat: float, lon: float) -> int: - """Sample wilderness value at a single point.""" - ds = self._open() - row, col = ds.index(lon, lat) - if row < 0 or row >= ds.height or col < 0 or col >= ds.width: - return 0 - window = rasterio.windows.Window(col, row, 1, 1) - value = ds.read(1, window=window) - return int(value[0, 0]) - - def close(self): - """Close the dataset.""" - if self._dataset is not None: - self._dataset.close() - self._dataset = None - - -def build_barriers_raster( - output_path: Path = DEFAULT_BARRIERS_PATH, - gdb_path: Path = PADUS_GDB_PATH, - pixel_size: float = PIXEL_SIZE, - bounds: dict = CONUS_BOUNDS, -) -> Path: - """ - Build the PAD-US barriers raster from the source geodatabase. - - Extracts polygons where Pub_Access = 'XA' (Closed) and rasterizes them. - """ - import shutil - - if not gdb_path.exists(): - raise FileNotFoundError(f"PAD-US geodatabase not found at {gdb_path}") - - if not shutil.which('ogr2ogr'): - raise RuntimeError("ogr2ogr not found. Install GDAL.") - if not shutil.which('gdal_rasterize'): - raise RuntimeError("gdal_rasterize not found. Install GDAL.") - - output_path.parent.mkdir(parents=True, exist_ok=True) - - print(f"Building PAD-US barriers raster...") - print(f" Source: {gdb_path}") - print(f" Output: {output_path}") - print(f" Pixel size: {pixel_size} degrees (~{pixel_size * 111000:.0f}m)") - print(f" Bounds: {bounds}") - - with tempfile.TemporaryDirectory() as tmpdir: - closed_gpkg = Path(tmpdir) / "closed_areas.gpkg" - - print(f"\n[1/3] Extracting closed areas (Pub_Access = 'XA')...") - - ogr_cmd = [ - "ogr2ogr", - "-f", "GPKG", - str(closed_gpkg), - str(gdb_path), - PADUS_LAYER, - "-where", "Pub_Access = 'XA'", - "-t_srs", "EPSG:4326", - "-nlt", "MULTIPOLYGON", - "-nln", "closed_areas", - ] - - result = subprocess.run(ogr_cmd, capture_output=True, text=True) - if result.returncode != 0: - print(f"STDERR: {result.stderr}") - raise RuntimeError(f"ogr2ogr failed: {result.stderr}") - - info_cmd = ["ogrinfo", "-so", str(closed_gpkg), "closed_areas"] - info_result = subprocess.run(info_cmd, capture_output=True, text=True) - print(f" Extraction result:\n{info_result.stdout}") - - print(f"\n[2/3] Creating raster grid...") - - width = int((bounds['east'] - bounds['west']) / pixel_size) - height = int((bounds['north'] - bounds['south']) / pixel_size) - print(f" Grid size: {width} x {height} pixels") - - print(f"\n[3/3] Rasterizing closed areas...") - - rasterize_cmd = [ - "gdal_rasterize", - "-burn", "255", - "-init", "0", - "-a_nodata", "0", - "-te", str(bounds['west']), str(bounds['south']), - str(bounds['east']), str(bounds['north']), - "-tr", str(pixel_size), str(pixel_size), - "-ot", "Byte", - "-co", "COMPRESS=LZW", - "-co", "TILED=YES", - "-l", "closed_areas", - str(closed_gpkg), - str(output_path), - ] - - result = subprocess.run(rasterize_cmd, capture_output=True, text=True) - if result.returncode != 0: - print(f"STDERR: {result.stderr}") - raise RuntimeError(f"gdal_rasterize failed: {result.stderr}") - - print(f"\n[Done] Verifying output...") - with rasterio.open(output_path) as ds: - print(f" Size: {ds.width} x {ds.height}") - print(f" CRS: {ds.crs}") - sample = ds.read(1, window=rasterio.windows.Window(0, 0, 1000, 1000)) - closed_count = np.sum(sample == 255) - print(f" Sample (1000x1000): {closed_count} closed cells") - - file_size = output_path.stat().st_size / (1024**2) - print(f" File size: {file_size:.1f} MB") - - return output_path - - -def build_wilderness_raster( - output_path: Path = DEFAULT_WILDERNESS_PATH, - gdb_path: Path = PADUS_GDB_PATH, - pixel_size: float = PIXEL_SIZE, - bounds: dict = CONUS_BOUNDS, -) -> Path: - """ - Build the PAD-US wilderness raster from the source geodatabase. - - Extracts polygons where Des_Tp = 'WA' (Wilderness Area) and rasterizes them. - """ - import shutil - - if not gdb_path.exists(): - raise FileNotFoundError(f"PAD-US geodatabase not found at {gdb_path}") - - if not shutil.which('ogr2ogr'): - raise RuntimeError("ogr2ogr not found. Install GDAL.") - if not shutil.which('gdal_rasterize'): - raise RuntimeError("gdal_rasterize not found. Install GDAL.") - - output_path.parent.mkdir(parents=True, exist_ok=True) - - print(f"Building PAD-US wilderness raster...") - print(f" Source: {gdb_path}") - print(f" Output: {output_path}") - print(f" Pixel size: {pixel_size} degrees (~{pixel_size * 111000:.0f}m)") - print(f" Bounds: {bounds}") - - with tempfile.TemporaryDirectory() as tmpdir: - wilderness_gpkg = Path(tmpdir) / "wilderness_areas.gpkg" - - print(f"\n[1/3] Extracting wilderness areas (Des_Tp = 'WA')...") - - ogr_cmd = [ - "ogr2ogr", - "-f", "GPKG", - str(wilderness_gpkg), - str(gdb_path), - PADUS_LAYER, - "-where", "Des_Tp = 'WA'", - "-t_srs", "EPSG:4326", - "-nlt", "MULTIPOLYGON", - "-nln", "wilderness_areas", - ] - - result = subprocess.run(ogr_cmd, capture_output=True, text=True) - if result.returncode != 0: - print(f"STDERR: {result.stderr}") - raise RuntimeError(f"ogr2ogr failed: {result.stderr}") - - info_cmd = ["ogrinfo", "-so", str(wilderness_gpkg), "wilderness_areas"] - info_result = subprocess.run(info_cmd, capture_output=True, text=True) - print(f" Extraction result:\n{info_result.stdout}") - - print(f"\n[2/3] Creating raster grid...") - - width = int((bounds['east'] - bounds['west']) / pixel_size) - height = int((bounds['north'] - bounds['south']) / pixel_size) - print(f" Grid size: {width} x {height} pixels") - - print(f"\n[3/3] Rasterizing wilderness areas...") - - rasterize_cmd = [ - "gdal_rasterize", - "-burn", "255", - "-init", "0", - "-a_nodata", "0", - "-te", str(bounds['west']), str(bounds['south']), - str(bounds['east']), str(bounds['north']), - "-tr", str(pixel_size), str(pixel_size), - "-ot", "Byte", - "-co", "COMPRESS=LZW", - "-co", "TILED=YES", - "-l", "wilderness_areas", - str(wilderness_gpkg), - str(output_path), - ] - - result = subprocess.run(rasterize_cmd, capture_output=True, text=True) - if result.returncode != 0: - print(f"STDERR: {result.stderr}") - raise RuntimeError(f"gdal_rasterize failed: {result.stderr}") - - print(f"\n[Done] Verifying output...") - with rasterio.open(output_path) as ds: - print(f" Size: {ds.width} x {ds.height}") - print(f" CRS: {ds.crs}") - sample = ds.read(1, window=rasterio.windows.Window(0, 0, 1000, 1000)) - wilderness_count = np.sum(sample == 255) - print(f" Sample (1000x1000): {wilderness_count} wilderness cells") - - file_size = output_path.stat().st_size / (1024**2) - print(f" File size: {file_size:.1f} MB") - - return output_path - - -if __name__ == "__main__": - import sys - - if len(sys.argv) > 1: - cmd = sys.argv[1] - - if cmd == "build": - print("=" * 60) - print("PAD-US Barriers Raster Build") - print("=" * 60) - build_barriers_raster() - - elif cmd == "build-wilderness": - print("=" * 60) - print("PAD-US Wilderness Raster Build") - print("=" * 60) - build_wilderness_raster() - - elif cmd == "build-all": - print("=" * 60) - print("Building all PAD-US rasters") - print("=" * 60) - build_barriers_raster() - print("\n") - build_wilderness_raster() - - else: - print(f"Unknown command: {cmd}") - print("Usage:") - print(" python barriers.py build # Build barriers raster") - print(" python barriers.py build-wilderness # Build wilderness raster") - print(" python barriers.py build-all # Build both rasters") - sys.exit(1) - - else: - # Test readers - print("Testing BarrierReader...") - - if not DEFAULT_BARRIERS_PATH.exists(): - print(f"Barrier raster not found at {DEFAULT_BARRIERS_PATH}") - print(f"Run: python barriers.py build") - sys.exit(1) - - reader = BarrierReader() - barriers = reader.get_barrier_grid( - south=42.2, north=42.6, west=-114.8, east=-113.8, - target_shape=(400, 1000) - ) - print(f"\nBarrier grid shape: {barriers.shape}") - print(f"Unique values: {np.unique(barriers)}") - closed_cells = np.sum(barriers == 255) - print(f"Closed cells: {closed_cells} ({100*closed_cells/barriers.size:.2f}%)") - reader.close() - - print("\nTesting WildernessReader...") - - if not DEFAULT_WILDERNESS_PATH.exists(): - print(f"Wilderness raster not found at {DEFAULT_WILDERNESS_PATH}") - print(f"Run: python barriers.py build-wilderness") - else: - wilderness_reader = WildernessReader() - wilderness = wilderness_reader.get_wilderness_grid( - south=42.2, north=42.6, west=-114.8, east=-113.8, - target_shape=(400, 1000) - ) - print(f"Wilderness grid shape: {wilderness.shape}") - print(f"Unique values: {np.unique(wilderness)}") - wilderness_cells = np.sum(wilderness == 255) - print(f"Wilderness cells: {wilderness_cells} ({100*wilderness_cells/wilderness.size:.2f}%)") - wilderness_reader.close() - - print("\nDone.") diff --git a/lib/offroute/cost.py b/lib/offroute/cost.py deleted file mode 100644 index 16b8514..0000000 --- a/lib/offroute/cost.py +++ /dev/null @@ -1,494 +0,0 @@ -""" -Multi-mode travel cost functions for OFFROUTE. - -Supports four travel modes: foot, mtb, atv, vehicle. -Each mode has its own speed function, max slope, trail access rules, -and terrain friction overrides. - -Mode profiles are data-driven — adding a new mode means adding a profile entry. -""" -import math -import numpy as np -from dataclasses import dataclass, field -from typing import Optional, Literal, Dict, Callable - -# ═══════════════════════════════════════════════════════════════════════════════ -# SPEED FUNCTIONS -# ═══════════════════════════════════════════════════════════════════════════════ - -def tobler_off_path_speed(grade: np.ndarray, base_speed: float = 6.0) -> np.ndarray: - """ - Tobler off-path hiking function. - - W = 0.6 * base_speed * exp(-3.5 * |S + 0.05|) - - Peak ~3.6 km/h at grade = -0.05 (slight downhill). - The 0.6 multiplier is the off-trail penalty. - """ - return 0.6 * base_speed * np.exp(-3.5 * np.abs(grade + 0.05)) - - -def herzog_wheeled_speed(grade: np.ndarray, base_speed: float = 12.0) -> np.ndarray: - """ - Herzog wheeled-transport polynomial. - - Relative speed factor: - 1 / (1337.8·S^6 + 278.19·S^5 − 517.39·S^4 − 78.199·S^3 + 93.419·S^2 + 19.825·|S| + 1.64) - - Multiply by base_speed to get km/h. - """ - S = grade - S_abs = np.abs(S) - - # Herzog polynomial (returns relative speed factor 0-1) - denom = (1337.8 * S**6 + 278.19 * S**5 - 517.39 * S**4 - - 78.199 * S**3 + 93.419 * S**2 + 19.825 * S_abs + 1.64) - - # Avoid division by zero and negative speeds - denom = np.maximum(denom, 0.1) - rel_speed = 1.0 / denom - - # Clamp relative speed to reasonable bounds (0.05 to 1.5) - rel_speed = np.clip(rel_speed, 0.05, 1.5) - - return base_speed * rel_speed - - -def linear_degrade_speed(grade: np.ndarray, base_speed: float = 40.0, max_grade: float = 0.364) -> np.ndarray: - """ - Linear speed degradation with slope. - - speed = base_speed * max(0, 1 - |grade| / max_grade) - - max_grade = tan(20°) ≈ 0.364 for 20° max slope. - """ - speed = base_speed * np.maximum(0, 1.0 - np.abs(grade) / max_grade) - return np.maximum(speed, 0.1) # Minimum crawl speed - - -# ═══════════════════════════════════════════════════════════════════════════════ -# MODE PROFILES (Data-driven configuration) -# ═══════════════════════════════════════════════════════════════════════════════ - -@dataclass -class ModeProfile: - """Configuration for a travel mode.""" - - name: str - description: str - - # Speed function parameters - speed_function: str # "tobler", "herzog", "linear" - base_speed_kmh: float - max_slope_deg: float - - # Trail access: trail_value -> friction multiplier (None = impassable) - # Trail values: 5=road, 15=track, 25=foot trail - trail_friction: Dict[int, Optional[float]] = field(default_factory=dict) - - # Off-trail terrain friction overrides (by WorldCover class) - # These MULTIPLY the base WorldCover friction - # None = use default, np.inf = impassable - # WorldCover values: 10=tree, 20=shrub, 30=grass, 40=crop, 50=urban, - # 60=bare, 80=water, 90=wetland, 95=mangrove, 100=moss - terrain_friction_override: Dict[int, Optional[float]] = field(default_factory=dict) - - # Should wilderness areas be impassable? - wilderness_impassable: bool = False - - # For vehicle mode: can traverse off-trail flat terrain? - off_trail_flat_threshold_deg: float = 0.0 # 0 = no off-trail allowed - off_trail_flat_friction: float = np.inf # friction if allowed - - -# Define all mode profiles -MODE_PROFILES: Dict[str, ModeProfile] = { - "foot": ModeProfile( - name="foot", - description="Hiking on foot (Tobler off-path model)", - speed_function="tobler", - base_speed_kmh=6.0, - max_slope_deg=40.0, - trail_friction={ - 5: 0.1, # road - 15: 0.3, # track - 25: 0.5, # foot trail - }, - terrain_friction_override={ - # Use default WorldCover friction for foot mode - }, - wilderness_impassable=False, - ), - - "mtb": ModeProfile( - name="mtb", - description="Mountain bike / dirt bike (Herzog wheeled model)", - speed_function="herzog", - base_speed_kmh=12.0, - max_slope_deg=25.0, - trail_friction={ - 5: 0.1, # road - 15: 0.2, # track - 25: 0.5, # foot trail (rideable but slow) - }, - terrain_friction_override={ - 30: 2.0, # Grassland: rideable but slow - 20: 4.0, # Shrubland: barely rideable - 10: 8.0, # Tree cover/forest: effectively impassable - 60: 3.0, # Bare/rocky - 90: np.inf, # Wetland: impassable - 95: np.inf, # Mangrove: impassable - 80: np.inf, # Water: impassable - }, - wilderness_impassable=True, - ), - - "atv": ModeProfile( - name="atv", - description="ATV / side-by-side (Herzog wheeled model, higher base speed)", - speed_function="herzog", - base_speed_kmh=25.0, - max_slope_deg=30.0, - trail_friction={ - 5: 0.1, # road - 15: 0.3, # track - 25: None, # foot trail: impassable (too narrow) - }, - terrain_friction_override={ - 30: 1.5, # Grassland: passable - 20: 3.0, # Shrubland: rough - 10: np.inf, # Forest: impassable - 60: 2.0, # Bare/rocky - 90: np.inf, # Wetland: impassable - 95: np.inf, # Mangrove: impassable - 80: np.inf, # Water: impassable - }, - wilderness_impassable=True, - ), - - "vehicle": ModeProfile( - name="vehicle", - description="4x4 truck / jeep (linear speed degradation)", - speed_function="linear", - base_speed_kmh=40.0, - max_slope_deg=20.0, - trail_friction={ - 5: 0.1, # road - 15: 0.5, # track (rough but passable) - 25: None, # foot trail: impassable - }, - terrain_friction_override={ - # All off-trail terrain is impassable by default - 10: np.inf, # Forest - 20: np.inf, # Shrubland - 30: np.inf, # Grassland (except flat - see below) - 40: np.inf, # Cropland (except flat - see below) - 60: np.inf, # Bare - 90: np.inf, # Wetland - 95: np.inf, # Mangrove - 80: np.inf, # Water - }, - wilderness_impassable=True, - off_trail_flat_threshold_deg=5.0, # Can drive on flat fields - off_trail_flat_friction=5.0, # But very slow - ), -} - - -# Pragmatic mode friction multiplier for private land -PRAGMATIC_BARRIER_MULTIPLIER = 5.0 - - -# ═══════════════════════════════════════════════════════════════════════════════ -# COST GRID COMPUTATION -# ═══════════════════════════════════════════════════════════════════════════════ - -def compute_cost_grid( - elevation: np.ndarray, - cell_size_m: float, - cell_size_lat_m: float = None, - cell_size_lon_m: float = None, - friction: Optional[np.ndarray] = None, - friction_raw: Optional[np.ndarray] = None, - trails: Optional[np.ndarray] = None, - barriers: Optional[np.ndarray] = None, - wilderness: Optional[np.ndarray] = None, - mvum: Optional[np.ndarray] = None, - boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic", - mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot" -) -> np.ndarray: - """ - Compute isotropic travel cost grid from elevation data. - - Args: - elevation: 2D array of elevation values in meters - cell_size_m: Average cell size in meters - cell_size_lat_m: Cell size in latitude direction (optional) - cell_size_lon_m: Cell size in longitude direction (optional) - friction: Optional 2D array of friction multipliers (WorldCover). - Values should be float (1.0 = baseline, 2.0 = 2x slower). - np.inf marks impassable cells. - friction_raw: Optional 2D array of raw WorldCover class values (uint8). - Used for mode-specific terrain overrides. - Values: 10=tree, 20=shrub, 30=grass, etc. - trails: Optional 2D array of trail values (uint8). - 0 = no trail, 5 = road, 15 = track, 25 = foot trail - barriers: Optional 2D array of barrier values (uint8). - 255 = closed/restricted area (PAD-US Pub_Access = XA). - wilderness: Optional[np.ndarray] of wilderness values (uint8). - 255 = designated wilderness area. - mvum: Optional[np.ndarray] of MVUM access values (uint8). - 0 = no MVUM data, 1 = open, 255 = closed to this mode. - MVUM closures respond to boundary_mode (strict/pragmatic/emergency). - Foot mode should pass None (MVUM is motor-vehicle specific). - boundary_mode: How to handle barriers ("strict", "pragmatic", "emergency") - mode: Travel mode ("foot", "mtb", "atv", "vehicle") - - Returns: - 2D array of travel cost in seconds per cell. - np.inf for impassable cells. - """ - if boundary_mode not in ("strict", "pragmatic", "emergency"): - raise ValueError(f"boundary_mode must be 'strict', 'pragmatic', or 'emergency'") - - if mode not in MODE_PROFILES: - raise ValueError(f"mode must be one of {list(MODE_PROFILES.keys())}") - - profile = MODE_PROFILES[mode] - - if cell_size_lat_m is None: - cell_size_lat_m = cell_size_m - if cell_size_lon_m is None: - cell_size_lon_m = cell_size_m - - rows, cols = elevation.shape - - # ─── Compute gradients (in-place where possible) ───────────────────────── - # Use float32 to reduce memory footprint - grade = np.zeros(elevation.shape, dtype=np.float32) - - # Compute dy contribution to grade squared - dy_contrib = np.zeros(elevation.shape, dtype=np.float32) - dy_contrib[1:-1, :] = ((elevation[:-2, :] - elevation[2:, :]) / (2 * cell_size_lat_m)) ** 2 - dy_contrib[0, :] = ((elevation[0, :] - elevation[1, :]) / cell_size_lat_m) ** 2 - dy_contrib[-1, :] = ((elevation[-2, :] - elevation[-1, :]) / cell_size_lat_m) ** 2 - - # Compute dx contribution and add to dy_contrib in-place - dy_contrib[:, 1:-1] += ((elevation[:, 2:] - elevation[:, :-2]) / (2 * cell_size_lon_m)) ** 2 - dy_contrib[:, 0] += ((elevation[:, 1] - elevation[:, 0]) / cell_size_lon_m) ** 2 - dy_contrib[:, -1] += ((elevation[:, -1] - elevation[:, -2]) / cell_size_lon_m) ** 2 - - # grade = sqrt(dx^2 + dy^2) - np.sqrt(dy_contrib, out=grade) - del dy_contrib # Free memory immediately - - # ─── Compute speed based on mode ───────────────────────────────────────── - max_grade_val = np.tan(np.radians(profile.max_slope_deg)) - - if profile.speed_function == "tobler": - speed_kmh = tobler_off_path_speed(grade, profile.base_speed_kmh) - elif profile.speed_function == "herzog": - speed_kmh = herzog_wheeled_speed(grade, profile.base_speed_kmh) - elif profile.speed_function == "linear": - speed_kmh = linear_degrade_speed(grade, profile.base_speed_kmh, max_grade_val) - else: - raise ValueError(f"Unknown speed function: {profile.speed_function}") - - # ─── Base cost (seconds per cell) ───────────────────────────────────────── - avg_cell_size = (cell_size_lat_m + cell_size_lon_m) / 2 - cost = (avg_cell_size * 3.6) / speed_kmh - del speed_kmh - - # ─── Max slope limit ────────────────────────────────────────────────────── - cost[grade > max_grade_val] = np.inf - - # ─── NaN elevations ────────────────────────────────────────────────────── - cost[np.isnan(elevation)] = np.inf - - # ─── Apply friction in-place ───────────────────────────────────────────── - # Instead of creating effective_friction copy, apply directly to cost - - # Start with base friction - if friction is not None: - if friction.shape != elevation.shape: - raise ValueError(f"Friction shape mismatch") - np.multiply(cost, friction, out=cost) - - # ─── Mode-specific terrain friction overrides (memory-efficient) ───────── - if friction_raw is not None and profile.terrain_friction_override: - if friction_raw.shape != elevation.shape: - raise ValueError(f"Friction_raw shape mismatch") - - # Process all overrides without creating large intermediate masks - for wc_class, override in profile.terrain_friction_override.items(): - if override is not None: - if override == np.inf: - # Use np.where for in-place-like behavior - np.putmask(cost, friction_raw == wc_class, np.inf) - else: - # Multiply cost where friction_raw matches - # Using a loop with putmask is more memory efficient - mask = friction_raw == wc_class - cost[mask] *= override - del mask - - # ─── Vehicle mode: allow flat grassland/cropland ───────────────────────── - if mode == "vehicle" and profile.off_trail_flat_threshold_deg > 0: - if friction_raw is not None: - # Compute slope in degrees for flat terrain check - slope_deg = np.degrees(np.arctan(grade)) - # Flat grassland or cropland - recompute cost for these cells - flat_field_mask = ( - (slope_deg <= profile.off_trail_flat_threshold_deg) & - ((friction_raw == 30) | (friction_raw == 40)) - ) - del slope_deg - # Recalculate cost for these cells with flat field friction - if np.any(flat_field_mask): - base_time = avg_cell_size * 3.6 / linear_degrade_speed( - grade[flat_field_mask], profile.base_speed_kmh, max_grade_val - ) - cost[flat_field_mask] = base_time * profile.off_trail_flat_friction - del base_time - del flat_field_mask - - # ─── Trail friction (mode-specific) ────────────────────────────────────── - if trails is not None: - if trails.shape != elevation.shape: - raise ValueError(f"Trails shape mismatch") - - for trail_value, trail_friction in profile.trail_friction.items(): - if trail_friction is None: - # Impassable for this mode - np.putmask(cost, trails == trail_value, np.inf) - else: - # Trail friction REPLACES terrain friction - # Recalculate cost = base_time * trail_friction - trail_mask = trails == trail_value - if np.any(trail_mask): - # Get base travel time (without friction) - if profile.speed_function == "tobler": - trail_speed = tobler_off_path_speed(grade[trail_mask], profile.base_speed_kmh) - elif profile.speed_function == "herzog": - trail_speed = herzog_wheeled_speed(grade[trail_mask], profile.base_speed_kmh) - else: - trail_speed = linear_degrade_speed( - grade[trail_mask], profile.base_speed_kmh, max_grade_val - ) - cost[trail_mask] = (avg_cell_size * 3.6 / trail_speed) * trail_friction - del trail_speed - del trail_mask - - # ─── Wilderness areas (mode-specific) ──────────────────────────────────── - if wilderness is not None and profile.wilderness_impassable: - if wilderness.shape != elevation.shape: - raise ValueError(f"Wilderness shape mismatch") - np.putmask(cost, wilderness == 255, np.inf) - - # ─── Barriers (private land) ───────────────────────────────────────────── - if barriers is not None and boundary_mode != "emergency": - if barriers.shape != elevation.shape: - raise ValueError(f"Barriers shape mismatch") - - if boundary_mode == "strict": - np.putmask(cost, barriers == 255, np.inf) - elif boundary_mode == "pragmatic": - barrier_mask = barriers == 255 - cost[barrier_mask] *= PRAGMATIC_BARRIER_MULTIPLIER - del barrier_mask - - # ─── MVUM closures (motor vehicle restrictions) ────────────────────────── - # MVUM only applies to motorized modes, not foot. Foot mode should pass mvum=None. - # MVUM closures respond to the same boundary_mode as PAD-US barriers: - # "strict" = MVUM-closed road/trail is impassable - # "pragmatic" = MVUM-closed road/trail gets 5× friction penalty - # "emergency" = MVUM closures ignored entirely - if mvum is not None and mode != "foot" and boundary_mode != "emergency": - if mvum.shape != elevation.shape: - raise ValueError(f"MVUM shape mismatch") - - # Value 255 = road/trail exists but is closed to this mode - mvum_closed_mask = mvum == 255 - - if boundary_mode == "strict": - np.putmask(cost, mvum_closed_mask, np.inf) - elif boundary_mode == "pragmatic": - cost[mvum_closed_mask] *= PRAGMATIC_BARRIER_MULTIPLIER - - del mvum_closed_mask - - return cost - - -# ═══════════════════════════════════════════════════════════════════════════════ -# LEGACY API (backward compatibility) -# ═══════════════════════════════════════════════════════════════════════════════ - -def tobler_speed(grade: float) -> float: - """Legacy single-value Tobler speed function.""" - return 0.6 * 6.0 * math.exp(-3.5 * abs(grade + 0.05)) - - -# ═══════════════════════════════════════════════════════════════════════════════ -# TESTING -# ═══════════════════════════════════════════════════════════════════════════════ - -if __name__ == "__main__": - print("=" * 70) - print("OFFROUTE Multi-Mode Cost Function Tests") - print("=" * 70) - - print("\n[1] Speed functions at various grades:") - print(f"{'Grade':<10} {'Foot':<12} {'MTB':<12} {'ATV':<12} {'Vehicle':<12}") - print("-" * 60) - - for grade_val in [-0.3, -0.1, 0.0, 0.1, 0.2, 0.3]: - grade_arr = np.array([grade_val]) - foot = tobler_off_path_speed(grade_arr, 6.0)[0] - mtb = herzog_wheeled_speed(grade_arr, 12.0)[0] - atv = herzog_wheeled_speed(grade_arr, 25.0)[0] - veh = linear_degrade_speed(grade_arr, 40.0, np.tan(np.radians(20)))[0] - print(f"{grade_val:+.2f} {foot:>6.2f} km/h {mtb:>6.2f} km/h {atv:>6.2f} km/h {veh:>6.2f} km/h") - - print("\n[2] Mode profiles:") - for name, profile in MODE_PROFILES.items(): - print(f"\n {name.upper()}: {profile.description}") - print(f" Max slope: {profile.max_slope_deg}°") - print(f" Trail access: {profile.trail_friction}") - print(f" Wilderness blocked: {profile.wilderness_impassable}") - - print("\n[3] Cost grid test (flat terrain, forest):") - elev = np.ones((10, 10), dtype=np.float32) * 1000 - friction = np.ones((10, 10), dtype=np.float32) * 2.0 # Forest friction - friction_raw = np.ones((10, 10), dtype=np.uint8) * 10 # Tree cover class - - trails = np.zeros((10, 10), dtype=np.uint8) - trails[5, :] = 5 # Road across middle - - for mode_name in ["foot", "mtb", "atv", "vehicle"]: - cost = compute_cost_grid( - elev, cell_size_m=30.0, - friction=friction, - friction_raw=friction_raw, - trails=trails, - mode=mode_name - ) - off_trail_cost = cost[0, 0] - road_cost = cost[5, 0] - impassable = np.sum(np.isinf(cost)) - print(f" {mode_name:8s}: off-trail={off_trail_cost:>8.1f}s, road={road_cost:>6.1f}s, impassable={impassable}") - - print("\n[4] Wilderness blocking test:") - wilderness = np.zeros((10, 10), dtype=np.uint8) - wilderness[3:7, 3:7] = 255 - - for mode_name in ["foot", "mtb", "atv", "vehicle"]: - cost = compute_cost_grid( - elev, cell_size_m=30.0, - wilderness=wilderness, - mode=mode_name - ) - wilderness_impassable = np.sum(np.isinf(cost[3:7, 3:7])) - print(f" {mode_name:8s}: wilderness cells impassable = {wilderness_impassable}/16") - - print("\nDone.") diff --git a/lib/offroute/dem.py b/lib/offroute/dem.py deleted file mode 100644 index 06cfcea..0000000 --- a/lib/offroute/dem.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -DEM tile reader for OFFROUTE. - -Reads elevation tiles from planet-dem.pmtiles (Terrarium-encoded WebP), -decodes them into numpy arrays, and provides a stitched elevation grid -for a given bounding box. -""" -import math -from functools import lru_cache -from io import BytesIO -from pathlib import Path -from typing import Tuple, Optional - -import numpy as np -from PIL import Image -from pmtiles.reader import MmapSource, Reader as PMTilesReader - -# Default path to the planet DEM PMTiles file -DEFAULT_DEM_PATH = Path("/mnt/nas/nav/planet-dem.pmtiles") - -# Tile size in pixels (z12 tiles are 512x512 in this tileset) -TILE_SIZE = 512 - -# Zoom level to use for elevation data -ZOOM_LEVEL = 12 - - -def terrarium_decode(rgb_array: np.ndarray) -> np.ndarray: - """ - Decode Terrarium-encoded RGB values to elevation in meters. - - Formula: elevation = (R * 256 + G + B/256) - 32768 - """ - r = rgb_array[:, :, 0].astype(np.float32) - g = rgb_array[:, :, 1].astype(np.float32) - b = rgb_array[:, :, 2].astype(np.float32) - - elevation = (r * 256.0 + g + b / 256.0) - 32768.0 - return elevation - - -def lat_lon_to_tile(lat: float, lon: float, zoom: int) -> Tuple[int, int]: - """Convert lat/lon to tile coordinates at given zoom level.""" - n = 2 ** zoom - x = int((lon + 180.0) / 360.0 * n) - lat_rad = math.radians(lat) - y = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n) - return x, y - - -def tile_to_lat_lon(x: int, y: int, zoom: int) -> Tuple[float, float, float, float]: - """Convert tile coordinates to bounding box (north, south, west, east).""" - n = 2 ** zoom - lon_west = x / n * 360.0 - 180.0 - lon_east = (x + 1) / n * 360.0 - 180.0 - lat_north = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y / n)))) - lat_south = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * (y + 1) / n)))) - return lat_north, lat_south, lon_west, lon_east - - -class DEMReader: - """Reader for Terrarium-encoded DEM tiles from PMTiles.""" - - def __init__(self, pmtiles_path: Path = DEFAULT_DEM_PATH, tile_cache_size: int = 128): - self.pmtiles_path = pmtiles_path - self._source = MmapSource(open(pmtiles_path, "rb")) - self._reader = PMTilesReader(self._source) - self._header = self._reader.header() - self._decode_tile = lru_cache(maxsize=tile_cache_size)(self._decode_tile_impl) - - def _decode_tile_impl(self, z: int, x: int, y: int) -> Optional[np.ndarray]: - """Fetch and decode a single tile.""" - tile_data = self._reader.get(z, x, y) - if tile_data is None: - return None - - img = Image.open(BytesIO(tile_data)) - rgb_array = np.array(img) - - if rgb_array.shape[2] == 4: - rgb_array = rgb_array[:, :, :3] - - elevation = terrarium_decode(rgb_array) - return elevation - - def get_elevation_grid( - self, - south: float, - north: float, - west: float, - east: float, - zoom: int = ZOOM_LEVEL - ) -> Tuple[np.ndarray, dict]: - """Get a stitched elevation grid for the given bounding box.""" - x_min, y_max = lat_lon_to_tile(south, west, zoom) - x_max, y_min = lat_lon_to_tile(north, east, zoom) - - n = 2 ** zoom - x_min = max(0, x_min) - x_max = min(n - 1, x_max) - y_min = max(0, y_min) - y_max = min(n - 1, y_max) - - n_tiles_x = x_max - x_min + 1 - n_tiles_y = y_max - y_min + 1 - out_height = n_tiles_y * TILE_SIZE - out_width = n_tiles_x * TILE_SIZE - - elevation = np.full((out_height, out_width), np.nan, dtype=np.float32) - - for ty in range(y_min, y_max + 1): - for tx in range(x_min, x_max + 1): - tile_elev = self._decode_tile(zoom, tx, ty) - if tile_elev is not None: - out_y = (ty - y_min) * TILE_SIZE - out_x = (tx - x_min) * TILE_SIZE - elevation[out_y:out_y + TILE_SIZE, out_x:out_x + TILE_SIZE] = tile_elev - - grid_north, _, grid_west, _ = tile_to_lat_lon(x_min, y_min, zoom) - _, grid_south, _, grid_east = tile_to_lat_lon(x_max, y_max, zoom) - - pixel_size_lat = (grid_north - grid_south) / out_height - pixel_size_lon = (grid_east - grid_west) / out_width - - origin_lat = grid_north - pixel_size_lat / 2 - origin_lon = grid_west + pixel_size_lon / 2 - - center_lat = (south + north) / 2 - lat_m = 111320.0 - lon_m = 111320.0 * math.cos(math.radians(center_lat)) - cell_size_lat_m = abs(pixel_size_lat) * lat_m - cell_size_lon_m = abs(pixel_size_lon) * lon_m - cell_size_m = (cell_size_lat_m + cell_size_lon_m) / 2 - - row_start = int((grid_north - north) / abs(pixel_size_lat)) - row_end = int((grid_north - south) / abs(pixel_size_lat)) - col_start = int((west - grid_west) / pixel_size_lon) - col_end = int((east - grid_west) / pixel_size_lon) - - row_start = max(0, row_start) - row_end = min(out_height, row_end) - col_start = max(0, col_start) - col_end = min(out_width, col_end) - - elevation = elevation[row_start:row_end, col_start:col_end] - - origin_lat = grid_north - (row_start + 0.5) * abs(pixel_size_lat) - origin_lon = grid_west + (col_start + 0.5) * pixel_size_lon - - metadata = { - "bounds": (south, north, west, east), - "pixel_size_lat": -abs(pixel_size_lat), - "pixel_size_lon": pixel_size_lon, - "origin_lat": origin_lat, - "origin_lon": origin_lon, - "cell_size_m": cell_size_m, - "shape": elevation.shape, - } - - return elevation, metadata - - def sample_point(self, lat: float, lon: float) -> Optional[float]: - """Return elevation in meters at a single point, or None if untiled. - - Reads one z12 Terrarium tile (LRU-cached) and indexes the matching - pixel. Sub-ms warm, ~15 ms cold per tile via NFS. Returns None when the - tile is absent (e.g. true ocean nodata) or lat is outside the - Web-Mercator pole cap (~+/-85.05 deg). - """ - if not -85.05112878 <= lat <= 85.05112878: - return None - n = 2 ** ZOOM_LEVEL - fx = (lon + 180.0) / 360.0 * n - fy = (1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi) / 2.0 * n - tx, ty = int(fx), int(fy) - tile = self._decode_tile(ZOOM_LEVEL, tx, ty) - if tile is None: - return None - row = min(TILE_SIZE - 1, int((fy - ty) * TILE_SIZE)) - col = min(TILE_SIZE - 1, int((fx - tx) * TILE_SIZE)) - return float(tile[row, col]) - - def pixel_to_latlon(self, row: int, col: int, metadata: dict) -> Tuple[float, float]: - """Convert pixel coordinates to lat/lon.""" - lat = metadata["origin_lat"] + row * metadata["pixel_size_lat"] - lon = metadata["origin_lon"] + col * metadata["pixel_size_lon"] - return lat, lon - - def latlon_to_pixel(self, lat: float, lon: float, metadata: dict) -> Tuple[int, int]: - """Convert lat/lon to pixel coordinates.""" - row = int((metadata["origin_lat"] - lat) / abs(metadata["pixel_size_lat"])) - col = int((lon - metadata["origin_lon"]) / metadata["pixel_size_lon"]) - return row, col - - def close(self): - """Close the PMTiles file.""" - pass # MmapSource handles cleanup - - -if __name__ == "__main__": - reader = DEMReader() - elevation, meta = reader.get_elevation_grid( - south=42.4, north=42.6, west=-114.5, east=-114.3 - ) - print(f"Elevation grid shape: {elevation.shape}") - print(f"Cell size: {meta['cell_size_m']:.1f} m") - print(f"Elevation range: {np.nanmin(elevation):.1f} - {np.nanmax(elevation):.1f} m") - center_row, center_col = elevation.shape[0] // 2, elevation.shape[1] // 2 - lat, lon = reader.pixel_to_latlon(center_row, center_col, meta) - print(f"Center pixel lat/lon: {lat:.4f}, {lon:.4f}") - reader.close() diff --git a/lib/offroute/friction.py b/lib/offroute/friction.py deleted file mode 100644 index 32df0c0..0000000 --- a/lib/offroute/friction.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Friction layer reader for OFFROUTE. - -Reads friction values from the WorldCover friction VRT and resamples -to match the elevation grid dimensions. -""" -import numpy as np -from pathlib import Path -from typing import Tuple, Optional - -try: - import rasterio - from rasterio.windows import from_bounds - from rasterio.enums import Resampling -except ImportError: - raise ImportError("rasterio is required for friction layer support") - -# Default path to the friction VRT -DEFAULT_FRICTION_PATH = Path("/mnt/nav/worldcover/friction/friction_conus.vrt") - - -class FrictionReader: - """Reader for WorldCover friction raster.""" - - def __init__(self, friction_path: Path = DEFAULT_FRICTION_PATH): - self.friction_path = friction_path - self._dataset = None - - def _open(self): - """Lazy open the dataset.""" - if self._dataset is None: - self._dataset = rasterio.open(self.friction_path) - return self._dataset - - def get_friction_grid( - self, - south: float, - north: float, - west: float, - east: float, - target_shape: Tuple[int, int] - ) -> np.ndarray: - """ - Get friction values for a bounding box, resampled to target shape. - - Args: - south, north, west, east: Bounding box coordinates - target_shape: (rows, cols) to resample to (matches elevation grid) - - Returns: - np.ndarray of uint8 friction values, same shape as target_shape. - Values: 10-40 = friction multiplier (divide by 10) - 255 = impassable - 0 = nodata (treat as impassable) - """ - ds = self._open() - - # Create a window from the bounding box - window = from_bounds(west, south, east, north, ds.transform) - - # Read with resampling to target shape - # Use nearest neighbor for categorical data - friction = ds.read( - 1, - window=window, - out_shape=target_shape, - resampling=Resampling.nearest - ) - - return friction - - def sample_point(self, lat: float, lon: float) -> int: - """Sample friction value at a single point.""" - ds = self._open() - - # Get pixel coordinates - row, col = ds.index(lon, lat) - - # Check bounds - if row < 0 or row >= ds.height or col < 0 or col >= ds.width: - return 0 # Out of bounds = nodata - - # Read single pixel - window = rasterio.windows.Window(col, row, 1, 1) - value = ds.read(1, window=window) - return int(value[0, 0]) - - def close(self): - """Close the dataset.""" - if self._dataset is not None: - self._dataset.close() - self._dataset = None - - -def friction_to_multiplier(friction: np.ndarray) -> np.ndarray: - """ - Convert friction values to cost multipliers. - - Args: - friction: uint8 array of friction values - - Returns: - float32 array of multipliers. - Values 10-40 become 1.0-4.0 (divide by 10). - Values 0 or 255 become np.inf (impassable). - """ - multiplier = friction.astype(np.float32) / 10.0 - - # Mark impassable cells - multiplier[friction == 0] = np.inf # nodata - multiplier[friction == 255] = np.inf # water/impassable - - return multiplier - - -if __name__ == "__main__": - print("Testing FrictionReader...") - - reader = FrictionReader() - - # Test point sampling - Murtaugh Lake (should be water = 255) - lake_lat, lake_lon = 42.47, -114.15 - lake_friction = reader.sample_point(lake_lat, lake_lon) - print(f"Murtaugh Lake ({lake_lat}, {lake_lon}): friction = {lake_friction}") - print(f" Expected: 255 (water/impassable)") - - # Test grid read for small bbox - friction = reader.get_friction_grid( - south=42.4, north=42.5, west=-114.2, east=-114.1, - target_shape=(100, 100) - ) - print(f"\nGrid test shape: {friction.shape}") - print(f"Unique values: {np.unique(friction)}") - print(f"Water cells (255): {np.sum(friction == 255)}") - - reader.close() - print("\nFrictionReader test complete.") diff --git a/lib/offroute/mvum.py b/lib/offroute/mvum.py deleted file mode 100644 index 31e503d..0000000 --- a/lib/offroute/mvum.py +++ /dev/null @@ -1,623 +0,0 @@ -""" -MVUM (Motor Vehicle Use Map) legal access layer for OFFROUTE. - -Queries USFS MVUM data from navi.db and provides rasterized access grids -indicating which roads/trails are open or closed to specific vehicle modes. - -MVUM is motor-vehicle specific — foot mode should skip this layer entirely. -""" -import re -import sqlite3 -import warnings -from datetime import datetime -from pathlib import Path -from typing import Dict, List, Optional, Tuple, Literal - -import numpy as np - -# Path to navi.db -NAVI_DB_PATH = Path("/mnt/nav/navi.db") - - -def parse_date_range(date_str: str) -> List[Tuple[int, int, int, int]]: - """ - Parse MVUM date range strings like "05/01-11/30" or "06/15-10/15,12/01-03/31". - - Returns list of (start_month, start_day, end_month, end_day) tuples. - Returns empty list if unparseable. - """ - if not date_str or date_str.strip() == "": - return [] - - ranges = [] - # Split by comma for multi-period strings - for part in date_str.split(","): - part = part.strip() - # Match MM/DD-MM/DD pattern - match = re.match(r"(\d{1,2})/(\d{1,2})-(\d{1,2})/(\d{1,2})", part) - if match: - try: - sm, sd, em, ed = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4)) - if 1 <= sm <= 12 and 1 <= sd <= 31 and 1 <= em <= 12 and 1 <= ed <= 31: - ranges.append((sm, sd, em, ed)) - except ValueError: - pass - - return ranges - - -def is_date_in_range(month: int, day: int, ranges: List[Tuple[int, int, int, int]]) -> bool: - """ - Check if a given month/day falls within any of the date ranges. - Handles ranges that wrap around year end (e.g., 12/01-03/31). - """ - if not ranges: - return True # No ranges = assume open - - date_num = month * 100 + day # Simple numeric comparison - - for sm, sd, em, ed in ranges: - start_num = sm * 100 + sd - end_num = em * 100 + ed - - if start_num <= end_num: - # Normal range (e.g., 05/01-11/30) - if start_num <= date_num <= end_num: - return True - else: - # Wrapping range (e.g., 12/01-03/31) - if date_num >= start_num or date_num <= end_num: - return True - - return False - - -def check_access( - status_field: Optional[str], - dates_field: Optional[str], - seasonal: Optional[str], - check_date: Optional[Tuple[int, int]] = None -) -> Optional[bool]: - """ - Determine if a road/trail is open to a vehicle type. - - Args: - status_field: Value of vehicle-class field (e.g., "open", null) - dates_field: Value of *_DATESOPEN field (e.g., "05/01-11/30") - seasonal: Value of SEASONAL field ("yearlong", "seasonal") - check_date: Optional (month, day) tuple to check against date ranges - - Returns: - True = open - False = closed - None = no data (field not populated, defer to SYMBOL) - """ - if status_field is None or status_field.strip() == "": - return None # No data - - status = status_field.strip().lower() - - if status != "open": - return False # Explicitly closed or restricted - - # Status is "open" - check seasonal restrictions - if check_date is not None: - month, day = check_date - - # Parse date ranges - if dates_field: - ranges = parse_date_range(dates_field) - if ranges: - return is_date_in_range(month, day, ranges) - - # No date field but seasonal = "yearlong" means always open - if seasonal and seasonal.strip().lower() == "yearlong": - return True - - # Seasonal with no dates - assume open (data quality issue) - if seasonal and seasonal.strip().lower() == "seasonal": - warnings.warn(f"Seasonal road/trail with no DATESOPEN, assuming open") - return True - - return True # Open with no date check - - -def get_mode_field(mode: str) -> Tuple[str, str]: - """ - Get the MVUM field names for a given travel mode. - - Returns (status_field, dates_field) tuple. - """ - mode_mapping = { - "atv": ("atv", "atv_datesopen"), - "motorcycle": ("motorcycle", "motorcycle_datesopen"), - "mtb": ("e_bike_class1", "e_bike_class1_dur"), # Closest analog for e-bikes - "vehicle": ("highclearancevehicle", "highclearancevehicle_datesopen"), - "passenger": ("passengervehicle", "passengervehicle_datesopen"), - } - - return mode_mapping.get(mode, ("highclearancevehicle", "highclearancevehicle_datesopen")) - - -def symbol_to_access(symbol: str, mode: str, maint_level: Optional[str] = None) -> Optional[bool]: - """ - Fallback: interpret SYMBOL field when per-vehicle-class fields are null. - - MVUM SYMBOL meanings (roads): - 1 = Open to all vehicles - 2 = Open to highway legal vehicles only - 3 = Road closed to motorized - 4 = Road open seasonally - 11 = Administrative use only - 12 = Decommissioned - - For trails, similar logic applies based on TRAILCLASS. - """ - if symbol is None: - return None - - sym = str(symbol).strip() - - # Symbol 1: Open to all - if sym == "1": - return True - - # Symbol 2: Highway legal only - if sym == "2": - # ATVs/motorcycles typically not highway legal - if mode in ("atv", "motorcycle"): - return False - return True - - # Symbol 3: Closed to motorized - if sym == "3": - return False - - # Symbol 4: Seasonally open (assume open if no date check) - if sym == "4": - return True - - # Symbol 11/12: Administrative/decommissioned = closed - if sym in ("11", "12"): - return False - - # Unknown symbol - defer - return None - - -class MVUMReader: - """ - Reader for MVUM data from navi.db. - - Queries roads and trails by bounding box and returns access grids. - """ - - def __init__(self, db_path: Path = NAVI_DB_PATH): - self.db_path = db_path - self._conn = None - - def _get_conn(self) -> sqlite3.Connection: - if self._conn is None: - if not self.db_path.exists(): - raise FileNotFoundError(f"navi.db not found at {self.db_path}") - self._conn = sqlite3.connect(str(self.db_path)) - self._conn.row_factory = sqlite3.Row - # Load Spatialite extension if available - try: - self._conn.enable_load_extension(True) - self._conn.load_extension("mod_spatialite") - except Exception: - pass # Spatialite not available, will use manual bbox queries - return self._conn - - def table_exists(self, table_name: str) -> bool: - """Check if an MVUM table exists.""" - conn = self._get_conn() - cur = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name=?", - (table_name,) - ) - return cur.fetchone() is not None - - def query_roads_bbox( - self, - south: float, north: float, west: float, east: float, - mode: str = "atv", - check_date: Optional[Tuple[int, int]] = None - ) -> List[Dict]: - """ - Query MVUM roads within a bounding box. - - Returns list of dicts with access info for the given mode. - """ - if not self.table_exists("mvum_roads"): - return [] - - conn = self._get_conn() - - # Query using bbox on geometry - # Since we don't have spatialite, we'll query all and filter in Python - # For production, consider pre-computing bbox columns - cur = conn.execute(""" - SELECT ogc_fid, id, name, symbol, operationalmaintlevel, seasonal, - atv, atv_datesopen, motorcycle, motorcycle_datesopen, - highclearancevehicle, highclearancevehicle_datesopen, - passengervehicle, passengervehicle_datesopen, - e_bike_class1, e_bike_class1_dur, - shape - FROM mvum_roads - """) - - status_field, dates_field = get_mode_field(mode) - results = [] - - for row in cur: - # Parse geometry to check bbox intersection - # The shape is stored as WKB blob - shape = row["shape"] - if shape is None: - continue - - # Quick bbox check using geometry extent - # Since we don't have Spatialite functions, we'll include all - # and let the rasterization handle it - - access = check_access( - row[status_field] if status_field in row.keys() else None, - row[dates_field] if dates_field in row.keys() else None, - row["seasonal"], - check_date - ) - - # Fallback to SYMBOL if no per-vehicle data - if access is None: - access = symbol_to_access(row["symbol"], mode, row["operationalmaintlevel"]) - - if access is not None: - results.append({ - "id": row["id"], - "name": row["name"], - "access": access, - "symbol": row["symbol"], - "maint_level": row["operationalmaintlevel"], - "shape": shape, - }) - - return results - - def query_trails_bbox( - self, - south: float, north: float, west: float, east: float, - mode: str = "atv", - check_date: Optional[Tuple[int, int]] = None - ) -> List[Dict]: - """ - Query MVUM trails within a bounding box. - """ - if not self.table_exists("mvum_trails"): - return [] - - conn = self._get_conn() - - cur = conn.execute(""" - SELECT ogc_fid, id, name, symbol, seasonal, trailclass, - atv, atv_datesopen, motorcycle, motorcycle_datesopen, - highclearancevehicle, highclearancevehicle_datesopen, - passengervehicle, passengervehicle_datesopen, - e_bike_class1, e_bike_class1_dur, - shape - FROM mvum_trails - """) - - status_field, dates_field = get_mode_field(mode) - results = [] - - for row in cur: - shape = row["shape"] - if shape is None: - continue - - access = check_access( - row[status_field] if status_field in row.keys() else None, - row[dates_field] if dates_field in row.keys() else None, - row["seasonal"], - check_date - ) - - if access is None: - access = symbol_to_access(row["symbol"], mode) - - if access is not None: - results.append({ - "id": row["id"], - "name": row["name"], - "access": access, - "symbol": row["symbol"], - "trail_class": row["trailclass"], - "shape": shape, - }) - - return results - - def query_nearest( - self, - lat: float, lon: float, - radius_m: float = 50, - table: str = "mvum_roads" - ) -> Optional[Dict]: - """ - Query the nearest MVUM feature to a point. - - Used for the places panel API. - """ - if not self.table_exists(table): - return None - - conn = self._get_conn() - - # Convert radius to degrees (approximate) - radius_deg = radius_m / 111000 - - # Query features in bbox around point - if table == "mvum_roads": - cur = conn.execute(""" - SELECT ogc_fid, id, name, forestname, districtname, symbol, - operationalmaintlevel, surfacetype, seasonal, jurisdiction, - passengervehicle, passengervehicle_datesopen, - highclearancevehicle, highclearancevehicle_datesopen, - atv, atv_datesopen, motorcycle, motorcycle_datesopen, - fourwd_gt50inches, fourwd_gt50_datesopen, - twowd_gt50inches, twowd_gt50_datesopen, - e_bike_class1, e_bike_class1_dur, - e_bike_class2, e_bike_class2_dur, - e_bike_class3, e_bike_class3_dur, - shape - FROM mvum_roads - LIMIT 1000 - """) - else: - cur = conn.execute(""" - SELECT ogc_fid, id, name, forestname, districtname, symbol, - seasonal, jurisdiction, trailclass, trailsystem, - passengervehicle, passengervehicle_datesopen, - highclearancevehicle, highclearancevehicle_datesopen, - atv, atv_datesopen, motorcycle, motorcycle_datesopen, - fourwd_gt50inches, fourwd_gt50_datesopen, - twowd_gt50inches, twowd_gt50_datesopen, - e_bike_class1, e_bike_class1_dur, - e_bike_class2, e_bike_class2_dur, - e_bike_class3, e_bike_class3_dur, - shape - FROM mvum_trails - LIMIT 1000 - """) - - # Find nearest feature - # This is a simplified approach - for production, use spatial index - try: - from shapely import wkb - from shapely.geometry import Point - - query_point = Point(lon, lat) - nearest = None - min_dist = float('inf') - - for row in cur: - try: - geom = wkb.loads(row["shape"]) - dist = query_point.distance(geom) - if dist < min_dist and dist < radius_deg: - min_dist = dist - nearest = dict(row) - nearest["geometry"] = geom - except Exception: - continue - - if nearest: - # Convert geometry to GeoJSON - nearest["geojson"] = nearest["geometry"].__geo_interface__ - del nearest["geometry"] - del nearest["shape"] - return nearest - - except ImportError: - warnings.warn("shapely not available for nearest query") - - return None - - def close(self): - if self._conn: - self._conn.close() - self._conn = None - - -def get_mvum_access_grid( - south: float, north: float, west: float, east: float, - target_shape: Tuple[int, int], - mode: Literal["foot", "mtb", "atv", "vehicle"] = "atv", - check_date: Optional[str] = None, - db_path: Path = NAVI_DB_PATH -) -> np.ndarray: - """ - Get MVUM access grid for pathfinding. - - Args: - south, north, west, east: Bounding box (WGS84) - target_shape: (rows, cols) to match elevation grid - mode: Travel mode (foot skips MVUM entirely) - check_date: Optional "MM/DD" string for seasonal checking - db_path: Path to navi.db - - Returns: - np.ndarray of uint8: - 0 = no MVUM data (defer to existing trail/friction logic) - 1 = road/trail is OPEN to this vehicle mode - 255 = road/trail EXISTS but is CLOSED to this mode - """ - # Foot mode bypasses MVUM entirely - if mode == "foot": - return np.zeros(target_shape, dtype=np.uint8) - - # Parse check_date if provided - parsed_date = None - if check_date: - match = re.match(r"(\d{1,2})/(\d{1,2})", check_date) - if match: - parsed_date = (int(match.group(1)), int(match.group(2))) - - # Initialize output grid - grid = np.zeros(target_shape, dtype=np.uint8) - rows, cols = target_shape - - # Pixel size - pixel_lat = (north - south) / rows - pixel_lon = (east - west) / cols - - reader = MVUMReader(db_path) - - try: - # Query roads and trails - roads = reader.query_roads_bbox(south, north, west, east, mode, parsed_date) - trails = reader.query_trails_bbox(south, north, west, east, mode, parsed_date) - - # Rasterize features - try: - from shapely import wkb - - for features in [roads, trails]: - for feat in features: - try: - geom = wkb.loads(feat["shape"]) - - # Get geometry bounds - minx, miny, maxx, maxy = geom.bounds - - # Check if intersects our bbox - if maxx < west or minx > east or maxy < south or miny > north: - continue - - # Rasterize line - value = 1 if feat["access"] else 255 - - # Simple line rasterization - if geom.geom_type in ("LineString", "MultiLineString"): - if geom.geom_type == "MultiLineString": - coords_list = [list(line.coords) for line in geom.geoms] - else: - coords_list = [list(geom.coords)] - - for coords in coords_list: - for i in range(len(coords) - 1): - x1, y1 = coords[i] - x2, y2 = coords[i + 1] - - # Convert to pixel coordinates - col1 = int((x1 - west) / pixel_lon) - row1 = int((north - y1) / pixel_lat) - col2 = int((x2 - west) / pixel_lon) - row2 = int((north - y2) / pixel_lat) - - # Bresenham's line algorithm - _draw_line(grid, row1, col1, row2, col2, value) - - except Exception as e: - continue - - except ImportError: - warnings.warn("shapely not available, MVUM rasterization skipped") - - finally: - reader.close() - - return grid - - -def _draw_line(grid: np.ndarray, r1: int, c1: int, r2: int, c2: int, value: int): - """Draw a line on the grid using Bresenham's algorithm.""" - rows, cols = grid.shape - - dr = abs(r2 - r1) - dc = abs(c2 - c1) - sr = 1 if r1 < r2 else -1 - sc = 1 if c1 < c2 else -1 - err = dr - dc - - r, c = r1, c1 - - while True: - if 0 <= r < rows and 0 <= c < cols: - # Only overwrite if current value is 0 (no data) or we're marking closed - if grid[r, c] == 0 or value == 255: - grid[r, c] = value - - if r == r2 and c == c2: - break - - e2 = 2 * err - if e2 > -dc: - err -= dc - r += sr - if e2 < dr: - err += dr - c += sc - - -if __name__ == "__main__": - import sys - - print("=" * 60) - print("MVUM Reader Test") - print("=" * 60) - - reader = MVUMReader() - - if not reader.table_exists("mvum_roads"): - print("ERROR: mvum_roads table not found in navi.db") - sys.exit(1) - - # Test bbox query (Sawtooth NF area) - print("\n[1] Testing bbox query (Sawtooth NF area)...") - roads = reader.query_roads_bbox( - south=43.5, north=44.0, west=-115.0, east=-114.0, - mode="atv" - ) - print(f" Found {len(roads)} roads") - - open_count = sum(1 for r in roads if r["access"]) - closed_count = sum(1 for r in roads if not r["access"]) - print(f" Open to ATV: {open_count}") - print(f" Closed to ATV: {closed_count}") - - # Test with seasonal date - print("\n[2] Testing with date check (July 15)...") - roads_summer = reader.query_roads_bbox( - south=43.5, north=44.0, west=-115.0, east=-114.0, - mode="atv", - check_date=(7, 15) - ) - open_summer = sum(1 for r in roads_summer if r["access"]) - print(f" Open to ATV on 07/15: {open_summer}") - - print("\n[3] Testing with date check (January 15)...") - roads_winter = reader.query_roads_bbox( - south=43.5, north=44.0, west=-115.0, east=-114.0, - mode="atv", - check_date=(1, 15) - ) - open_winter = sum(1 for r in roads_winter if r["access"]) - print(f" Open to ATV on 01/15: {open_winter}") - - # Test grid generation - print("\n[4] Testing grid generation...") - grid = get_mvum_access_grid( - south=43.5, north=44.0, west=-115.0, east=-114.0, - target_shape=(500, 1000), - mode="atv" - ) - print(f" Grid shape: {grid.shape}") - print(f" No data (0): {np.sum(grid == 0)}") - print(f" Open (1): {np.sum(grid == 1)}") - print(f" Closed (255): {np.sum(grid == 255)}") - - reader.close() - print("\nDone.") diff --git a/lib/offroute/prototype.py b/lib/offroute/prototype.py deleted file mode 100755 index c9b78f0..0000000 --- a/lib/offroute/prototype.py +++ /dev/null @@ -1,414 +0,0 @@ -#!/usr/bin/env python3 -""" -OFFROUTE Phase O3a Prototype - -Validates trail burn-in integration with the MCP pathfinder. -The path should actively seek out trails and roads when nearby. - -Compares paths with and without trail burn-in to show the benefit -of trail-seeking behavior. -""" -import json -import time -import sys -from pathlib import Path - -import numpy as np -from skimage.graph import MCP_Geometric - -# Add parent to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - -from lib.offroute.dem import DEMReader -from lib.offroute.cost import compute_cost_grid -from lib.offroute.friction import FrictionReader, friction_to_multiplier -from lib.offroute.barriers import BarrierReader, DEFAULT_BARRIERS_PATH -from lib.offroute.trails import TrailReader, DEFAULT_TRAILS_PATH - -# Test bounding box - Idaho area -BBOX = { - "south": 42.21, - "north": 42.60, - "west": -114.76, - "east": -113.79, -} - -# Start point: wilderness area away from roads -START_LAT = 42.35 -START_LON = -114.60 - -# End point: near Twin Falls (has roads/trails) -END_LAT = 42.55 -END_LON = -114.20 - -# Output files -OUTPUT_PATH_WITH_TRAILS = Path("/opt/recon/data/offroute-test-trails.geojson") -OUTPUT_PATH_NO_TRAILS = Path("/opt/recon/data/offroute-test-no-trails.geojson") - -# Memory limit in GB -MEMORY_LIMIT_GB = 12 - - -def check_memory_usage(): - """Check current memory usage and abort if over limit.""" - try: - import psutil - process = psutil.Process() - mem_gb = process.memory_info().rss / (1024**3) - if mem_gb > MEMORY_LIMIT_GB: - print(f"ERROR: Memory usage {mem_gb:.1f}GB exceeds {MEMORY_LIMIT_GB}GB limit") - sys.exit(1) - return mem_gb - except ImportError: - return 0 - - -def run_pathfinder( - elevation: np.ndarray, - meta: dict, - friction_mult: np.ndarray, - trails: np.ndarray, - barriers: np.ndarray, - use_trails: bool, - start_row: int, - start_col: int, - end_row: int, - end_col: int, - dem_reader: DEMReader, -) -> dict: - """Run the MCP pathfinder with given parameters.""" - # Compute cost grid - cost = compute_cost_grid( - elevation, - cell_size_m=meta["cell_size_m"], - friction=friction_mult, - trails=trails if use_trails else None, - barriers=barriers, - boundary_mode="pragmatic", - ) - - # Run MCP - mcp = MCP_Geometric(cost, fully_connected=True) - cumulative_costs, traceback = mcp.find_costs([(start_row, start_col)]) - - end_cost = cumulative_costs[end_row, end_col] - - if np.isinf(end_cost): - return { - "success": False, - "reason": "No path found (blocked by impassable terrain)", - } - - # Traceback path - path_indices = mcp.traceback((end_row, end_col)) - - # Convert to coordinates and collect stats - coordinates = [] - elevations = [] - trail_values = [] - - for row, col in path_indices: - lat, lon = dem_reader.pixel_to_latlon(row, col, meta) - elev = elevation[row, col] - trail_val = trails[row, col] if trails is not None else 0 - coordinates.append([lon, lat]) - elevations.append(elev) - trail_values.append(trail_val) - - # Compute distance - total_distance_m = 0 - for i in range(1, len(coordinates)): - lon1, lat1 = coordinates[i-1] - lon2, lat2 = coordinates[i] - R = 6371000 - dlat = np.radians(lat2 - lat1) - dlon = np.radians(lon2 - lon1) - a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2 - c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a)) - total_distance_m += R * c - - # Elevation stats - elev_arr = np.array(elevations) - elev_diff = np.diff(elev_arr) - elev_gain = np.sum(elev_diff[elev_diff > 0]) - elev_loss = np.sum(np.abs(elev_diff[elev_diff < 0])) - - # Trail stats - trail_arr = np.array(trail_values) - road_cells = np.sum(trail_arr == 5) - track_cells = np.sum(trail_arr == 15) - trail_cells = np.sum(trail_arr == 25) - off_trail_cells = np.sum(trail_arr == 0) - on_trail_cells = road_cells + track_cells + trail_cells - total_cells = len(trail_arr) - - return { - "success": True, - "coordinates": coordinates, - "total_time_seconds": float(end_cost), - "total_time_minutes": float(end_cost / 60), - "total_distance_m": float(total_distance_m), - "total_distance_km": float(total_distance_m / 1000), - "elevation_gain_m": float(elev_gain), - "elevation_loss_m": float(elev_loss), - "min_elevation_m": float(np.min(elev_arr)), - "max_elevation_m": float(np.max(elev_arr)), - "cell_count": total_cells, - "road_cells": int(road_cells), - "track_cells": int(track_cells), - "trail_cells": int(trail_cells), - "off_trail_cells": int(off_trail_cells), - "on_trail_pct": float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0, - } - - -def main(): - print("=" * 80) - print("OFFROUTE Phase O3a Prototype (Trail Burn-In)") - print("=" * 80) - - t0 = time.time() - - # Check for required rasters - if not DEFAULT_BARRIERS_PATH.exists(): - print(f"\nERROR: Barrier raster not found at {DEFAULT_BARRIERS_PATH}") - sys.exit(1) - if not DEFAULT_TRAILS_PATH.exists(): - print(f"\nERROR: Trails raster not found at {DEFAULT_TRAILS_PATH}") - sys.exit(1) - - # Step 1: Load elevation data - print(f"\n[1] Loading DEM for bbox: {BBOX}") - dem_reader = DEMReader() - - elevation, meta = dem_reader.get_elevation_grid( - south=BBOX["south"], - north=BBOX["north"], - west=BBOX["west"], - east=BBOX["east"], - ) - - print(f" Elevation grid shape: {elevation.shape}") - print(f" Cell count: {elevation.size:,}") - print(f" Cell size: {meta['cell_size_m']:.1f} m") - - mem = check_memory_usage() - if mem > 0: - print(f" Memory usage: {mem:.1f} GB") - - # Step 2: Load friction data - print(f"\n[2] Loading WorldCover friction layer...") - friction_reader = FrictionReader() - - friction_raw = friction_reader.get_friction_grid( - south=BBOX["south"], - north=BBOX["north"], - west=BBOX["west"], - east=BBOX["east"], - target_shape=elevation.shape - ) - friction_mult = friction_to_multiplier(friction_raw) - - print(f" Friction grid shape: {friction_raw.shape}") - print(f" Water/impassable cells: {np.sum(np.isinf(friction_mult)):,}") - - # Step 3: Load barrier data - print(f"\n[3] Loading PAD-US barrier layer...") - barrier_reader = BarrierReader() - - barriers = barrier_reader.get_barrier_grid( - south=BBOX["south"], - north=BBOX["north"], - west=BBOX["west"], - east=BBOX["east"], - target_shape=elevation.shape - ) - - closed_cells = np.sum(barriers == 255) - print(f" Barrier grid shape: {barriers.shape}") - print(f" Closed/restricted cells: {closed_cells:,}") - - # Step 4: Load trails data - print(f"\n[4] Loading OSM trails layer...") - trail_reader = TrailReader() - - trails = trail_reader.get_trails_grid( - south=BBOX["south"], - north=BBOX["north"], - west=BBOX["west"], - east=BBOX["east"], - target_shape=elevation.shape - ) - - road_cells = np.sum(trails == 5) - track_cells = np.sum(trails == 15) - trail_cells = np.sum(trails == 25) - print(f" Trails grid shape: {trails.shape}") - print(f" Road cells: {road_cells:,}") - print(f" Track cells: {track_cells:,}") - print(f" Trail cells: {trail_cells:,}") - print(f" Total trail coverage: {100*(road_cells+track_cells+trail_cells)/trails.size:.2f}%") - - mem = check_memory_usage() - if mem > 0: - print(f" Memory usage: {mem:.1f} GB") - - # Step 5: Convert start/end to pixel coordinates - print(f"\n[5] Converting coordinates...") - start_row, start_col = dem_reader.latlon_to_pixel(START_LAT, START_LON, meta) - end_row, end_col = dem_reader.latlon_to_pixel(END_LAT, END_LON, meta) - - print(f" Start: ({START_LAT}, {START_LON}) -> pixel ({start_row}, {start_col})") - print(f" End: ({END_LAT}, {END_LON}) -> pixel ({end_row}, {end_col})") - - # Validate coordinates - rows, cols = elevation.shape - if not (0 <= start_row < rows and 0 <= start_col < cols): - print(f"ERROR: Start point outside grid bounds") - sys.exit(1) - if not (0 <= end_row < rows and 0 <= end_col < cols): - print(f"ERROR: End point outside grid bounds") - sys.exit(1) - - # Step 6: Run pathfinder WITH trails - print(f"\n[6] Running pathfinder WITH trail burn-in...") - t6a = time.time() - result_trails = run_pathfinder( - elevation, meta, friction_mult, trails, barriers, - use_trails=True, - start_row=start_row, start_col=start_col, - end_row=end_row, end_col=end_col, - dem_reader=dem_reader, - ) - t6b = time.time() - print(f" Completed in {t6b - t6a:.1f}s") - - # Step 7: Run pathfinder WITHOUT trails - print(f"\n[7] Running pathfinder WITHOUT trail burn-in...") - t7a = time.time() - result_no_trails = run_pathfinder( - elevation, meta, friction_mult, trails, barriers, - use_trails=False, - start_row=start_row, start_col=start_col, - end_row=end_row, end_col=end_col, - dem_reader=dem_reader, - ) - t7b = time.time() - print(f" Completed in {t7b - t7a:.1f}s") - - # Step 8: Save GeoJSON outputs - print(f"\n[8] Saving GeoJSON outputs...") - - OUTPUT_PATH_WITH_TRAILS.parent.mkdir(parents=True, exist_ok=True) - - if result_trails["success"]: - geojson = { - "type": "Feature", - "properties": { - "type": "offroute_with_trails", - "phase": "O3a", - "trail_burn_in": True, - "start": {"lat": START_LAT, "lon": START_LON}, - "end": {"lat": END_LAT, "lon": END_LON}, - **{k: v for k, v in result_trails.items() if k not in ["success", "coordinates"]}, - }, - "geometry": { - "type": "LineString", - "coordinates": result_trails["coordinates"], - } - } - with open(OUTPUT_PATH_WITH_TRAILS, "w") as f: - json.dump(geojson, f, indent=2) - print(f" Saved: {OUTPUT_PATH_WITH_TRAILS}") - - if result_no_trails["success"]: - geojson = { - "type": "Feature", - "properties": { - "type": "offroute_no_trails", - "phase": "O3a", - "trail_burn_in": False, - "start": {"lat": START_LAT, "lon": START_LON}, - "end": {"lat": END_LAT, "lon": END_LON}, - **{k: v for k, v in result_no_trails.items() if k not in ["success", "coordinates"]}, - }, - "geometry": { - "type": "LineString", - "coordinates": result_no_trails["coordinates"], - } - } - with open(OUTPUT_PATH_NO_TRAILS, "w") as f: - json.dump(geojson, f, indent=2) - print(f" Saved: {OUTPUT_PATH_NO_TRAILS}") - - t_total = time.time() - - # Final report - print(f"\n" + "=" * 80) - print("SIDE-BY-SIDE COMPARISON: Trail Burn-In Effect") - print("=" * 80) - - if result_trails["success"] and result_no_trails["success"]: - print(f"{'Metric':<25} {'WITH TRAILS':<20} {'WITHOUT TRAILS':<20} {'Delta':<15}") - print("-" * 80) - - metrics = [ - ("Distance (km)", "total_distance_km", ".2f"), - ("Effort time (min)", "total_time_minutes", ".1f"), - ("Cell count", "cell_count", "d"), - ("Elevation gain (m)", "elevation_gain_m", ".0f"), - ("On-trail %", "on_trail_pct", ".1f"), - ("Road cells", "road_cells", "d"), - ("Track cells", "track_cells", "d"), - ("Trail cells", "trail_cells", "d"), - ] - - for label, key, fmt in metrics: - val_with = result_trails[key] - val_without = result_no_trails[key] - if isinstance(val_with, int): - delta = val_with - val_without - delta_str = f"{delta:+d}" - else: - delta = val_with - val_without - delta_str = f"{delta:+.2f}" - print(f"{label:<25} {val_with:<20{fmt}} {val_without:<20{fmt}} {delta_str:<15}") - - # Analysis - print(f"\n" + "-" * 80) - print("ANALYSIS") - print("-" * 80) - - time_saved = result_no_trails["total_time_minutes"] - result_trails["total_time_minutes"] - if time_saved > 0: - print(f"Trail burn-in saves {time_saved:.1f} minutes ({100*time_saved/result_no_trails['total_time_minutes']:.1f}% faster)") - elif time_saved < 0: - print(f"Trail burn-in adds {-time_saved:.1f} minutes (path seeks trails even if longer)") - - on_trail_with = result_trails["on_trail_pct"] - on_trail_without = result_no_trails["on_trail_pct"] - if on_trail_with > on_trail_without: - print(f"Trail burn-in increases on-trail travel: {on_trail_without:.1f}% → {on_trail_with:.1f}%") - else: - print(f"Both paths have similar on-trail percentage") - - else: - if not result_trails["success"]: - print(f"WITH TRAILS: FAILED - {result_trails.get('reason', 'unknown')}") - if not result_no_trails["success"]: - print(f"WITHOUT TRAILS: FAILED - {result_no_trails.get('reason', 'unknown')}") - - print(f"\n" + "-" * 80) - print(f"Total wall time: {t_total - t0:.1f}s") - - # Cleanup - dem_reader.close() - friction_reader.close() - barrier_reader.close() - trail_reader.close() - - print("\nPrototype completed.") - - -if __name__ == "__main__": - main() diff --git a/lib/offroute/router.py b/lib/offroute/router.py deleted file mode 100644 index bd3d379..0000000 --- a/lib/offroute/router.py +++ /dev/null @@ -1,1682 +0,0 @@ -""" -OFFROUTE Router — Bidirectional wilderness-to-network path orchestration. - -Supports four routing scenarios: - A: off-network start → on-network end (wilderness then Valhalla) - B: off-network start → off-network end (wilderness, Valhalla, wilderness) - C: on-network start → off-network end (Valhalla then wilderness) - D: on-network start → on-network end (pure Valhalla passthrough) - -Off-network detection: Valhalla /locate snap distance > 500m = off-network. - -IMPORTANT: The wilderness segment ALWAYS uses foot mode for pathfinding. -The user's selected mode affects: - 1. Which entry points are valid (foot=any, mtb=tracks+roads, vehicle=roads only) - 2. The Valhalla costing profile for the network segment -""" -import gc -import json -import math -import subprocess -import tempfile -import time -from pathlib import Path -from typing import Dict, List, Optional, Tuple, Literal, Set - -import numpy as np -import requests -import psycopg2 -import psycopg2.extras -from shapely.geometry import LineString -from skimage.graph import MCP_Geometric - -from .dem import DEMReader -from .cost import compute_cost_grid -from .friction import FrictionReader, friction_to_multiplier -from .barriers import BarrierReader, WildernessReader, DEFAULT_WILDERNESS_PATH -from .trails import TrailReader -from .mvum import get_mvum_access_grid -from ..deployment_config import get_deployment_config - -# Load configuration -_deploy_config = get_deployment_config() -_offroute_config = _deploy_config.get("offroute", {}) - -# Paths (configurable via home.yaml) -OSM_PBF_PATH = Path(_offroute_config.get("osm_pbf_path", "/mnt/nav/sources/idaho-latest.osm.pbf")) -DENSIFY_INTERVAL_M = _offroute_config.get("densify_interval_m", 100) -POSTGIS_DSN = _offroute_config.get("postgis_dsn", "dbname=padus user=postgres") - -# Legacy SQLite path (still used by MVUM) -NAVI_DB_PATH = Path("/mnt/nav/navi.db") - -# Valhalla endpoint -VALHALLA_URL = "http://localhost:8002" - -# Search radius for entry points (km) -DEFAULT_SEARCH_RADIUS_KM = 50 -EXPANDED_SEARCH_RADIUS_KM = 100 - -# Memory limit -MEMORY_LIMIT_GB = 12 - -# Off-network detection threshold (meters) -OFF_NETWORK_THRESHOLD_M = 10 - -# Mode to Valhalla costing mapping -MODE_TO_COSTING = { - "auto": "auto", - "foot": "pedestrian", - "mtb": "bicycle", - "atv": "auto", - "vehicle": "auto", -} - -# Mode to valid entry point highway classes -# foot = any trail/track/road, mtb = tracks and roads, vehicle = roads only -MODE_TO_VALID_HIGHWAYS = { - "auto": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service"}, - "foot": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track", "path", "footway", "bridleway"}, - "mtb": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track"}, - "atv": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service", "track"}, - "vehicle": {"primary", "secondary", "tertiary", "unclassified", "residential", - "service"}, -} - - -def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: - """Calculate distance between two points in meters.""" - R = 6371000 - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2 - c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) - return R * c - - -def check_memory_usage() -> float: - """Check current memory usage in GB.""" - try: - import psutil - process = psutil.Process() - return process.memory_info().rss / (1024**3) - except ImportError: - return 0 - - -class EntryPointIndex: - """ - PostGIS-backed spatial index of road/trail entry points. - Uses ST_DWithin for fast radius queries with meter-accurate distances. - Densifies highway LineStrings at 100m intervals for better coverage. - """ - - def __init__(self, dsn: str = None): - self.dsn = dsn or POSTGIS_DSN - self._conn: Optional[psycopg2.extensions.connection] = None - - def _get_conn(self) -> psycopg2.extensions.connection: - if self._conn is None or self._conn.closed: - self._conn = psycopg2.connect(self.dsn) - return self._conn - - def table_exists(self) -> bool: - """Check if entry_points table exists.""" - conn = self._get_conn() - with conn.cursor() as cur: - cur.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_name = 'entry_points' - ) - """) - return cur.fetchone()[0] - - def get_entry_point_count(self) -> int: - """Return the number of entry points in the index.""" - if not self.table_exists(): - return 0 - conn = self._get_conn() - with conn.cursor() as cur: - cur.execute("SELECT COUNT(*) FROM entry_points") - return cur.fetchone()[0] - - def query_bbox( - self, - south: float, - north: float, - west: float, - east: float, - valid_highways: Optional[Set[str]] = None - ) -> List[Dict]: - """Find entry points within a bounding box.""" - if not self.table_exists(): - return [] - - conn = self._get_conn() - - highway_filter = "" - params = [west, south, east, north] - if valid_highways: - placeholders = ','.join(['%s'] * len(valid_highways)) - highway_filter = f"AND highway_class IN ({placeholders})" - params.extend(list(valid_highways)) - - query = f""" - SELECT - id, - ST_Y(geom) as lat, - ST_X(geom) as lon, - highway_class, - name, - land_status - FROM entry_points - WHERE geom && ST_MakeEnvelope(%s, %s, %s, %s, 4326) - {highway_filter} - """ - - with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: - cur.execute(query, params) - return [dict(row) for row in cur.fetchall()] - - def query_radius( - self, - lat: float, - lon: float, - radius_km: float, - valid_highways: Optional[Set[str]] = None, - limit: int = 50 - ) -> List[Dict]: - """ - Find entry points within radius_km of (lat, lon). - Uses PostGIS ST_DWithin with geography cast for meter-accurate distance. - """ - if not self.table_exists(): - return [] - - conn = self._get_conn() - radius_m = radius_km * 1000 - - # Build query with optional highway filter - highway_filter = "" - params = [lon, lat, lon, lat, radius_m] - if valid_highways: - placeholders = ','.join(['%s'] * len(valid_highways)) - highway_filter = f"AND highway_class IN ({placeholders})" - params.extend(list(valid_highways)) - params.append(limit) - - query = f""" - SELECT - id, - ST_Y(geom) as lat, - ST_X(geom) as lon, - highway_class, - name, - land_status, - ST_Distance( - geom::geography, - ST_SetSRID(ST_Point(%s, %s), 4326)::geography - ) as distance_m - FROM entry_points - WHERE ST_DWithin( - geom::geography, - ST_SetSRID(ST_Point(%s, %s), 4326)::geography, - %s - ) - {highway_filter} - ORDER BY distance_m - LIMIT %s - """ - - with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: - cur.execute(query, params) - return [dict(row) for row in cur.fetchall()] - - def build_index(self, osm_pbf_path: Path = None) -> Dict: - """ - Build the entry point index from OSM PBF. - Densifies LineStrings to sample points every 100m. - Tags points with land_status from PAD-US. - """ - if osm_pbf_path is None: - osm_pbf_path = OSM_PBF_PATH - - if not osm_pbf_path.exists(): - raise FileNotFoundError(f"OSM PBF not found: {osm_pbf_path}") - - print(f"Building entry point index from {osm_pbf_path}...") - start_time = time.time() - - highway_types = [ - "primary", "secondary", "tertiary", "unclassified", - "residential", "service", "track", "path", "footway", "bridleway" - ] - - stats = {"total": 0, "by_class": {}, "lines_processed": 0} - - with tempfile.TemporaryDirectory() as tmpdir: - geojson_path = Path(tmpdir) / "highways.geojson" - - # Extract highways with osmium - print(" Extracting highways with osmium...") - cmd = ["osmium", "tags-filter", str(osm_pbf_path)] - for ht in highway_types: - cmd.append(f"w/highway={ht}") - cmd.extend(["-o", str(Path(tmpdir) / "filtered.osm.pbf"), "--overwrite"]) - subprocess.run(cmd, check=True, capture_output=True) - - # Convert to GeoJSON - print(" Converting to GeoJSON with ogr2ogr...") - cmd = [ - "ogr2ogr", "-f", "GeoJSON", - str(geojson_path), - str(Path(tmpdir) / "filtered.osm.pbf"), - "lines", "-t_srs", "EPSG:4326" - ] - subprocess.run(cmd, check=True, capture_output=True) - - # Load GeoJSON - print(" Loading GeoJSON...") - with open(geojson_path) as f: - data = json.load(f) - - # Process features and densify - print(f" Densifying LineStrings at {DENSIFY_INTERVAL_M}m intervals...") - points_to_insert = [] - seen_keys = set() - - features = data.get("features", []) - total_features = len(features) - - for idx, feature in enumerate(features): - if idx > 0 and idx % 100000 == 0: - print(f" Processed {idx}/{total_features} features...") - - props = feature.get("properties", {}) - geom = feature.get("geometry", {}) - - if geom.get("type") != "LineString": - continue - - coords = geom.get("coordinates", []) - if len(coords) < 2: - continue - - highway_class = props.get("highway", "unknown") - name = props.get("name", "") - stats["lines_processed"] += 1 - - # Densify this LineString - densified = self._densify_line(coords, DENSIFY_INTERVAL_M) - - for lon, lat in densified: - # Deduplicate by rounding to 5 decimal places (~1m precision) - key = (round(lat, 5), round(lon, 5)) - if key in seen_keys: - continue - seen_keys.add(key) - - points_to_insert.append((lon, lat, highway_class, name)) - - # Insert into PostGIS - print(f" Inserting {len(points_to_insert)} entry points into PostGIS...") - conn = self._get_conn() - - with conn.cursor() as cur: - # Truncate existing data - cur.execute("TRUNCATE entry_points RESTART IDENTITY") - - # Batch insert with execute_values for speed - batch_size = 50000 - for i in range(0, len(points_to_insert), batch_size): - batch = points_to_insert[i:i+batch_size] - psycopg2.extras.execute_values( - cur, - """ - INSERT INTO entry_points (geom, highway_class, name) - VALUES %s - """, - batch, - template="(ST_SetSRID(ST_Point(%s, %s), 4326), %s, %s)", - page_size=10000 - ) - if i > 0 and i % 500000 == 0: - print(f" Inserted {i}/{len(points_to_insert)} points...") - - conn.commit() - - # Tag land_status from PAD-US - print(" Tagging land_status from PAD-US subdivided polygons...") - with conn.cursor() as cur: - cur.execute(""" - UPDATE entry_points e - SET land_status = 'public' - FROM padus_sub p - WHERE ST_Intersects(e.geom, p.geom) - """) - public_count = cur.rowcount - print(f" Tagged {public_count} points as public land") - - conn.commit() - - # Gather stats - elapsed = time.time() - start_time - stats["total"] = len(points_to_insert) - stats["build_time_sec"] = round(elapsed, 1) - - for lon, lat, hc, name in points_to_insert: - stats["by_class"][hc] = stats["by_class"].get(hc, 0) + 1 - - print(f" Done in {elapsed:.1f}s. Total: {stats['total']} entry points from {stats['lines_processed']} lines") - for hc, count in sorted(stats["by_class"].items(), key=lambda x: -x[1]): - print(f" {hc}: {count}") - - return stats - - def _densify_line(self, coords: List[List[float]], interval_m: float) -> List[tuple]: - """ - Sample points along a LineString at regular intervals. - coords: [[lon, lat], ...] in GeoJSON order - Returns: [(lon, lat), ...] sampled points including first and last - """ - if len(coords) < 2: - return [(coords[0][0], coords[0][1])] if coords else [] - - # Calculate line length in meters using haversine on segments - total_m = 0 - for i in range(len(coords) - 1): - lon1, lat1 = coords[i] - lon2, lat2 = coords[i + 1] - total_m += haversine_distance(lat1, lon1, lat2, lon2) - - if total_m == 0: - return [(coords[0][0], coords[0][1])] - - # Create Shapely LineString - line = LineString(coords) - - # Calculate number of points needed - n_points = max(2, int(total_m / interval_m) + 1) - - # Sample using normalized interpolation - result = [] - for i in range(n_points): - fraction = min(i / (n_points - 1), 1.0) if n_points > 1 else 0 - point = line.interpolate(fraction, normalized=True) - result.append((point.x, point.y)) # (lon, lat) - - # Always ensure first and last original coordinates are included - first_coord = (coords[0][0], coords[0][1]) - last_coord = (coords[-1][0], coords[-1][1]) - - if result[0] != first_coord: - result[0] = first_coord - if result[-1] != last_coord: - result[-1] = last_coord - - return result - - def _highway_priority(self, highway_class: str) -> int: - """Lower number = better priority for entry points.""" - priority = { - "primary": 1, "secondary": 2, "tertiary": 3, - "unclassified": 4, "residential": 5, "service": 6, - "track": 7, "path": 8, "footway": 9, "bridleway": 10 - } - return priority.get(highway_class, 99) - - def close(self): - if self._conn and not self._conn.closed: - self._conn.close() - self._conn = None - - -class OffrouteRouter: - """ - OFFROUTE Router — orchestrates wilderness pathfinding and Valhalla stitching. - - Supports four scenarios: - A: off-network start → on-network end - B: off-network start → off-network end - C: on-network start → off-network end - D: on-network start → on-network end (pure Valhalla) - - IMPORTANT: Wilderness segment ALWAYS uses foot mode for pathfinding. - User's mode affects entry point selection and Valhalla costing only. - """ - - def __init__(self): - self.dem_reader = None - self.friction_reader = None - self.barrier_reader = None - self.wilderness_reader = None - self.trail_reader = None - self.entry_index = EntryPointIndex() - - def _init_readers(self): - """Lazy init readers.""" - if self.dem_reader is None: - self.dem_reader = DEMReader() - if self.friction_reader is None: - self.friction_reader = FrictionReader() - if self.barrier_reader is None: - self.barrier_reader = BarrierReader() - if self.wilderness_reader is None and DEFAULT_WILDERNESS_PATH.exists(): - self.wilderness_reader = WildernessReader() - if self.trail_reader is None: - self.trail_reader = TrailReader() - - def _locate_on_network(self, lat: float, lon: float, mode: str) -> Dict: - """ - Check if a point is on the routable network using Valhalla's /locate. - - Returns: - { - "on_network": bool, - "snap_distance_m": float, - "snapped_lat": float, - "snapped_lon": float - } - """ - costing = MODE_TO_COSTING.get(mode, "pedestrian") - try: - resp = requests.post( - f"{VALHALLA_URL}/locate", - json={"locations": [{"lat": lat, "lon": lon}], "costing": costing}, - timeout=10 - ) - - if resp.status_code == 200: - data = resp.json() - if data and len(data) > 0 and data[0].get("edges"): - edge = data[0]["edges"][0] - snap_lat = edge.get("correlated_lat", lat) - snap_lon = edge.get("correlated_lon", lon) - snap_dist = haversine_distance(lat, lon, snap_lat, snap_lon) - return { - "on_network": snap_dist <= OFF_NETWORK_THRESHOLD_M, - "snap_distance_m": snap_dist, - "snapped_lat": snap_lat, - "snapped_lon": snap_lon - } - except Exception: - pass - - return { - "on_network": False, - "snap_distance_m": float('inf'), - "snapped_lat": lat, - "snapped_lon": lon - } - - def route( - self, - start_lat: float, - start_lon: float, - end_lat: float, - end_lon: float, - mode: Literal["foot", "mtb", "atv", "vehicle"] = "foot", - boundary_mode: Literal["strict", "pragmatic", "emergency"] = "pragmatic" - ) -> Dict: - """ - Route between two points, handling all four scenarios. - - Scenarios: - A: off-network start → on-network end (wilderness then network) - B: off-network start → off-network end (wilderness, network, wilderness) - C: on-network start → off-network end (network then wilderness) - D: on-network start → on-network end (pure network) - - Args: - start_lat, start_lon: Starting coordinates - end_lat, end_lon: Destination coordinates - mode: Travel mode (foot, mtb, atv, vehicle) - boundary_mode: How to handle private land (strict, pragmatic, emergency) - - Returns a GeoJSON FeatureCollection with route segments. - """ - if mode not in MODE_TO_COSTING: - return {"status": "error", "message": f"Unknown mode: {mode}"} - - # Detect network status for both endpoints - start_status = self._locate_on_network(start_lat, start_lon, mode) - end_status = self._locate_on_network(end_lat, end_lon, mode) - - start_off_network = not start_status["on_network"] - end_off_network = not end_status["on_network"] - - # Dispatch to appropriate handler - if not start_off_network and not end_off_network: - # Scenario D: on-network → on-network (pure Valhalla) - return self._route_D_network_only( - start_lat, start_lon, end_lat, end_lon, mode - ) - elif not start_off_network and end_off_network: - # Scenario C: on-network → off-network - return self._route_C_network_to_wilderness( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - elif start_off_network and not end_off_network: - # Scenario A: off-network → on-network - return self._route_A_wilderness_to_network( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - else: - # Scenario B: off-network → off-network - return self._route_B_wilderness_both( - start_lat, start_lon, end_lat, end_lon, mode, boundary_mode - ) - - def _route_D_network_only( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str - ) -> Dict: - """ - Scenario D: Both endpoints on-network. Pure Valhalla routing. - """ - t0 = time.time() - costing = MODE_TO_COSTING.get(mode, "pedestrian") - - valhalla_request = { - "locations": [ - {"lat": start_lat, "lon": start_lon}, - {"lat": end_lat, "lon": end_lon} - ], - "costing": costing, - "directions_options": {"units": "kilometers"} - } - - try: - resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) - - if resp.status_code != 200: - return { - "status": "error", - "message": f"Network routing failed: {resp.text[:200]}" - } - - valhalla_data = resp.json() - trip = valhalla_data.get("trip", {}) - legs = trip.get("legs", []) - - if not legs: - return {"status": "error", "message": "No route found"} - - leg = legs[0] - shape = leg.get("shape", "") - network_coords = self._decode_polyline(shape) - - maneuvers = [] - for m in leg.get("maneuvers", []): - maneuvers.append({ - "instruction": m.get("instruction", ""), - "type": m.get("type", 0), - "distance_km": m.get("length", 0), - "time_seconds": m.get("time", 0), - "street_names": m.get("street_names", []), - }) - - summary = trip.get("summary", {}) - distance_km = summary.get("length", 0) - duration_min = summary.get("time", 0) / 60 - - # Build response in same format as wilderness routes - network_feature = { - "type": "Feature", - "properties": { - "segment_type": "network", - "distance_km": distance_km, - "duration_minutes": duration_min, - "maneuvers": maneuvers, - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_coords} - } - - combined_feature = { - "type": "Feature", - "properties": { - "segment_type": "combined", - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_coords} - } - - geojson = {"type": "FeatureCollection", "features": [network_feature, combined_feature]} - - result = { - "status": "ok", - "route": geojson, - "summary": { - "total_distance_km": float(distance_km), - "total_effort_minutes": float(duration_min), - "wilderness_distance_km": 0.0, - "wilderness_effort_minutes": 0.0, - "network_distance_km": float(distance_km), - "network_duration_minutes": float(duration_min), - "on_trail_pct": 100.0, - "barrier_crossings": 0, - "network_mode": mode, - "scenario": "D", - "computation_time_s": time.time() - t0, - } - } - return result - - except Exception as e: - return {"status": "error", "message": f"Network routing failed: {e}"} - - def _route_A_wilderness_to_network( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario A: Off-network start → on-network end. - Wilderness pathfinding from start to entry point, then Valhalla to end. - """ - t0 = time.time() - - # Ensure entry point index exists - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - # Get valid highway classes for this mode - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - - # Find entry points near start, filtered by mode - MAX_ENTRY_POINTS = 10 - entry_points = self.entry_index.query_radius( - start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - - if not entry_points: - entry_points = self.entry_index.query_radius( - start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points: - if mode == "vehicle": - msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try a different mode." - elif mode in ("mtb", "atv"): - msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km. Try foot mode." - else: - msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of start." - return {"status": "error", "message": msg} - - entry_points = entry_points[:MAX_ENTRY_POINTS] - - # Run wilderness pathfinding - wilderness_result = self._pathfind_wilderness( - start_lat, start_lon, end_lat, end_lon, - entry_points, boundary_mode, "start" - ) - - if wilderness_result.get("status") == "error": - return wilderness_result - - # Extract results - wilderness_coords = wilderness_result["coords"] - wilderness_stats = wilderness_result["stats"] - wilderness_elevations = wilderness_result.get("elevations", []) - best_entry = wilderness_result["entry_point"] - - entry_lat = best_entry["lat"] - entry_lon = best_entry["lon"] - - # Call Valhalla from entry point to destination - network_result = self._valhalla_route(entry_lat, entry_lon, end_lat, end_lon, mode) - - # Build response - return self._build_response( - wilderness_start=wilderness_coords, - wilderness_start_stats=wilderness_stats, - wilderness_start_elevations=wilderness_elevations, - network_segment=network_result.get("segment"), - wilderness_end=None, - wilderness_end_stats=None, - wilderness_end_elevations=None, - mode=mode, - boundary_mode=boundary_mode, - entry_start=best_entry, - entry_end=None, - scenario="A", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _route_C_network_to_wilderness( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario C: On-network start → off-network end. - Valhalla from start to entry point, then wilderness pathfinding to end. - """ - t0 = time.time() - - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - - # Find entry points near END (destination) - MAX_ENTRY_POINTS = 10 - entry_points = self.entry_index.query_radius( - end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - - if not entry_points: - entry_points = self.entry_index.query_radius( - end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points: - if mode == "vehicle": - msg = f"No roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try a different mode." - elif mode in ("mtb", "atv"): - msg = f"No tracks or roads found within {EXPANDED_SEARCH_RADIUS_KM}km of destination. Try foot mode." - else: - msg = f"No trail entry points found within {EXPANDED_SEARCH_RADIUS_KM}km of destination." - return {"status": "error", "message": msg} - - entry_points = entry_points[:MAX_ENTRY_POINTS] - - # Run wilderness pathfinding FROM END toward entry points - wilderness_result = self._pathfind_wilderness( - end_lat, end_lon, start_lat, start_lon, - entry_points, boundary_mode, "end" - ) - - if wilderness_result.get("status") == "error": - return wilderness_result - - # The path is from end→entry, reverse it for display (entry→end) - wilderness_coords = list(reversed(wilderness_result["coords"])) - wilderness_stats = wilderness_result["stats"] - wilderness_elevations = list(reversed(wilderness_result.get("elevations", []))) - best_entry = wilderness_result["entry_point"] - - entry_lat = best_entry["lat"] - entry_lon = best_entry["lon"] - - # Call Valhalla from start to entry point - network_result = self._valhalla_route(start_lat, start_lon, entry_lat, entry_lon, mode) - - # Build response (network first, then wilderness) - return self._build_response( - wilderness_start=None, - wilderness_start_stats=None, - wilderness_start_elevations=None, - network_segment=network_result.get("segment"), - wilderness_end=wilderness_coords, - wilderness_end_stats=wilderness_stats, - wilderness_end_elevations=wilderness_elevations, - mode=mode, - boundary_mode=boundary_mode, - entry_start=None, - entry_end=best_entry, - scenario="C", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _route_B_wilderness_both( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str, boundary_mode: str - ) -> Dict: - """ - Scenario B: Off-network start → off-network end. - Wilderness from start to entry_A, Valhalla entry_A to entry_B, wilderness from entry_B to end. - """ - t0 = time.time() - - if not self.entry_index.table_exists() or self.entry_index.get_entry_point_count() == 0: - return { - "status": "error", - "message": "Trail entry point index not built. Run build_entry_index() first." - } - - valid_highways = MODE_TO_VALID_HIGHWAYS.get(mode) - MAX_ENTRY_POINTS = 10 - - # Find entry points near START - entry_points_start = self.entry_index.query_radius( - start_lat, start_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_start: - entry_points_start = self.entry_index.query_radius( - start_lat, start_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_start: - return {"status": "error", "message": f"No entry points found near start within {EXPANDED_SEARCH_RADIUS_KM}km."} - entry_points_start = entry_points_start[:MAX_ENTRY_POINTS] - - # Find entry points near END - entry_points_end = self.entry_index.query_radius( - end_lat, end_lon, DEFAULT_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_end: - entry_points_end = self.entry_index.query_radius( - end_lat, end_lon, EXPANDED_SEARCH_RADIUS_KM, valid_highways - ) - if not entry_points_end: - return {"status": "error", "message": f"No entry points found near destination within {EXPANDED_SEARCH_RADIUS_KM}km."} - entry_points_end = entry_points_end[:MAX_ENTRY_POINTS] - - # Phase 1: Wilderness pathfinding from START - wilderness_start_result = self._pathfind_wilderness( - start_lat, start_lon, end_lat, end_lon, - entry_points_start, boundary_mode, "start" - ) - - if wilderness_start_result.get("status") == "error": - return wilderness_start_result - - wilderness_start_coords = wilderness_start_result["coords"] - wilderness_start_stats = wilderness_start_result["stats"] - wilderness_start_elevations = wilderness_start_result.get("elevations", []) - entry_A = wilderness_start_result["entry_point"] - - # Phase 2: Wilderness pathfinding from END (run after freeing phase 1 memory) - wilderness_end_result = self._pathfind_wilderness( - end_lat, end_lon, start_lat, start_lon, - entry_points_end, boundary_mode, "end" - ) - - if wilderness_end_result.get("status") == "error": - return wilderness_end_result - - # Reverse the end wilderness path (it's end→entry, we want entry→end for display) - wilderness_end_coords = list(reversed(wilderness_end_result["coords"])) - wilderness_end_stats = wilderness_end_result["stats"] - wilderness_end_elevations = list(reversed(wilderness_end_result.get("elevations", []))) - entry_B = wilderness_end_result["entry_point"] - - # Phase 3: Valhalla from entry_A to entry_B - network_result = self._valhalla_route( - entry_A["lat"], entry_A["lon"], - entry_B["lat"], entry_B["lon"], - mode - ) - - # Build response - return self._build_response( - wilderness_start=wilderness_start_coords, - wilderness_start_stats=wilderness_start_stats, - wilderness_start_elevations=wilderness_start_elevations, - network_segment=network_result.get("segment"), - wilderness_end=wilderness_end_coords, - wilderness_end_stats=wilderness_end_stats, - wilderness_end_elevations=wilderness_end_elevations, - mode=mode, - boundary_mode=boundary_mode, - entry_start=entry_A, - entry_end=entry_B, - scenario="B", - t0=t0, - valhalla_error=network_result.get("error") - ) - - def _pathfind_wilderness( - self, - origin_lat: float, origin_lon: float, - dest_lat: float, dest_lon: float, - entry_points: List[Dict], - boundary_mode: str, - label: str - ) -> Dict: - """ - Run MCP wilderness pathfinding from origin toward entry points. - - Args: - origin_lat, origin_lon: Starting point for pathfinding - dest_lat, dest_lon: Ultimate destination (for bbox calculation) - entry_points: List of candidate entry points - boundary_mode: How to handle barriers - label: "start" or "end" for error messages - - Returns: - {"status": "ok", "coords": [...], "stats": {...}, "entry_point": {...}} - or {"status": "error", "message": "..."} - """ - # Build bbox - only include origin and entry points, NOT distant destination - # The destination is handled by Valhalla, wilderness only needs to reach entry points - MAX_BBOX_DEGREES = 2.0 - all_lats = [origin_lat] + [p["lat"] for p in entry_points] - all_lons = [origin_lon] + [p["lon"] for p in entry_points] - - padding = 0.05 - bbox = { - "south": min(all_lats) - padding, - "north": max(all_lats) + padding, - "west": min(all_lons) - padding, - "east": max(all_lons) + padding, - } - - # Clamp bbox size, centering on origin - lat_span = bbox["north"] - bbox["south"] - lon_span = bbox["east"] - bbox["west"] - if lat_span > MAX_BBOX_DEGREES or lon_span > MAX_BBOX_DEGREES: - half_span = MAX_BBOX_DEGREES / 2 - bbox = { - "south": origin_lat - half_span, - "north": origin_lat + half_span, - "west": origin_lon - half_span, - "east": origin_lon + half_span, - } - - # Initialize readers - self._init_readers() - - # Load elevation - try: - elevation, meta = self.dem_reader.get_elevation_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - ) - except Exception as e: - return {"status": "error", "message": f"Failed to load elevation for {label}: {e}"} - - # Check memory - mem = check_memory_usage() - if mem > MEMORY_LIMIT_GB: - return {"status": "error", "message": f"Memory limit exceeded: {mem:.1f}GB > {MEMORY_LIMIT_GB}GB"} - - # Load friction - friction_raw = self.friction_reader.get_friction_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - friction_mult = friction_to_multiplier(friction_raw) - - # Load barriers - barriers = self.barrier_reader.get_barrier_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - - # Load trails - trails = self.trail_reader.get_trails_grid( - south=bbox["south"], north=bbox["north"], - west=bbox["west"], east=bbox["east"], - target_shape=elevation.shape - ) - - # Compute cost grid (ALWAYS foot mode for wilderness) - cost = compute_cost_grid( - elevation, - cell_size_m=meta["cell_size_m"], - friction=friction_mult, - friction_raw=friction_raw, - trails=trails, - barriers=barriers, - wilderness=None, - mvum=None, - boundary_mode=boundary_mode, - mode="foot", - ) - - # Free intermediate arrays - del friction_mult, friction_raw - gc.collect() - - # Convert origin to pixel coordinates - origin_row, origin_col = self.dem_reader.latlon_to_pixel(origin_lat, origin_lon, meta) - - rows, cols = elevation.shape - if not (0 <= origin_row < rows and 0 <= origin_col < cols): - return {"status": "error", "message": f"{label.capitalize()} point outside grid bounds"} - - # Map entry points to pixels - entry_pixels = [] - for ep in entry_points: - row, col = self.dem_reader.latlon_to_pixel(ep["lat"], ep["lon"], meta) - if 0 <= row < rows and 0 <= col < cols: - entry_pixels.append({"row": row, "col": col, "entry_point": ep}) - - if not entry_pixels: - return {"status": "error", "message": f"No entry points map to grid bounds for {label}"} - - # Run MCP - mcp = MCP_Geometric(cost, fully_connected=True) - cumulative_costs, traceback = mcp.find_costs([(origin_row, origin_col)]) - - # Find nearest reachable entry point - best_entry = None - best_cost = np.inf - - for ep in entry_pixels: - ep_cost = cumulative_costs[ep["row"], ep["col"]] - if ep_cost < best_cost: - best_cost = ep_cost - best_entry = ep - - if best_entry is None or np.isinf(best_cost): - return { - "status": "error", - "message": f"No path found from {label} to any entry point (blocked by impassable terrain)" - } - - # Traceback path - path_indices = mcp.traceback((best_entry["row"], best_entry["col"])) - - # Convert to coordinates and collect stats - coords = [] - elevations = [] - trail_values = [] - barrier_crossings = 0 - - for row, col in path_indices: - lat, lon = self.dem_reader.pixel_to_latlon(row, col, meta) - coords.append([lon, lat]) - elevations.append(elevation[row, col]) - trail_values.append(trails[row, col]) - if barriers[row, col] == 255: - barrier_crossings += 1 - - # Calculate distance - distance_m = 0 - for i in range(1, len(coords)): - lon1, lat1 = coords[i-1] - lon2, lat2 = coords[i] - distance_m += haversine_distance(lat1, lon1, lat2, lon2) - - # Elevation stats - elev_arr = np.array(elevations) - elev_diff = np.diff(elev_arr) - elev_gain = float(np.sum(elev_diff[elev_diff > 0])) - elev_loss = float(np.sum(np.abs(elev_diff[elev_diff < 0]))) - - # Trail stats - trail_arr = np.array(trail_values) - on_trail_cells = np.sum(trail_arr > 0) - total_cells = len(trail_arr) - on_trail_pct = float(100 * on_trail_cells / total_cells) if total_cells > 0 else 0 - - # Free memory - del mcp, cumulative_costs, traceback, cost, trails, barriers, elevation - gc.collect() - - return { - "status": "ok", - "coords": coords, - "elevations": elevations, # Raw elevation values for maneuver generation - "stats": { - "distance_km": distance_m / 1000, - "effort_minutes": best_cost / 60, - "elevation_gain_m": elev_gain, - "elevation_loss_m": elev_loss, - "on_trail_pct": on_trail_pct, - "barrier_crossings": barrier_crossings, - "cell_count": total_cells, - }, - "entry_point": best_entry["entry_point"] - } - - def _valhalla_route( - self, - start_lat: float, start_lon: float, - end_lat: float, end_lon: float, - mode: str - ) -> Dict: - """ - Call Valhalla for network routing. - - Returns: - {"segment": {...}, "error": None} on success - {"segment": None, "error": "..."} on failure - """ - costing = MODE_TO_COSTING.get(mode, "pedestrian") - - valhalla_request = { - "locations": [ - {"lat": start_lat, "lon": start_lon}, - {"lat": end_lat, "lon": end_lon} - ], - "costing": costing, - "directions_options": {"units": "kilometers"} - } - - try: - resp = requests.post(f"{VALHALLA_URL}/route", json=valhalla_request, timeout=30) - - if resp.status_code == 200: - valhalla_data = resp.json() - trip = valhalla_data.get("trip", {}) - legs = trip.get("legs", []) - - if legs: - leg = legs[0] - shape = leg.get("shape", "") - coords = self._decode_polyline(shape) - - maneuvers = [] - for m in leg.get("maneuvers", []): - maneuvers.append({ - "instruction": m.get("instruction", ""), - "type": m.get("type", 0), - "distance_km": m.get("length", 0), - "time_seconds": m.get("time", 0), - "street_names": m.get("street_names", []), - }) - - summary = trip.get("summary", {}) - return { - "segment": { - "coordinates": coords, - "distance_km": summary.get("length", 0), - "duration_minutes": summary.get("time", 0) / 60, - "maneuvers": maneuvers, - }, - "error": None - } - - return {"segment": None, "error": f"Valhalla returned {resp.status_code}: {resp.text[:200]}"} - - except Exception as e: - return {"segment": None, "error": f"Valhalla request failed: {e}"} - - def _generate_wilderness_maneuvers( - self, - coords: List[List[float]], - elevations: List[float], - position: str = "start" - ) -> List[Dict]: - """ - Generate turn-by-turn maneuvers for a wilderness segment. - - Segment breaks occur when: - - Bearing changes more than 30° from segment start - - Grade category changes (flat→steep etc) - - Distance exceeds 0.5 miles without a break - - Args: - coords: [[lon, lat], ...] coordinate list - elevations: Elevation values (meters) for each coord - position: "start" or "end" for labeling - - Returns: - List of maneuver dicts with instruction, distance, elevation, grade, bearing - """ - if not coords or len(coords) < 2: - return [] - - # Constants - COMPASS = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", - "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"] - MAX_SEGMENT_M = 804.672 # 0.5 miles in meters - BEARING_THRESHOLD = 30 # degrees - M_TO_FT = 3.28084 - M_TO_MI = 0.000621371 - - def get_bearing(lat1, lon1, lat2, lon2): - """Calculate bearing between two points (degrees 0-360).""" - dlon = math.radians(lon2 - lon1) - lat1_r, lat2_r = math.radians(lat1), math.radians(lat2) - x = math.sin(dlon) * math.cos(lat2_r) - y = math.cos(lat1_r) * math.sin(lat2_r) - math.sin(lat1_r) * math.cos(lat2_r) * math.cos(dlon) - return (math.degrees(math.atan2(x, y)) + 360) % 360 - - def bearing_to_cardinal(bearing): - """Convert bearing to 16-point compass direction.""" - return COMPASS[round(bearing / 22.5) % 16] - - def get_grade_category(grade_deg): - """Categorize grade angle: flat (0-2°), gentle (2-5°), moderate (5-10°), steep (10-15°), very steep (15°+).""" - grade_abs = abs(grade_deg) - if grade_abs < 2: - return "flat" - elif grade_abs < 5: - return "gentle" - elif grade_abs < 10: - return "moderate" - elif grade_abs < 15: - return "steep" - else: - return "very steep" - - def format_distance(meters): - """Format distance: feet with commas if under 1 mile, miles with one decimal if over.""" - miles = meters * M_TO_MI - if miles < 1.0: - feet = round(meters * M_TO_FT) - return f"{feet:,} ft" - else: - return f"{miles:.1f} mi" - - def build_instruction(cardinal, gain_ft, loss_ft, grade_cat, distance_m): - """Build instruction string per spec.""" - dist_str = format_distance(distance_m) - if grade_cat == "flat": - return f"Head {cardinal} on level ground — {dist_str}" - elif gain_ft > loss_ft: - return f"Head {cardinal}, gaining {gain_ft:,} ft ({grade_cat} uphill) — {dist_str}" - else: - return f"Head {cardinal}, descending {loss_ft:,} ft ({grade_cat} downhill) — {dist_str}" - - maneuvers = [] - i = 0 - - while i < len(coords) - 1: - seg_start_idx = i - seg_start_lon, seg_start_lat = coords[i] - seg_start_elev = elevations[i] if i < len(elevations) else 0 - - # Initial bearing for this segment - next_lon, next_lat = coords[i + 1] - seg_bearing = get_bearing(seg_start_lat, seg_start_lon, next_lat, next_lon) - - # Accumulate elevation changes within segment - seg_distance_m = 0 - seg_elev_gain = 0 - seg_elev_loss = 0 - prev_elev = seg_start_elev - - # Calculate initial grade category - step_dist = haversine_distance(seg_start_lat, seg_start_lon, next_lat, next_lon) - step_elev_change = (elevations[i + 1] if i + 1 < len(elevations) else seg_start_elev) - seg_start_elev - initial_grade = math.degrees(math.atan(step_elev_change / step_dist)) if step_dist > 0 else 0 - seg_grade_cat = get_grade_category(initial_grade) - - j = i - while j < len(coords) - 1: - lon1, lat1 = coords[j] - lon2, lat2 = coords[j + 1] - elev1 = elevations[j] if j < len(elevations) else prev_elev - elev2 = elevations[j + 1] if j + 1 < len(elevations) else elev1 - - step_dist = haversine_distance(lat1, lon1, lat2, lon2) - step_bearing = get_bearing(lat1, lon1, lat2, lon2) - step_elev_change = elev2 - elev1 - step_grade = math.degrees(math.atan(step_elev_change / step_dist)) if step_dist > 0 else 0 - step_grade_cat = get_grade_category(step_grade) - - # Check break conditions - bearing_diff = abs(step_bearing - seg_bearing) - if bearing_diff > 180: - bearing_diff = 360 - bearing_diff - - # Break if: bearing changed >30°, grade category changed, or distance >0.5mi - if seg_distance_m > 0: # Don't break on first step - if bearing_diff > BEARING_THRESHOLD: - break - if step_grade_cat != seg_grade_cat: - break - if seg_distance_m >= MAX_SEGMENT_M: - break - - # Accumulate - seg_distance_m += step_dist - if step_elev_change > 0: - seg_elev_gain += step_elev_change - else: - seg_elev_loss += abs(step_elev_change) - prev_elev = elev2 - j += 1 - - # Compute segment stats - seg_end_idx = j - gain_ft = round(seg_elev_gain * M_TO_FT) - loss_ft = round(seg_elev_loss * M_TO_FT) - - # Net elevation change for grade calculation - net_elev_change = seg_elev_gain - seg_elev_loss - grade_deg = math.degrees(math.atan(net_elev_change / seg_distance_m)) if seg_distance_m > 0 else 0 - grade_cat = get_grade_category(grade_deg) - - cardinal = bearing_to_cardinal(seg_bearing) - instruction = build_instruction(cardinal, gain_ft, loss_ft, grade_cat, seg_distance_m) - - maneuvers.append({ - "instruction": instruction, - "type": "wilderness", - "distance_m": round(seg_distance_m, 1), - "elevation_gain_ft": gain_ft, - "elevation_loss_ft": loss_ft, - "grade_degrees": round(grade_deg, 1), - "grade_category": grade_cat, - "bearing": round(seg_bearing, 1), - "cardinal": cardinal, - }) - - i = seg_end_idx - - # Add arrival maneuver - arrival_text = "Arrive at trail/road" if position == "start" else "Arrive at destination" - last_bearing = maneuvers[-1]["bearing"] if maneuvers else 0 - last_cardinal = maneuvers[-1]["cardinal"] if maneuvers else "N" - - maneuvers.append({ - "instruction": arrival_text, - "type": "arrival", - "distance_m": 0, - "elevation_gain_ft": 0, - "elevation_loss_ft": 0, - "grade_degrees": 0, - "grade_category": "flat", - "bearing": last_bearing, - "cardinal": last_cardinal, - }) - - return maneuvers - - def _build_response( - self, - wilderness_start: Optional[List], - wilderness_start_stats: Optional[Dict], - wilderness_start_elevations: Optional[List], - network_segment: Optional[Dict], - wilderness_end: Optional[List], - wilderness_end_stats: Optional[Dict], - wilderness_end_elevations: Optional[List], - mode: str, - boundary_mode: str, - entry_start: Optional[Dict], - entry_end: Optional[Dict], - scenario: str, - t0: float, - valhalla_error: Optional[str] - ) -> Dict: - """Build the final GeoJSON response.""" - features = [] - - # Wilderness start segment - if wilderness_start and wilderness_start_stats: - wild_start_maneuvers = [] - if wilderness_start_elevations: - wild_start_maneuvers = self._generate_wilderness_maneuvers( - wilderness_start, wilderness_start_elevations, position="start" - ) - features.append({ - "type": "Feature", - "properties": { - "segment_type": "wilderness", - "segment_position": "start", - "effort_minutes": float(wilderness_start_stats["effort_minutes"]), - "distance_km": float(wilderness_start_stats["distance_km"]), - "elevation_gain_m": wilderness_start_stats["elevation_gain_m"], - "elevation_loss_m": wilderness_start_stats["elevation_loss_m"], - "boundary_mode": boundary_mode, - "on_trail_pct": wilderness_start_stats["on_trail_pct"], - "barrier_crossings": wilderness_start_stats["barrier_crossings"], - "wilderness_mode": "foot", - "maneuvers": wild_start_maneuvers, - }, - "geometry": {"type": "LineString", "coordinates": wilderness_start} - }) - - # Network segment - if network_segment: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "network", - "distance_km": network_segment["distance_km"], - "duration_minutes": network_segment["duration_minutes"], - "maneuvers": network_segment["maneuvers"], - "network_mode": mode, - }, - "geometry": {"type": "LineString", "coordinates": network_segment["coordinates"]} - }) - - # Wilderness end segment - if wilderness_end and wilderness_end_stats: - wild_end_maneuvers = [] - if wilderness_end_elevations: - wild_end_maneuvers = self._generate_wilderness_maneuvers( - wilderness_end, wilderness_end_elevations, position="end" - ) - features.append({ - "type": "Feature", - "properties": { - "segment_type": "wilderness", - "segment_position": "end", - "effort_minutes": float(wilderness_end_stats["effort_minutes"]), - "distance_km": float(wilderness_end_stats["distance_km"]), - "elevation_gain_m": wilderness_end_stats["elevation_gain_m"], - "elevation_loss_m": wilderness_end_stats["elevation_loss_m"], - "boundary_mode": boundary_mode, - "on_trail_pct": wilderness_end_stats["on_trail_pct"], - "barrier_crossings": wilderness_end_stats["barrier_crossings"], - "wilderness_mode": "foot", - "maneuvers": wild_end_maneuvers, - }, - "geometry": {"type": "LineString", "coordinates": wilderness_end} - }) - - # Combined path - combined_coords = [] - if wilderness_start: - combined_coords.extend(wilderness_start) - if network_segment: - # Skip first coord if we already have wilderness_start (avoid duplicate) - start_idx = 1 if wilderness_start else 0 - combined_coords.extend(network_segment["coordinates"][start_idx:]) - if wilderness_end: - # Skip first coord (avoid duplicate with network end) - start_idx = 1 if (wilderness_start or network_segment) else 0 - combined_coords.extend(wilderness_end[start_idx:]) - - if combined_coords: - features.append({ - "type": "Feature", - "properties": { - "segment_type": "combined", - "wilderness_mode": "foot", - "network_mode": mode, - "boundary_mode": boundary_mode, - "scenario": scenario, - }, - "geometry": {"type": "LineString", "coordinates": combined_coords} - }) - - geojson = {"type": "FeatureCollection", "features": features} - - # Calculate totals - total_distance_km = 0.0 - total_effort_minutes = 0.0 - wilderness_distance_km = 0.0 - wilderness_effort_minutes = 0.0 - network_distance_km = 0.0 - network_duration_minutes = 0.0 - barrier_crossings = 0 - on_trail_pct = 0.0 - - if wilderness_start_stats: - wilderness_distance_km += wilderness_start_stats["distance_km"] - wilderness_effort_minutes += wilderness_start_stats["effort_minutes"] - barrier_crossings += wilderness_start_stats["barrier_crossings"] - on_trail_pct = wilderness_start_stats["on_trail_pct"] - - if wilderness_end_stats: - wilderness_distance_km += wilderness_end_stats["distance_km"] - wilderness_effort_minutes += wilderness_end_stats["effort_minutes"] - barrier_crossings += wilderness_end_stats["barrier_crossings"] - # Average on-trail percentage if we have both - if wilderness_start_stats: - on_trail_pct = (on_trail_pct + wilderness_end_stats["on_trail_pct"]) / 2 - else: - on_trail_pct = wilderness_end_stats["on_trail_pct"] - - if network_segment: - network_distance_km = network_segment["distance_km"] - network_duration_minutes = network_segment["duration_minutes"] - - total_distance_km = wilderness_distance_km + network_distance_km - total_effort_minutes = wilderness_effort_minutes + network_duration_minutes - - summary = { - "total_distance_km": float(total_distance_km), - "total_effort_minutes": float(total_effort_minutes), - "wilderness_distance_km": float(wilderness_distance_km), - "wilderness_effort_minutes": float(wilderness_effort_minutes), - "network_distance_km": float(network_distance_km), - "network_duration_minutes": float(network_duration_minutes), - "on_trail_pct": float(on_trail_pct), - "barrier_crossings": barrier_crossings, - "boundary_mode": boundary_mode, - "wilderness_mode": "foot", - "network_mode": mode, - "scenario": scenario, - "computation_time_s": time.time() - t0, - } - - if entry_start: - summary["entry_point_start"] = { - "lat": entry_start["lat"], - "lon": entry_start["lon"], - "highway_class": entry_start["highway_class"], - "name": entry_start.get("name", ""), - } - - if entry_end: - summary["entry_point_end"] = { - "lat": entry_end["lat"], - "lon": entry_end["lon"], - "highway_class": entry_end["highway_class"], - "name": entry_end.get("name", ""), - } - - result = {"status": "ok", "route": geojson, "summary": summary} - - if valhalla_error: - result["warning"] = f"Network segment incomplete: {valhalla_error}" - - return result - - def _decode_polyline(self, encoded: str, precision: int = 6) -> List[List[float]]: - """Decode a polyline string into coordinates [lon, lat].""" - coords = [] - index = 0 - lat = 0 - lon = 0 - - while index < len(encoded): - shift = 0 - result = 0 - while True: - b = ord(encoded[index]) - 63 - index += 1 - result |= (b & 0x1f) << shift - shift += 5 - if b < 0x20: - break - dlat = ~(result >> 1) if result & 1 else result >> 1 - lat += dlat - - shift = 0 - result = 0 - while True: - b = ord(encoded[index]) - 63 - index += 1 - result |= (b & 0x1f) << shift - shift += 5 - if b < 0x20: - break - dlon = ~(result >> 1) if result & 1 else result >> 1 - lon += dlon - - coords.append([lon / (10 ** precision), lat / (10 ** precision)]) - - return coords - - def close(self): - """Close all readers.""" - if self.dem_reader: - self.dem_reader.close() - if self.friction_reader: - self.friction_reader.close() - if self.barrier_reader: - self.barrier_reader.close() - if self.wilderness_reader: - self.wilderness_reader.close() - if self.trail_reader: - self.trail_reader.close() - self.entry_index.close() - - -def build_entry_index(): - """Build the trail entry point index.""" - index = EntryPointIndex() - stats = index.build_index() - index.close() - return stats - - -if __name__ == "__main__": - import sys - - if len(sys.argv) > 1 and sys.argv[1] == "build": - print("Building trail entry point index...") - stats = build_entry_index() - print(f"\nDone. Total entry points: {stats['total']}") - - elif len(sys.argv) > 1 and sys.argv[1] == "test": - print("Testing router (all scenarios)...") - print("=" * 60) - - router = OffrouteRouter() - - # Test points - wilderness_start = (44.0543, -115.4237) # Off-network - wilderness_end = (45.2, -115.5) # Deep wilderness (Frank Church) - road_start = (43.6150, -116.2023) # Boise downtown (on-network) - road_end = (43.5867, -116.5625) # Nampa (on-network) - - tests = [ - ("A: wilderness→road", wilderness_start, (44.0814, -115.5021)), - ("B: wilderness→wilderness", wilderness_start, wilderness_end), - ("C: road→wilderness", road_start, wilderness_start), - ("D: road→road", road_start, road_end), - ] - - for label, (slat, slon), (elat, elon) in tests: - print(f"\n{label}") - print("-" * 40) - - result = router.route( - start_lat=slat, start_lon=slon, - end_lat=elat, end_lon=elon, - mode="foot", boundary_mode="pragmatic" - ) - - if result["status"] == "ok": - s = result["summary"] - print(f" Scenario: {s.get('scenario', '?')}") - print(f" Total: {s['total_distance_km']:.2f} km, {s['total_effort_minutes']:.1f} min") - print(f" Wilderness: {s['wilderness_distance_km']:.2f} km") - print(f" Network: {s['network_distance_km']:.2f} km") - if s.get('entry_point_start'): - ep = s['entry_point_start'] - print(f" Entry (start): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") - if s.get('entry_point_end'): - ep = s['entry_point_end'] - print(f" Entry (end): {ep['highway_class']} at {ep['lat']:.4f}, {ep['lon']:.4f}") - else: - print(f" ERROR: {result['message']}") - - router.close() - - else: - print("Usage:") - print(" python router.py build # Build entry point index") - print(" python router.py test # Test all scenarios") diff --git a/lib/offroute/trails.py b/lib/offroute/trails.py deleted file mode 100644 index 9d9185e..0000000 --- a/lib/offroute/trails.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Trail corridor reader for OFFROUTE. - -Provides access to the OSM-derived trail raster for pathfinding. -Trail values replace WorldCover friction where trails exist. - -Raster values: - 0 = no trail (use WorldCover friction) - 5 = road (0.1× friction) - 15 = track (0.3× friction) - 25 = foot trail (0.5× friction) -""" -import numpy as np -from pathlib import Path -from typing import Tuple, Optional - -try: - import rasterio - from rasterio.windows import from_bounds - from rasterio.enums import Resampling -except ImportError: - raise ImportError("rasterio is required for trails layer support") - -# Default path to the trails raster -DEFAULT_TRAILS_PATH = Path("/mnt/nav/worldcover/trails.tif") - -# Trail value to friction multiplier mapping -TRAIL_FRICTION_MAP = { - 5: 0.1, # road - 15: 0.3, # track - 25: 0.5, # foot trail -} - - -class TrailReader: - """Reader for OSM-derived trail corridor raster.""" - - def __init__(self, trails_path: Path = DEFAULT_TRAILS_PATH): - self.trails_path = trails_path - self._dataset = None - - def _open(self): - """Lazy open the dataset.""" - if self._dataset is None: - if not self.trails_path.exists(): - raise FileNotFoundError( - f"Trails raster not found at {self.trails_path}. " - f"Run the Phase B rasterization script first." - ) - self._dataset = rasterio.open(self.trails_path) - return self._dataset - - def get_trails_grid( - self, - south: float, - north: float, - west: float, - east: float, - target_shape: Tuple[int, int] - ) -> np.ndarray: - """ - Get trail values for a bounding box, resampled to target shape. - - Args: - south, north, west, east: Bounding box coordinates (WGS84) - target_shape: (rows, cols) to resample to (matches elevation grid) - - Returns: - np.ndarray of uint8 trail values: - 0 = no trail - 5 = road (0.1× friction) - 15 = track (0.3× friction) - 25 = foot trail (0.5× friction) - """ - ds = self._open() - - # Create a window from the bounding box - window = from_bounds(west, south, east, north, ds.transform) - - # Read with resampling to target shape - # Use nearest neighbor to preserve discrete values - trails = ds.read( - 1, - window=window, - out_shape=target_shape, - resampling=Resampling.nearest - ) - - return trails - - def sample_point(self, lat: float, lon: float) -> int: - """Sample trail value at a single point.""" - ds = self._open() - - # Get pixel coordinates - row, col = ds.index(lon, lat) - - # Check bounds - if row < 0 or row >= ds.height or col < 0 or col >= ds.width: - return 0 # Out of bounds = no trail - - # Read single pixel - window = rasterio.windows.Window(col, row, 1, 1) - value = ds.read(1, window=window) - return int(value[0, 0]) - - def close(self): - """Close the dataset.""" - if self._dataset is not None: - self._dataset.close() - self._dataset = None - - -def trails_to_friction(trails: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """ - Convert trail values to friction multipliers. - - Args: - trails: uint8 array of trail values (0, 5, 15, or 25) - - Returns: - Tuple of: - - friction: float32 array of friction multipliers - - has_trail: bool array indicating where trails exist - """ - friction = np.ones_like(trails, dtype=np.float32) - has_trail = trails > 0 - - # Apply friction values where trails exist - friction[trails == 5] = 0.1 # road - friction[trails == 15] = 0.3 # track - friction[trails == 25] = 0.5 # foot trail - - return friction, has_trail - - -if __name__ == "__main__": - print("Testing TrailReader...") - - if not DEFAULT_TRAILS_PATH.exists(): - print(f"Trails raster not found at {DEFAULT_TRAILS_PATH}") - print("Run Phase B rasterization first.") - exit(1) - - reader = TrailReader() - - # Test point sampling - Twin Falls downtown (should have roads) - test_lat, test_lon = 42.563, -114.461 - trail_value = reader.sample_point(test_lat, test_lon) - print(f"\nTwin Falls ({test_lat}, {test_lon}): trail value = {trail_value}") - label = {0: "no trail", 5: "road", 15: "track", 25: "trail"}.get(trail_value, "unknown") - print(f" Type: {label}") - - # Test grid read for test bbox - trails = reader.get_trails_grid( - south=42.21, north=42.60, west=-114.76, east=-113.79, - target_shape=(400, 1000) - ) - print(f"\nGrid test shape: {trails.shape}") - - unique, counts = np.unique(trails, return_counts=True) - print("Value distribution:") - for v, c in zip(unique, counts): - pct = 100 * c / trails.size - label = {0: "no trail", 5: "road", 15: "track", 25: "trail"}.get(v, f"unknown({v})") - print(f" {label}: {c:,} pixels ({pct:.2f}%)") - - # Test conversion to friction - friction, has_trail = trails_to_friction(trails) - print(f"\nTrail coverage: {100 * np.sum(has_trail) / trails.size:.2f}%") - print(f"Friction range (on trails): {friction[has_trail].min():.1f} - {friction[has_trail].max():.1f}") - - reader.close() - print("\nTrailReader test complete.") From adee6d5a69ff23b96ee247563c71807762bc7481 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 10:21:31 -0600 Subject: [PATCH 65/72] cleanup: remove dead place_detail orchestrator cluster + lib/google_places.py (post-PR-11 dead code) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #11 (cleanup #2) deleted the /api/place* HTTP handlers but left their orchestrator functions in lib/place_detail.py as dead code. Pre-flight for the original Task #27 (delete google_places.py) surfaced that _enrich_with_google is NOT a no-caller leaf — it's called by the unreachable get_place_detail. A full caller-graph trace showed ~90% of place_detail.py is dead orchestration. Scope expanded (Matt confirmed in chat) to remove the whole dead cluster: - lib/google_places.py (entire file) - place_detail.py: get_place_detail, get_place_by_wikidata, _enrich_with_google, _apply_google_data, _enrich_with_overture, _enrich_with_wiki_index, _enrich_wiki_links, _parse_nominatim, _parse_nominatim_address, _parse_overpass, _build_overpass_query, cache_get, cache_put, _get_db + their now-unused imports/constants (json, time, requests, osm_categories, NOMINATIM_URL, etc.) KEEP only lookup_wiki_index + _get_wiki_index_db (the wiki_enrich_api survivor path) — preserved byte-exact. Module docstring refreshed. Flagged separately (not touched): overture.py + osm_categories.py are now orphaned (only consumers were the deleted cluster); stale docstrings; the deployment_config.py:9 catalog comment. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/google_places.py | 397 -------------------- lib/place_detail.py | 877 +------------------------------------------ 2 files changed, 4 insertions(+), 1270 deletions(-) delete mode 100644 lib/google_places.py diff --git a/lib/google_places.py b/lib/google_places.py deleted file mode 100644 index 8272b81..0000000 --- a/lib/google_places.py +++ /dev/null @@ -1,397 +0,0 @@ -""" -Google Places (New) API client for tertiary enrichment. - -Searches for business POIs and fetches details (opening hours, phone, website) -when OSM + Overture data is incomplete. Uses field masks to minimize cost. - -API docs: https://developers.google.com/maps/documentation/places/web-service -""" -import json -import os -import sqlite3 -import time -from datetime import date, timezone, datetime - -import requests - -from .utils import setup_logging - -logger = setup_logging('recon.google_places') - -API_BASE = 'https://places.googleapis.com/v1' -DEFAULT_DAILY_CAP = 500 -REQUEST_TIMEOUT = 3 # seconds - -# Google day index → OSM abbreviation -_DAY_ABBR = ['Su', 'Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa'] - -_db_conn = None - - -def _get_db(): - """Return a module-level SQLite connection (lazy init).""" - global _db_conn - if _db_conn is not None: - return _db_conn - - db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') - db_path = os.path.join(db_dir, 'place_cache.db') - _db_conn = sqlite3.connect(db_path, check_same_thread=False) - _db_conn.execute("PRAGMA journal_mode=WAL") - _db_conn.execute("PRAGMA synchronous=NORMAL") - # Ensure google_api_calls table exists - _db_conn.execute(""" - CREATE TABLE IF NOT EXISTS google_api_calls ( - call_date TEXT PRIMARY KEY, - call_count INTEGER NOT NULL DEFAULT 0 - ) - """) - _db_conn.commit() - return _db_conn - - -def _get_api_key(): - """Return the Google Places API key from environment.""" - key = os.environ.get('GOOGLE_PLACES_API_KEY') - if not key: - logger.error("GOOGLE_PLACES_API_KEY not set in environment") - return key - - -def _get_daily_cap(): - """Return the daily API call cap (configurable via deployment config).""" - try: - from .deployment_config import get_deployment_config - config = get_deployment_config() - return config.get('google_places', {}).get('daily_cap', DEFAULT_DAILY_CAP) - except Exception: - return DEFAULT_DAILY_CAP - - -# ── Daily call counter ────────────────────────────────────────────────── - -def check_daily_cap(): - """Return True if under daily cap, False if limit reached.""" - db = _get_db() - today = date.today().isoformat() - row = db.execute( - "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,) - ).fetchone() - current = row[0] if row else 0 - cap = _get_daily_cap() - if current >= cap: - logger.info(f"google_places: daily_cap_reached count={current} cap={cap}") - return False - return True - - -def get_daily_count(): - """Return today's API call count.""" - db = _get_db() - today = date.today().isoformat() - row = db.execute( - "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,) - ).fetchone() - return row[0] if row else 0 - - -def increment_call_counter(): - """Atomically increment today's API call counter.""" - db = _get_db() - today = date.today().isoformat() - db.execute(""" - INSERT INTO google_api_calls (call_date, call_count) VALUES (?, 1) - ON CONFLICT(call_date) DO UPDATE SET call_count = call_count + 1 - """, (today,)) - db.commit() - - -def _set_daily_count_to_cap(): - """Set today's counter to the cap value (soft-stop on quota error).""" - db = _get_db() - today = date.today().isoformat() - cap = _get_daily_cap() - db.execute(""" - INSERT INTO google_api_calls (call_date, call_count) VALUES (?, ?) - ON CONFLICT(call_date) DO UPDATE SET call_count = ? - """, (today, cap, cap)) - db.commit() - - -# ── Google Places cache (on place_cache table) ───────────────────────── - -def cache_get_google(osm_type, osm_id): - """Return (google_place_id, google_data_dict) or (None, None).""" - db = _get_db() - row = db.execute( - "SELECT google_place_id, google_data FROM place_cache WHERE osm_type=? AND osm_id=?", - (osm_type, osm_id) - ).fetchone() - if row and row[0]: - data = None - if row[1]: - try: - data = json.loads(row[1]) - except (json.JSONDecodeError, TypeError): - pass - return row[0], data - return None, None - - -def cache_put_google(osm_type, osm_id, place_id, data): - """Store Google Places data for a cache entry (UPSERT on google columns).""" - db = _get_db() - now = int(time.time()) - db.execute(""" - INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at, google_place_id, google_data, google_fetched_at) - VALUES (?, ?, '', 'pending', 0, ?, ?, ?) - ON CONFLICT(osm_type, osm_id) DO UPDATE SET - google_place_id = excluded.google_place_id, - google_data = excluded.google_data, - google_fetched_at = excluded.google_fetched_at - """, (osm_type, osm_id, place_id, json.dumps(data) if data else None, now)) - db.commit() - - -# ── API calls ─────────────────────────────────────────────────────────── - -def search_place(name, lat, lon, radius_m=200): - """ - Search Google Places (New) for a business by name + location. - Returns the Google Place ID of the best match, or None. - """ - key = _get_api_key() - if not key: - return None - - if not check_daily_cap(): - return None - - try: - resp = requests.post( - f'{API_BASE}/places:searchText', - headers={ - 'Content-Type': 'application/json', - 'X-Goog-Api-Key': key, - 'X-Goog-FieldMask': 'places.id,places.displayName,places.location', - }, - json={ - 'textQuery': name, - 'locationBias': { - 'circle': { - 'center': {'latitude': lat, 'longitude': lon}, - 'radius': float(radius_m), - } - }, - 'maxResultCount': 1, - }, - timeout=REQUEST_TIMEOUT, - ) - - increment_call_counter() - - if resp.status_code == 429: - logger.warning("google_places: action=search place=%s result=rate_limited", name) - _set_daily_count_to_cap() - return None - - if resp.status_code == 403: - logger.error("google_places: action=search place=%s result=forbidden (invalid key?)", name) - return None - - if resp.status_code != 200: - logger.warning("google_places: action=search place=%s result=error status=%d", name, resp.status_code) - return None - - data = resp.json() - places = data.get('places', []) - if not places: - logger.info("google_places: action=search place=%s result=miss", name) - return None - - place_id = places[0].get('id') - display = places[0].get('displayName', {}).get('text', '?') - logger.info("google_places: action=search place=%s result=hit google_name=%s id=%s", name, display, place_id) - return place_id - - except requests.exceptions.Timeout: - logger.warning("google_places: action=search place=%s result=timeout", name) - return None - except Exception as e: - logger.error("google_places: action=search place=%s result=error err=%s", name, e) - return None - - -def get_place_details(place_id): - """ - Fetch details for a Google Place ID. - Returns dict with {opening_hours, phone_number, website} or None. - """ - key = _get_api_key() - if not key: - return None - - if not check_daily_cap(): - return None - - try: - resp = requests.get( - f'{API_BASE}/places/{place_id}', - headers={ - 'X-Goog-Api-Key': key, - 'X-Goog-FieldMask': 'regularOpeningHours,internationalPhoneNumber,websiteUri', - }, - timeout=REQUEST_TIMEOUT, - ) - - increment_call_counter() - - if resp.status_code == 429: - logger.warning("google_places: action=details id=%s result=rate_limited", place_id) - _set_daily_count_to_cap() - return None - - if resp.status_code != 200: - logger.warning("google_places: action=details id=%s result=error status=%d", place_id, resp.status_code) - return None - - data = resp.json() - result = { - 'opening_hours': None, - 'opening_hours_raw': None, - 'phone_number': None, - 'website': None, - } - - # Phone - phone = data.get('internationalPhoneNumber') - if phone: - result['phone_number'] = phone.replace(' ', '').replace('-', '') - - # Website - result['website'] = data.get('websiteUri') - - # Opening hours - hours = data.get('regularOpeningHours') - if hours: - # Try OSM-compatible format from periods - periods = hours.get('periods', []) - if periods: - osm_str = _periods_to_osm(periods) - if osm_str: - result['opening_hours'] = osm_str - - # Fallback: weekday descriptions (human-readable) - if not result['opening_hours']: - descriptions = hours.get('weekdayDescriptions') - if descriptions: - result['opening_hours_raw'] = descriptions - - logger.info("google_places: action=details id=%s result=hit hours=%s phone=%s website=%s", - place_id, - 'yes' if result['opening_hours'] or result['opening_hours_raw'] else 'no', - 'yes' if result['phone_number'] else 'no', - 'yes' if result['website'] else 'no') - return result - - except requests.exceptions.Timeout: - logger.warning("google_places: action=details id=%s result=timeout", place_id) - return None - except Exception as e: - logger.error("google_places: action=details id=%s result=error err=%s", place_id, e) - return None - - -# ── Opening hours conversion ──────────────────────────────────────────── - -def _periods_to_osm(periods): - """ - Convert Google Places periods array to OSM opening_hours string. - - Google periods: [{"open": {"day": 0-6, "hour": H, "minute": M}, - "close": {"day": 0-6, "hour": H, "minute": M}}, ...] - Where day 0 = Sunday. - - OSM format: "Mo-Fr 06:00-23:00; Sa-Su 07:00-23:00" - """ - if not periods: - return None - - # Check for 24/7: single period with no close, or open 00:00 close 00:00 next day - if len(periods) == 1: - p = periods[0] - o = p.get('open', {}) - c = p.get('close') - if c is None and o.get('hour', 0) == 0 and o.get('minute', 0) == 0: - return '24/7' - - # Build a map: day_index → "HH:MM-HH:MM" - day_hours = {} # day_index → time_range string - for p in periods: - o = p.get('open', {}) - c = p.get('close', {}) - day = o.get('day', 0) - open_time = f"{o.get('hour', 0):02d}:{o.get('minute', 0):02d}" - - if c: - close_time = f"{c.get('hour', 0):02d}:{c.get('minute', 0):02d}" - # Handle midnight closing (00:00 means end of day) - if close_time == '00:00': - close_time = '24:00' - else: - close_time = '24:00' - - time_range = f"{open_time}-{close_time}" - - # A day can have multiple periods (e.g., lunch break) - if day in day_hours: - day_hours[day] = day_hours[day] + ',' + time_range - else: - day_hours[day] = time_range - - if not day_hours: - return None - - # Check if all 7 days have same hours - unique_ranges = set(day_hours.values()) - if len(day_hours) == 7 and len(unique_ranges) == 1: - hours = unique_ranges.pop() - if hours == '00:00-24:00': - return '24/7' - return hours # implicit "every day" - - # Group consecutive days with same hours - # Reorder to OSM convention: Mo(1) Tu(2) We(3) Th(4) Fr(5) Sa(6) Su(0) - osm_day_order = [1, 2, 3, 4, 5, 6, 0] - groups = [] - current_days = [] - current_hours = None - - for day_idx in osm_day_order: - hours = day_hours.get(day_idx) - if hours == current_hours: - current_days.append(day_idx) - else: - if current_days and current_hours: - groups.append((current_days, current_hours)) - current_days = [day_idx] - current_hours = hours - - if current_days and current_hours: - groups.append((current_days, current_hours)) - - if not groups: - return None - - # Format each group - parts = [] - for days, hours in groups: - if len(days) == 1: - day_str = _DAY_ABBR[days[0]] - elif len(days) == 2: - day_str = f"{_DAY_ABBR[days[0]]},{_DAY_ABBR[days[1]]}" - else: - day_str = f"{_DAY_ABBR[days[0]]}-{_DAY_ABBR[days[-1]]}" - parts.append(f"{day_str} {hours}") - - return '; '.join(parts) diff --git a/lib/place_detail.py b/lib/place_detail.py index 46aa8b0..6f6f1ba 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -1,291 +1,17 @@ """ -Place detail proxy — local Nominatim first, Overpass API fallback, SQLite cache. -Overture Maps enrichment layer fills sparse extratags (phone, website, brand). +Wiki-index lookup for place enrichment. -Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict -matching the response shape for /api/place//. +Provides lookup_wiki_index(wikidata_id, name, country_code) — a pure read of the +local wiki_index.db, used by the /api/wiki-enrich endpoint (navi-places +HTTP-fetches wiki enrichment instead of reading the 2.1 GB DB directly). """ -import json import os import sqlite3 -import time -import requests as http_requests - -from .osm_categories import humanize_category from .utils import setup_logging logger = setup_logging('recon.place_detail') -NOMINATIM_URL = "http://localhost:8010/details.php" -OVERPASS_URL = "https://overpass-api.de/api/interpreter" -OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)" -VALID_OSM_TYPES = {"N", "W", "R"} - -_db_conn = None - - -# ── SQLite cache ──────────────────────────────────────────────────────── - -def _get_db(): - """Return a module-level SQLite connection (lazy init).""" - global _db_conn - if _db_conn is not None: - return _db_conn - - db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') - os.makedirs(db_dir, exist_ok=True) - db_path = os.path.join(db_dir, 'place_cache.db') - - _db_conn = sqlite3.connect(db_path, check_same_thread=False) - _db_conn.execute("PRAGMA journal_mode=WAL") - _db_conn.execute("PRAGMA synchronous=NORMAL") - _db_conn.execute(""" - CREATE TABLE IF NOT EXISTS place_cache ( - osm_type TEXT NOT NULL, - osm_id INTEGER NOT NULL, - data TEXT NOT NULL, - source TEXT NOT NULL, - cached_at INTEGER NOT NULL, - PRIMARY KEY (osm_type, osm_id) - ) - """) - _db_conn.commit() - logger.info(f"Place cache DB ready at {db_path}") - return _db_conn - - -def cache_get(osm_type, osm_id): - """Return cached place dict or None.""" - db = _get_db() - row = db.execute( - "SELECT data FROM place_cache WHERE osm_type=? AND osm_id=?", - (osm_type, osm_id) - ).fetchone() - if row: - try: - result = json.loads(row[0]) - result['source'] = 'cache' - return result - except (json.JSONDecodeError, TypeError): - pass - return None - - -def cache_put(osm_type, osm_id, data, source): - """Store a place detail result in the cache (preserves google columns).""" - db = _get_db() - now = int(time.time()) - db.execute(""" - INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at) - VALUES (?, ?, ?, ?, ?) - ON CONFLICT(osm_type, osm_id) DO UPDATE SET - data = excluded.data, - source = excluded.source, - cached_at = excluded.cached_at - """, (osm_type, osm_id, json.dumps(data), source, now)) - db.commit() - - -# ── Overture enrichment ───────────────────────────────────────────────── - -def _enrich_with_overture(result, osm_type, osm_id): - """ - Attempt to enrich a place result with Overture Maps data. - Fills sparse extratags (phone, website, brand) without overwriting existing values. - Returns the (possibly enriched) result dict. - """ - try: - from .deployment_config import get_deployment_config - deploy_config = get_deployment_config() - features = deploy_config.get('features', {}) - if not features.get('has_overture_enrichment', False): - return result - except Exception: - return result - - try: - from .overture import find_by_osm_id, find_by_coords_and_name - except ImportError: - logger.debug("Overture module not available") - return result - - enrichment = None - match_method = None - - # Strategy 1: OSM cross-reference (exact) - enrichment = find_by_osm_id(osm_type, osm_id) - if enrichment: - match_method = 'osm_xref' - - # Strategy 2: Coordinate + name fuzzy (fallback) - if not enrichment and result.get('centroid') and result.get('name'): - centroid = result['centroid'] - if centroid.get('lat') and centroid.get('lon'): - enrichment = find_by_coords_and_name( - centroid['lat'], centroid['lon'], result['name'] - ) - if enrichment: - match_method = 'coord_name_fuzzy' - - if not enrichment: - return result - - # Fill sparse extratags (never overwrite existing non-null values) - extratags = result.get('extratags', {}) - fill_map = [ - ('phone', 'phone'), - ('website', 'website'), - ('brand', 'brand_name'), - ('brand:wikidata', 'brand_wikidata'), - ] - for osm_key, overture_key in fill_map: - if not extratags.get(osm_key) and enrichment.get(overture_key): - extratags[osm_key] = enrichment[overture_key] - result['extratags'] = extratags - - # Add source metadata - result['sources'] = { - 'primary': result.get('source', 'unknown'), - 'enrichment': 'overture', - 'overture_match_method': match_method, - 'overture_gers_id': enrichment.get('gers_id'), - 'overture_confidence': enrichment.get('confidence'), - 'overture_basic_category': enrichment.get('basic_category'), - } - - logger.debug(f"Overture enrichment for {osm_type}/{osm_id}: {match_method}") - return result - - - -# ── Google Places enrichment (tertiary, gap-fill only) ────────────── - -# Business POI classes eligible for Google enrichment -_BUSINESS_CLASSES = {'amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'} - -# Fields Google can fill -_GOOGLE_GAP_FIELDS = ('opening_hours', 'phone', 'website') - - -def _enrich_with_google(result, osm_type, osm_id): - """ - Tertiary enrichment via Google Places (New) API. - Only fires for business-type POIs when opening_hours, phone, or website - are still missing after OSM + Overture enrichment. - Fills only empty fields — never overwrites existing values. - """ - # Check feature flag - try: - from .deployment_config import get_deployment_config - deploy_config = get_deployment_config() - features = deploy_config.get('features', {}) - if not features.get('has_google_places_enrichment', False): - return result - except Exception: - return result - - # Only enrich business-type POIs - poi_class = result.get('class', '') - if poi_class not in _BUSINESS_CLASSES: - return result - - # Check if any gap fields are missing - extratags = result.get('extratags', {}) - gaps = [f for f in _GOOGLE_GAP_FIELDS if not extratags.get(f)] - if not gaps: - logger.debug(f"google_places: skip {osm_type}/{osm_id} — no gaps") - return result - - try: - from . import google_places - except ImportError: - logger.debug("google_places module not available") - return result - - # Check Google cache first - cached_pid, cached_data = google_places.cache_get_google(osm_type, osm_id) - if cached_pid and cached_data: - _apply_google_data(result, cached_data, gaps) - result.setdefault('sources', {})['google_places'] = { - 'place_id': cached_pid, - 'source': 'cache', - } - logger.debug(f"google_places: cache hit for {osm_type}/{osm_id}") - return result - - # Skip if already looked up and found nothing (cached_pid is None) - if cached_pid is not None: - return result - - # Skip new Google API calls for guest users (cached data already returned above) - from .auth import get_user_id - if not get_user_id(): - logger.debug(f"google_places: skip API call for {osm_type}/{osm_id} — guest user") - return result - - # Daily cap check - if not google_places.check_daily_cap(): - return result - - # Search for the place - name = result.get('name', '') - centroid = result.get('centroid', {}) - lat = centroid.get('lat') - lon = centroid.get('lon') - if not name or not lat or not lon: - return result - - place_id = google_places.search_place(name, lat, lon) - if not place_id: - # Cache the miss to avoid repeated lookups - google_places.cache_put_google(osm_type, osm_id, '__miss__', None) - return result - - # Get details - details = google_places.get_place_details(place_id) - if not details: - google_places.cache_put_google(osm_type, osm_id, place_id, None) - return result - - # Cache the result - google_places.cache_put_google(osm_type, osm_id, place_id, details) - - # Apply to result - _apply_google_data(result, details, gaps) - result.setdefault('sources', {})['google_places'] = { - 'place_id': place_id, - 'source': 'api', - 'daily_count': google_places.get_daily_count(), - } - - return result - - -def _apply_google_data(result, google_data, gaps): - """Apply Google Places data to fill gap fields only.""" - extratags = result.get('extratags', {}) - if 'opening_hours' in gaps: - osm_hours = google_data.get('opening_hours') - if osm_hours: - extratags['opening_hours'] = osm_hours - elif google_data.get('opening_hours_raw'): - extratags['opening_hours_raw'] = google_data['opening_hours_raw'] - if 'phone' in gaps and google_data.get('phone_number'): - extratags['phone'] = google_data['phone_number'] - if 'website' in gaps and google_data.get('website'): - extratags['website'] = google_data['website'] - result['extratags'] = extratags - - - - -# ── Wiki link rewriting ───────────────────────────────────────────────── - -# Extratag keys that may contain wiki references -_WIKI_TAGS = ('wikipedia', 'wikidata', 'wikivoyage', 'appropedia') - - # ── Wiki Index enrichment ─────────────────────────────────────────────── @@ -369,598 +95,3 @@ def lookup_wiki_index(wikidata_id=None, name=None, country_code=None): except Exception as e: logger.debug(f"wiki_index lookup error: {e}") return None - - -def _enrich_with_wiki_index(result): - try: - from .deployment_config import get_deployment_config - deploy_config = get_deployment_config() - features = deploy_config.get("features", {}) - if not features.get("has_kiwix_wiki", False): - return result - except Exception: - return result - - db = _get_wiki_index_db() - if not db: - return result - - try: - cur = db.cursor() - row = None - - extratags = result.get("extratags", {}) - wikidata_id = result.get("wikidata_id") or extratags.get("wikidata") - if wikidata_id: - if isinstance(wikidata_id, str) and wikidata_id.startswith("http"): - wikidata_id = wikidata_id.split("/")[-1] - cur.execute( - "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE wikidata_id = ?", - (wikidata_id,) - ) - row = cur.fetchone() - - if not row: - name = result.get("name") - address = result.get("address") or {} - country_code = address.get("country_code") or result.get("country_code") - if name and country_code: - cur.execute( - "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE place_name = ? AND country_code = ? LIMIT 1", - (name, country_code.lower()) - ) - row = cur.fetchone() - - if row: - if row["summary"]: - result["wiki_summary"] = row["summary"] - if row["wiki_population"]: - try: - result["wiki_population"] = int(row["wiki_population"]) - except (ValueError, TypeError): - result["wiki_population"] = row["wiki_population"] - if row["wikipedia_title"]: - title = row["wikipedia_title"].replace(" ", "_") - result["wiki_url"] = f"https://en.wikipedia.org/wiki/{title}" - if row["wikivoyage_title"]: - title = row["wikivoyage_title"].replace(" ", "_") - result["wikivoyage_url"] = f"https://en.wikivoyage.org/wiki/{title}" - logger.debug(f"Wiki index enrichment hit for {result.get(name)}") - - except Exception as e: - logger.debug(f"Wiki index enrichment error: {e}") - - return result - -def _enrich_wiki_links(result): - """ - Rewrite wiki-related extratags to local Kiwix URLs where available. - Falls back to public URLs. Only runs when has_wiki_rewriting is enabled. - Returns the (possibly enriched) result dict. - """ - try: - from .deployment_config import get_deployment_config - deploy_config = get_deployment_config() - features = deploy_config.get('features', {}) - if not features.get('has_wiki_rewriting', False): - return result - except Exception: - return result - - try: - from .wiki_rewrite import rewrite_wiki_link - except ImportError: - logger.debug("wiki_rewrite module not available") - return result - - extratags = result.get('extratags', {}) - if not extratags: - return result - - rewrites = {} - for tag in _WIKI_TAGS: - value = extratags.get(tag) - if not value: - continue - url, status = rewrite_wiki_link(tag, value) - if status != 'original': - extratags[tag] = url - rewrites[tag] = status - - if rewrites: - result['extratags'] = extratags - result.setdefault('sources', {})['wiki_rewrites'] = rewrites - logger.debug(f"Wiki rewrites for {result.get('osm_type')}/{result.get('osm_id')}: {rewrites}") - - return result - -# ── Nominatim parsing ─────────────────────────────────────────────────── - -# Nominatim address array uses rank_address to indicate what each entry is. -# We map rank ranges to our flat address fields. -RANK_TO_FIELD = { - 4: 'country', - 5: 'postcode', - 6: 'state', # rank 6 = county in US, but we try name matching - 8: 'state', - 12: 'county', - 16: 'city', - 20: 'neighbourhood', - 22: 'neighbourhood', - 26: 'road', - 28: 'house_number', -} - - -def _parse_nominatim_address(address_array, country_code=None): - """Parse Nominatim's ranked address array into a flat address dict.""" - addr = { - 'house_number': None, - 'road': None, - 'neighbourhood': None, - 'city': None, - 'county': None, - 'state': None, - 'postcode': None, - 'country': None, - 'country_code': country_code, - } - - if not address_array: - return addr - - for entry in address_array: - if not entry.get('isaddress', False): - continue - - name = entry.get('localname', '') - rank = entry.get('rank_address', 0) - etype = entry.get('type', '') - eclass = entry.get('class', '') - - # Explicit type-based assignments (more reliable than rank alone) - if etype == 'country' and eclass == 'place': - addr['country'] = name - elif etype == 'state' or (eclass == 'boundary' and etype == 'administrative' and rank == 8): - if not addr['state']: - addr['state'] = name - elif etype == 'county' or (eclass == 'boundary' and etype == 'administrative' and rank in (10, 12)): - if not addr['county']: - addr['county'] = name - elif etype in ('city', 'town', 'village', 'hamlet') and eclass == 'place': - if not addr['city']: - addr['city'] = name - elif eclass == 'boundary' and etype == 'administrative' and rank == 16: - # City-level admin boundary (common in US) - if not addr['city']: - addr['city'] = name - elif etype == 'postcode': - addr['postcode'] = name - elif eclass == 'highway' or rank == 26: - if not addr['road']: - addr['road'] = name - elif etype == 'house_number' or rank == 28: - addr['house_number'] = name - elif rank in (20, 22) and not addr['neighbourhood']: - addr['neighbourhood'] = name - - # Remove county from output (not in spec) - addr.pop('county', None) - - return addr - - -def _parse_nominatim(data): - """Parse a Nominatim /details response into our canonical shape.""" - osm_type = data.get('osm_type', '') - osm_id = data.get('osm_id', 0) - osm_class = data.get('category', '') - osm_type_tag = data.get('type', '') - - # Centroid - centroid_geom = data.get('centroid', {}) - coords = centroid_geom.get('coordinates', [0, 0]) - centroid = {'lat': coords[1], 'lon': coords[0]} if len(coords) >= 2 else {'lat': 0, 'lon': 0} - - # Names - names = data.get('names', {}) - display_name = data.get('localname', '') or names.get('name', '') - - # Address - address = _parse_nominatim_address( - data.get('address', []), - country_code=data.get('country_code') - ) - - # Use calculated_postcode if address parse didn't find one - if not address.get('postcode') and data.get('calculated_postcode'): - address['postcode'] = data['calculated_postcode'] - - # Extratags - raw_extra = data.get('extratags', {}) - extratags = { - 'opening_hours': raw_extra.get('opening_hours'), - 'phone': raw_extra.get('phone') or raw_extra.get('contact:phone'), - 'website': raw_extra.get('website') or raw_extra.get('contact:website') or raw_extra.get('url'), - 'email': raw_extra.get('email') or raw_extra.get('contact:email'), - 'wikipedia': raw_extra.get('wikipedia'), - 'wikidata': raw_extra.get('wikidata'), - 'cuisine': raw_extra.get('cuisine'), - 'operator': raw_extra.get('operator'), - 'wheelchair': raw_extra.get('wheelchair'), - 'fee': raw_extra.get('fee'), - 'takeaway': raw_extra.get('takeaway'), - } - - # Category: use extratags.place for boundaries (e.g. "city"), else class/type - effective_class = osm_class - effective_type = osm_type_tag - if osm_class == 'boundary' and osm_type_tag == 'administrative': - place_tag = raw_extra.get('place') or raw_extra.get('linked_place') - if place_tag: - effective_class = 'place' - effective_type = place_tag - - category = humanize_category(effective_class, effective_type) - - # Filter names: only include extra name tags, not the bare "name" - extra_names = {k: v for k, v in names.items() if k != 'name'} if names else {} - - # Boundary geometry (polygon/multipolygon from Nominatim) - boundary = None - geom = data.get('geometry') - if geom and geom.get('type') in ('Polygon', 'MultiPolygon'): - boundary = geom - - return { - 'osm_type': osm_type, - 'osm_id': osm_id, - 'name': display_name, - 'category': category, - 'class': osm_class, - 'type': osm_type_tag, - 'address': address, - 'centroid': centroid, - 'extratags': extratags, - 'names': extra_names if extra_names else None, - 'source': 'nominatim_local', - 'boundary': boundary, - } - - -# ── Overpass parsing ──────────────────────────────────────────────────── - -OVERPASS_TYPE_MAP = {'N': 'node', 'W': 'way', 'R': 'relation'} - - -def _build_overpass_query(osm_type, osm_id): - """Build an Overpass QL query for a single element.""" - elem = OVERPASS_TYPE_MAP.get(osm_type) - if not elem: - return None - return f"[out:json][timeout:10];{elem}({osm_id});out tags center;" - - -def _parse_overpass(data, osm_type, osm_id): - """Parse an Overpass API response into our canonical shape.""" - elements = data.get('elements', []) - if not elements: - return None - - elem = elements[0] - tags = elem.get('tags', {}) - - # Centroid: Overpass returns lat/lon for nodes, center for ways/relations - lat = elem.get('lat') or (elem.get('center', {}).get('lat')) - lon = elem.get('lon') or (elem.get('center', {}).get('lon')) - centroid = {'lat': lat, 'lon': lon} if lat and lon else {'lat': 0, 'lon': 0} - - # Determine class/type from tags — Overpass doesn't have a canonical class field - # Use the first recognized class tag - osm_class = '' - osm_type_tag = '' - for cls in ('amenity', 'shop', 'leisure', 'tourism', 'natural', 'highway', - 'boundary', 'place', 'building', 'waterway', 'landuse', 'historic'): - if cls in tags: - osm_class = cls - osm_type_tag = tags[cls] - break - - category = humanize_category(osm_class, osm_type_tag) - - # Address from addr:* tags - address = { - 'house_number': tags.get('addr:housenumber'), - 'road': tags.get('addr:street'), - 'neighbourhood': tags.get('addr:suburb') or tags.get('addr:neighbourhood'), - 'city': tags.get('addr:city'), - 'state': tags.get('addr:state'), - 'postcode': tags.get('addr:postcode'), - 'country': tags.get('addr:country'), - 'country_code': tags.get('addr:country_code', - tags.get('addr:country', '')).lower()[:2] or None, - } - - # Extratags - extratags = { - 'opening_hours': tags.get('opening_hours'), - 'phone': tags.get('phone') or tags.get('contact:phone'), - 'website': tags.get('website') or tags.get('contact:website') or tags.get('url'), - 'email': tags.get('email') or tags.get('contact:email'), - 'wikipedia': tags.get('wikipedia'), - 'wikidata': tags.get('wikidata'), - 'cuisine': tags.get('cuisine'), - 'operator': tags.get('operator'), - 'wheelchair': tags.get('wheelchair'), - 'fee': tags.get('fee'), - 'takeaway': tags.get('takeaway'), - } - - # Names - name = tags.get('name', '') - extra_names = {} - for k, v in tags.items(): - if k.startswith('name:') or k in ('alt_name', 'old_name', 'short_name', 'official_name'): - extra_names[k] = v - - return { - 'osm_type': osm_type, - 'osm_id': osm_id, - 'name': name, - 'category': category, - 'class': osm_class, - 'type': osm_type_tag, - 'address': address, - 'centroid': centroid, - 'extratags': extratags, - 'names': extra_names if extra_names else None, - 'source': 'overpass', - } - - -# ── Public API ────────────────────────────────────────────────────────── - -def get_place_detail(osm_type, osm_id): - """ - Fetch place details for an OSM element. - - Returns (dict, status_code): - - (data, 200) on success - - (error_dict, 404) if not found in any source - - (error_dict, 502) if both sources error - """ - osm_type = osm_type.upper() - if osm_type not in VALID_OSM_TYPES: - return {'error': f'Invalid osm_type: {osm_type}. Must be N, W, or R.'}, 400 - - if osm_id <= 0: - return {'error': 'osm_id must be a positive integer'}, 400 - - # 1. Check cache - cached = cache_get(osm_type, osm_id) - if cached: - logger.debug(f"Cache hit: {osm_type}/{osm_id}") - return cached, 200 - - # 2. Try local Nominatim first - nominatim_result = None - nominatim_error = None - try: - resp = http_requests.get(NOMINATIM_URL, params={ - 'osmtype': osm_type, - 'osmid': osm_id, - 'format': 'json', - 'addressdetails': 1, - 'hierarchy': 0, - 'keywords': 0, - 'polygon_geojson': 1, - }, timeout=5) - - if resp.status_code == 200: - data = resp.json() - # Nominatim returns a result even for IDs not in its DB, - # but they'll have empty/minimal data. Check for osm_id match. - if data.get('osm_id') == osm_id: - nominatim_result = _parse_nominatim(data) - logger.debug(f"Nominatim hit: {osm_type}/{osm_id}") - except Exception as e: - nominatim_error = str(e) - logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}") - - if nominatim_result: - nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) - nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) - nominatim_result = _enrich_wiki_links(nominatim_result) - nominatim_result = _enrich_with_wiki_index(nominatim_result) - cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') - return nominatim_result, 200 - - # 3. Fallback to Overpass - overpass_result = None - overpass_error = None - try: - query = _build_overpass_query(osm_type, osm_id) - if query: - resp = http_requests.post( - OVERPASS_URL, - data={'data': query}, - headers={'User-Agent': OVERPASS_UA}, - timeout=10, - ) - if resp.status_code == 200: - data = resp.json() - overpass_result = _parse_overpass(data, osm_type, osm_id) - if overpass_result: - logger.debug(f"Overpass hit: {osm_type}/{osm_id}") - elif resp.status_code == 429: - overpass_error = "Overpass rate limited" - logger.warning(f"Overpass 429 for {osm_type}/{osm_id}") - else: - overpass_error = f"Overpass HTTP {resp.status_code}" - except Exception as e: - overpass_error = str(e) - logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}") - - if overpass_result: - overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) - overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) - overpass_result = _enrich_wiki_links(overpass_result) - overpass_result = _enrich_with_wiki_index(overpass_result) - cache_put(osm_type, osm_id, overpass_result, 'overpass') - return overpass_result, 200 - - # 4. Both failed - if nominatim_error and overpass_error: - logger.error(f"Both sources failed for {osm_type}/{osm_id}: " - f"Nominatim={nominatim_error}, Overpass={overpass_error}") - return {'error': 'Both data sources unavailable'}, 502 - - # Not found in either source (no errors, just empty results) - return {'error': f'{osm_type}/{osm_id} not found'}, 404 - - -# ── Wikidata lookup ───────────────────────────────────────────────────── - -WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php" - -def get_place_by_wikidata(wikidata_id): - """ - Fetch place details from Wikidata entity. - - Returns (dict, status_code): - - (data, 200) on success - - (error_dict, 404) if entity not found - - (error_dict, 400) if invalid ID format - - (error_dict, 502) on API error - """ - # Validate wikidata ID format (Q followed by digits) - wikidata_id = wikidata_id.upper().strip() - if not wikidata_id.startswith("Q") or not wikidata_id[1:].isdigit(): - return {"error": f"Invalid wikidata ID: {wikidata_id}. Must be Q followed by digits."}, 400 - - try: - resp = http_requests.get(WIKIDATA_API_URL, params={ - "action": "wbgetentities", - "ids": wikidata_id, - "format": "json", - "languages": "en", - "props": "labels|descriptions|claims|sitelinks", - }, timeout=10, headers={"User-Agent": "Navi/1.0 (forge.echo6.co/matt/recon)"}) - - if resp.status_code != 200: - logger.warning(f"Wikidata API error for {wikidata_id}: HTTP {resp.status_code}") - return {"error": "Wikidata API error"}, 502 - - data = resp.json() - entities = data.get("entities", {}) - entity = entities.get(wikidata_id) - - if not entity or entity.get("missing"): - return {"error": f"Wikidata entity {wikidata_id} not found"}, 404 - - # Extract basic info - labels = entity.get("labels", {}) - descriptions = entity.get("descriptions", {}) - claims = entity.get("claims", {}) - - name = labels.get("en", {}).get("value", wikidata_id) - description = descriptions.get("en", {}).get("value", "") - - # Extract coordinates from P625 (coordinate location) - lat, lon = None, None - if "P625" in claims: - coord_claim = claims["P625"] - if coord_claim and coord_claim[0].get("mainsnak", {}).get("datavalue"): - coord_val = coord_claim[0]["mainsnak"]["datavalue"]["value"] - lat = coord_val.get("latitude") - lon = coord_val.get("longitude") - - # Extract population from P1082 - population = None - if "P1082" in claims: - pop_claims = claims["P1082"] - if pop_claims: - # Get the most recent population value - for claim in pop_claims: - if claim.get("mainsnak", {}).get("datavalue"): - try: - population = int(claim["mainsnak"]["datavalue"]["value"]["amount"].lstrip("+")) - break - except (KeyError, ValueError): - pass - - # Extract country from P17 - country = None - if "P17" in claims: - country_claims = claims["P17"] - if country_claims and country_claims[0].get("mainsnak", {}).get("datavalue"): - country_id = country_claims[0]["mainsnak"]["datavalue"]["value"]["id"] - # Could resolve this to a name, but for now just store the ID - - # Extract instance of (P31) for type classification - instance_of = [] - if "P31" in claims: - for claim in claims["P31"]: - if claim.get("mainsnak", {}).get("datavalue"): - instance_of.append(claim["mainsnak"]["datavalue"]["value"]["id"]) - - # Extract OSM relation ID if available (P402) - osm_relation_id = None - if "P402" in claims: - osm_claims = claims["P402"] - if osm_claims and osm_claims[0].get("mainsnak", {}).get("datavalue"): - osm_relation_id = osm_claims[0]["mainsnak"]["datavalue"]["value"] - - # Extract Wikipedia sitelink - sitelinks = entity.get("sitelinks", {}) - wikipedia = None - if "enwiki" in sitelinks: - wiki_title = sitelinks["enwiki"].get("title", "") - if wiki_title: - wikipedia = f"en:{wiki_title}" - - result = { - "wikidata_id": wikidata_id, - "name": name, - "description": description, - "centroid": {"lat": lat, "lon": lon} if lat and lon else None, - "population": population, - "instance_of": instance_of, - "osm_relation_id": osm_relation_id, - "source": "wikidata", - "extratags": { - "wikidata": wikidata_id, - }, - } - - if wikipedia: - result["extratags"]["wikipedia"] = wikipedia - - # Fetch boundary polygon from Nominatim if we have an OSM relation ID - boundary = None - if osm_relation_id: - try: - nom_resp = http_requests.get(NOMINATIM_URL, params={ - 'osmtype': 'R', - 'osmid': osm_relation_id, - 'format': 'json', - 'polygon_geojson': 1, - }, timeout=5) - if nom_resp.status_code == 200: - nom_data = nom_resp.json() - geom = nom_data.get('geometry') - if geom and geom.get('type') in ('Polygon', 'MultiPolygon'): - boundary = geom - logger.debug(f"Wikidata boundary hit for {wikidata_id}") - except Exception as e: - logger.debug(f"Wikidata boundary fetch failed: {e}") - - result["boundary"] = boundary - - result = _enrich_with_wiki_index(result) - logger.debug(f"Wikidata hit: {wikidata_id} -> {name}") - return result, 200 - - except Exception as e: - logger.warning(f"Wikidata error for {wikidata_id}: {e}") - return {"error": "Wikidata lookup failed"}, 502 From 79d7b2b343478a6bac025e9ffd63051bc43116e2 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 10:29:39 -0600 Subject: [PATCH 66/72] cleanup: remove orphaned lib/address_book.py (post-cleanup-4 dead code) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After cleanup #4 deleted lib/geocode.py, the only remaining address_book references in recon were lib/address_book_test.py (test of the dying SUT) and a dead `from . import address_book` import at the top of lib/netsyms_api.py (never referenced in the body). This PR removes all three. - DELETE lib/address_book.py + lib/address_book_test.py - netsyms_api.py: drop the dead `from . import address_book` import config/address_book.yaml stays — vendored data, navi-contacts (:8423) consumes its own copy via NAVI_ADDRESS_BOOK_YAML. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/address_book.py | 160 --------------------------------------- lib/address_book_test.py | 91 ---------------------- lib/netsyms_api.py | 1 - 3 files changed, 252 deletions(-) delete mode 100644 lib/address_book.py delete mode 100644 lib/address_book_test.py diff --git a/lib/address_book.py b/lib/address_book.py deleted file mode 100644 index f9827f6..0000000 --- a/lib/address_book.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -RECON Address Book — YAML-backed saved-location lookup. - -Provides named locations (home, work, etc.) that short-circuit Photon -geocoding when an exact alias match is found. - -Config: /opt/recon/config/address_book.yaml -""" - -import os -import re -import threading - -import yaml - -from .utils import setup_logging - -logger = setup_logging('recon.address_book') - -_CONFIG_PATH = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - 'config', 'address_book.yaml', -) - -_lock = threading.Lock() -_entries: list[dict] = [] -_mtime: float = 0.0 - - -def _reload_if_changed(): - """Reload the YAML file if its mtime has changed.""" - global _entries, _mtime - try: - st = os.stat(_CONFIG_PATH) - except FileNotFoundError: - logger.warning("Address book not found: %s", _CONFIG_PATH) - _entries = [] - _mtime = 0.0 - return - - if st.st_mtime == _mtime: - return - - with _lock: - # Double-check after acquiring lock - try: - st = os.stat(_CONFIG_PATH) - except FileNotFoundError: - _entries = [] - _mtime = 0.0 - return - if st.st_mtime == _mtime: - return - - with open(_CONFIG_PATH, 'r') as f: - data = yaml.safe_load(f) or {} - - raw = data.get('entries', []) - loaded = [] - for entry in raw: - # Normalise aliases to lowercase for matching - aliases = [a.lower() for a in entry.get('aliases', [])] - loaded.append({ - 'id': entry.get('id', ''), - 'name': entry.get('name', ''), - 'aliases': aliases, - 'address': entry.get('address', ''), - 'lat': entry.get('lat'), - 'lon': entry.get('lon'), - 'tags': entry.get('tags', []), - }) - _entries = loaded - _mtime = st.st_mtime - logger.info("Address book loaded: %d entries from %s", len(_entries), _CONFIG_PATH) - - -def load(): - """Ensure the address book is loaded (and refreshed if the file changed).""" - _reload_if_changed() - return _entries - - -def _normalize(text: str) -> str: - """Lowercase, strip, remove commas, collapse whitespace.""" - t = text.strip().lower() - t = t.replace(',', ' ') - return ' '.join(t.split()) - - -def lookup(query: str): - """ - Look up a query against name and aliases. - - Returns dict with the matching entry plus a 'confidence' field: - - "exact": full name/alias match, OR query starts with alias + word boundary - - "partial": alias starts with query + word boundary, or alias appears - as a contiguous token sequence inside the query - - None if no match - - Matching order (first exact wins, else first partial): - 1. normalized(query) == normalized(name or alias) → exact - 2. normalized(query) starts with normalized(alias) + " " → exact - 3. normalized(alias) starts with normalized(query) + " " → partial - 4. normalized(alias) is a contiguous token sub-sequence → partial - """ - _reload_if_changed() - q = _normalize(query) - if not q: - return None - - first_exact = None - first_partial = None - - for entry in _entries: - norm_name = _normalize(entry['name']) - check_aliases = [_normalize(a) for a in entry.get('aliases', [])] - all_forms = [norm_name] + check_aliases - - for form in all_forms: - if not form: - continue - - # Rule 1: exact match - if q == form: - return {**entry, 'confidence': 'exact'} - - # Rule 2: query starts with alias + word boundary - if q.startswith(form + ' '): - if first_exact is None: - first_exact = entry - continue - - # Rule 3: alias starts with query (user still typing) - if form.startswith(q) and len(q) < len(form): - if first_partial is None: - first_partial = entry - continue - - # Rule 4: alias is contiguous token sub-sequence in query - # Build regex: token1\s+token2\s+...tokenN - tokens = form.split() - if len(tokens) >= 1: - pattern = r'(?:^|\s)' + r'\s+'.join(re.escape(t) for t in tokens) + r'(?:\s|$)' - if re.search(pattern, q): - if first_partial is None: - first_partial = entry - - if first_exact is not None: - return {**first_exact, 'confidence': 'exact'} - - if first_partial is not None: - return {**first_partial, 'confidence': 'partial'} - - return None - - -def list_all(): - """Return all address book entries.""" - _reload_if_changed() - return list(_entries) diff --git a/lib/address_book_test.py b/lib/address_book_test.py deleted file mode 100644 index 75905f0..0000000 --- a/lib/address_book_test.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for RECON address book module.""" -import sys -import os - -# Add project root to path -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from lib import address_book - -TESTS = [ - # ── Existing tests ── - ("lookup('home') → exact", - lambda: address_book.lookup("home"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('Home') → exact (case-insensitive)", - lambda: address_book.lookup("Home"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('214 north st') → exact via alias", - lambda: address_book.lookup("214 north st"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('214 North Street') → exact via alias", - lambda: address_book.lookup("214 North Street"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('nonexistent place') → None", - lambda: address_book.lookup("nonexistent place"), - lambda r: r is None), - - ("list_all() → 1 entry", - lambda: address_book.list_all(), - lambda r: isinstance(r, list) and len(r) == 1 and r[0]['id'] == 'home'), - - # ── New prefix+boundary tests ── - ("lookup('214 north st filer') → exact (query starts with alias)", - lambda: address_book.lookup("214 north st filer"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('214 North St Filer ID') → exact (case + trailing state)", - lambda: address_book.lookup("214 North St Filer ID"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('214 north st, filer, id') → exact (commas stripped)", - lambda: address_book.lookup("214 north st, filer, id"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('home today') → exact (short alias + trailing text)", - lambda: address_book.lookup("home today"), - lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), - - ("lookup('214') → partial (query is prefix of alias)", - lambda: address_book.lookup("214"), - lambda r: r is not None and r['confidence'] == 'partial'), - - ("lookup('214 n') → partial (partial prefix of alias)", - lambda: address_book.lookup("214 n"), - lambda r: r is not None and r['confidence'] == 'partial'), - - ("lookup('completely unrelated query') → None", - lambda: address_book.lookup("completely unrelated query"), - lambda r: r is None), - - ("lookup('214 north streets of filer') → None (no word boundary after st)", - lambda: address_book.lookup("214 north streets of filer"), - lambda r: r is None), -] - -passed = 0 -failed = 0 -for name, fn, check in TESTS: - try: - result = fn() - ok = check(result) - except Exception as e: - ok = False - result = f"EXCEPTION: {e}" - - status = "PASS" if ok else "FAIL" - if ok: - passed += 1 - else: - failed += 1 - print(f" [{status}] {name}") - if not ok: - print(f" got: {result}") - -print(f"\n{passed} passed, {failed} failed") -sys.exit(0 if failed == 0 else 1) diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 2caf47c..dbae24e 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -8,7 +8,6 @@ GET /api/netsyms/health from flask import Blueprint, request, jsonify from . import netsyms -from . import address_book from .utils import setup_logging logger = setup_logging('recon.netsyms_api') From aa6e972260d89a7c03c65aa6fd291013123f96dd Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 10:35:15 -0600 Subject: [PATCH 67/72] cleanup: remove orphaned lib/overture.py + lib/osm_categories.py (post-#27 dead code) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both modules were flagged in cleanup #27 (PR #16) as fully orphaned once the place_detail orchestrator cluster was deleted; Matt confirmed scope in chat. - lib/overture.py (170L): only consumer was place_detail._enrich_with_overture (deleted in #27). - lib/osm_categories.py (143L): humanize_category's only callers were place_detail._parse_nominatim / _parse_overpass (both deleted in #27). Re-probed against master 79d7b2b: zero import/usage references anywhere outside the modules themselves, zero template/JS refs, no test files. compileall lib/ passes. Note: scripts/overture_import.py (the Overture-Maps→PostGIS ETL script) is independent — imports nothing from lib/ — and is left untouched. After this PR the `overture` PostGIS DB it populates has no remaining recon reader; that's a data-ops follow-up, not code touched here. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/osm_categories.py | 143 ----------------------------------- lib/overture.py | 170 ------------------------------------------ 2 files changed, 313 deletions(-) delete mode 100644 lib/osm_categories.py delete mode 100644 lib/overture.py diff --git a/lib/osm_categories.py b/lib/osm_categories.py deleted file mode 100644 index dd5217c..0000000 --- a/lib/osm_categories.py +++ /dev/null @@ -1,143 +0,0 @@ -""" -Human-readable category names for OSM class/type pairs. - -Used by the place detail proxy to turn ("amenity", "cafe") into "Coffee shop". -Covers ~50 common categories; unmapped pairs fall back to title-cased class:type. -""" - -# Exact (class, type) → label -CATEGORY_MAP = { - # Amenity - ("amenity", "cafe"): "Coffee shop", - ("amenity", "restaurant"): "Restaurant", - ("amenity", "fast_food"): "Fast food restaurant", - ("amenity", "bar"): "Bar", - ("amenity", "pub"): "Pub", - ("amenity", "biergarten"): "Beer garden", - ("amenity", "ice_cream"): "Ice cream shop", - ("amenity", "fuel"): "Gas station", - ("amenity", "charging_station"): "EV charging station", - ("amenity", "parking"): "Parking", - ("amenity", "bank"): "Bank", - ("amenity", "atm"): "ATM", - ("amenity", "pharmacy"): "Pharmacy", - ("amenity", "hospital"): "Hospital", - ("amenity", "clinic"): "Clinic", - ("amenity", "dentist"): "Dentist", - ("amenity", "doctors"): "Doctor's office", - ("amenity", "veterinary"): "Veterinarian", - ("amenity", "school"): "School", - ("amenity", "university"): "University", - ("amenity", "college"): "College", - ("amenity", "library"): "Library", - ("amenity", "post_office"): "Post office", - ("amenity", "fire_station"): "Fire station", - ("amenity", "police"): "Police station", - ("amenity", "townhall"): "Town hall", - ("amenity", "place_of_worship"): "Place of worship", - ("amenity", "theatre"): "Theatre", - ("amenity", "cinema"): "Cinema", - ("amenity", "community_centre"): "Community center", - ("amenity", "toilets"): "Restrooms", - ("amenity", "drinking_water"): "Drinking water", - ("amenity", "shelter"): "Shelter", - ("amenity", "camping"): "Campground", - # Shop - ("shop", "supermarket"): "Supermarket", - ("shop", "convenience"): "Convenience store", - ("shop", "hardware"): "Hardware store", - ("shop", "clothes"): "Clothing store", - ("shop", "car_repair"): "Auto repair", - ("shop", "car"): "Car dealership", - ("shop", "bakery"): "Bakery", - ("shop", "butcher"): "Butcher", - # Leisure - ("leisure", "park"): "Park", - ("leisure", "playground"): "Playground", - ("leisure", "sports_centre"): "Sports center", - ("leisure", "swimming_pool"): "Swimming pool", - ("leisure", "golf_course"): "Golf course", - ("leisure", "nature_reserve"): "Nature reserve", - ("leisure", "campsite"): "Campsite", - # Tourism - ("tourism", "hotel"): "Hotel", - ("tourism", "motel"): "Motel", - ("tourism", "guest_house"): "Guest house", - ("tourism", "hostel"): "Hostel", - ("tourism", "camp_site"): "Campsite", - ("tourism", "viewpoint"): "Viewpoint", - ("tourism", "museum"): "Museum", - ("tourism", "information"): "Information", - ("tourism", "attraction"): "Tourist attraction", - ("tourism", "picnic_site"): "Picnic site", - # Natural - ("natural", "peak"): "Peak", - ("natural", "spring"): "Spring", - ("natural", "hot_spring"): "Hot spring", - ("natural", "lake"): "Lake", - ("natural", "water"): "Water body", - ("natural", "cliff"): "Cliff", - ("natural", "cave_entrance"): "Cave", - # Highway - ("highway", "bus_stop"): "Bus stop", - ("highway", "rest_area"): "Rest area", - # Boundary - ("boundary", "administrative"): "Administrative boundary", - ("boundary", "protected_area"): "Protected area", - ("boundary", "national_park"): "National park", - # Place - ("place", "city"): "City", - ("place", "town"): "Town", - ("place", "village"): "Village", - ("place", "hamlet"): "Hamlet", - ("place", "suburb"): "Suburb", - ("place", "neighbourhood"): "Neighborhood", - # Building - ("building", "yes"): "Building", - # Waterway - ("waterway", "river"): "River", - ("waterway", "stream"): "Stream", - ("waterway", "waterfall"): "Waterfall", - # Landuse - ("landuse", "cemetery"): "Cemetery", - ("landuse", "forest"): "Forest", - # Historic - ("historic", "monument"): "Monument", - ("historic", "memorial"): "Memorial", - ("historic", "ruins"): "Ruins", -} - -# Class-level wildcard fallbacks (when exact type isn't mapped) -CLASS_FALLBACKS = { - "shop": "Shop", - "amenity": "Amenity", - "leisure": "Leisure", - "tourism": "Tourism", - "natural": "Natural feature", - "historic": "Historic site", -} - - -def humanize_category(osm_class, osm_type): - """Return a human-readable category string for an OSM class/type pair.""" - if not osm_class or not osm_type: - return "Place" - - osm_class = osm_class.lower() - osm_type = osm_type.lower() - - # Exact match - label = CATEGORY_MAP.get((osm_class, osm_type)) - if label: - return label - - # Class-level wildcard with formatted type - prefix = CLASS_FALLBACKS.get(osm_class) - if prefix: - nice_type = osm_type.replace("_", " ").title() - return f"{prefix}: {nice_type}" if prefix != nice_type else prefix - - # Generic fallback - nice_class = osm_class.replace("_", " ").title() - nice_type = osm_type.replace("_", " ").title() - return f"{nice_class}: {nice_type}" diff --git a/lib/overture.py b/lib/overture.py deleted file mode 100644 index fcbdd18..0000000 --- a/lib/overture.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Overture Maps enrichment layer. - -Provides lookup functions against the local PostgreSQL Overture Places database. -Two strategies: - 1. find_by_osm_id — exact match via OSM cross-reference index - 2. find_by_coords_and_name — spatial + fuzzy name fallback - -Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, -functions return None gracefully (feature degrades, doesn't crash). -""" -import json -import os - -import psycopg2 -import psycopg2.pool - -from .utils import setup_logging - -logger = setup_logging('recon.overture') - -_pool = None -_pool_failed = False - -# Map full OSM type names to single-letter codes used in Overture sources -OSM_TYPE_MAP = { - 'N': 'n', 'W': 'w', 'R': 'r', - 'node': 'n', 'way': 'w', 'relation': 'r', - 'n': 'n', 'w': 'w', 'r': 'r', -} - - -def _get_pool(): - """Lazy-init the connection pool. Returns None if Postgres is unreachable.""" - global _pool, _pool_failed - if _pool is not None: - return _pool - if _pool_failed: - return None - - try: - _pool = psycopg2.pool.SimpleConnectionPool( - minconn=1, - maxconn=3, - host=os.environ.get('OVERTURE_DB_HOST', 'localhost'), - port=int(os.environ.get('OVERTURE_DB_PORT', '5432')), - dbname=os.environ.get('OVERTURE_DB_NAME', 'overture'), - user=os.environ.get('OVERTURE_DB_USER', 'overture'), - password=os.environ.get('OVERTURE_DB_PASSWORD', ''), - connect_timeout=5, - ) - logger.info("Overture PostgreSQL connection pool initialized") - return _pool - except Exception as e: - _pool_failed = True - logger.warning(f"Overture PostgreSQL unavailable, enrichment disabled: {e}") - return None - - -def _query(sql, params): - """Execute a query and return the first row as a dict, or None.""" - pool = _get_pool() - if pool is None: - return None - - conn = None - try: - conn = pool.getconn() - with conn.cursor() as cur: - cur.execute(sql, params) - row = cur.fetchone() - if row is None: - return None - cols = [desc[0] for desc in cur.description] - return dict(zip(cols, row)) - except Exception as e: - logger.warning(f"Overture query error: {e}") - if conn: - try: - conn.rollback() - except Exception: - pass - return None - finally: - if conn: - try: - pool.putconn(conn) - except Exception: - pass - - -def _format_result(row, match_method): - """Convert a database row dict to the enrichment result shape.""" - if not row: - return None - - socials = row.get('socials') - if isinstance(socials, str): - try: - socials = json.loads(socials) - except (json.JSONDecodeError, TypeError): - socials = None - - return { - 'phone': row.get('phone'), - 'website': row.get('website'), - 'socials': socials, - 'brand_name': row.get('brand_name'), - 'brand_wikidata': row.get('brand_wikidata'), - 'basic_category': row.get('basic_category'), - 'confidence': row.get('confidence'), - 'gers_id': row.get('id'), - 'match_method': match_method, - } - - -def find_by_osm_id(osm_type, osm_id): - """ - Look up an Overture place by its OSM cross-reference. - - Args: - osm_type: OSM type — 'N', 'W', 'R', 'node', 'way', 'relation', or single letter - osm_id: OSM numeric ID - - Returns: - Enrichment dict or None - """ - type_letter = OSM_TYPE_MAP.get(osm_type) - if not type_letter: - return None - - row = _query( - """SELECT id, name, basic_category, confidence, - phone, website, socials, brand_name, brand_wikidata - FROM places - WHERE osm_type = %s AND osm_id = %s - LIMIT 1""", - (type_letter, int(osm_id)) - ) - return _format_result(row, 'osm_xref') - - -def find_by_coords_and_name(lat, lon, name, radius_m=100): - """ - Look up an Overture place by spatial proximity + fuzzy name match. - - Args: - lat: Latitude - lon: Longitude - name: Place name to fuzzy-match - radius_m: Search radius in meters (default 100) - - Returns: - Enrichment dict or None - """ - if not name or not lat or not lon: - return None - - row = _query( - """SELECT id, name, basic_category, confidence, - phone, website, socials, brand_name, brand_wikidata, - similarity(name, %s) AS sim - FROM places - WHERE ST_DWithin(geometry::geography, ST_MakePoint(%s, %s)::geography, %s) - AND similarity(name, %s) > 0.4 - ORDER BY sim DESC, ST_Distance(geometry::geography, ST_MakePoint(%s, %s)::geography) ASC - LIMIT 1""", - (name, lon, lat, radius_m, name, lon, lat) - ) - return _format_result(row, 'coord_name_fuzzy') From 879df84b7a80296d62a95a519adbfa689b6b356d Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 13:34:06 -0600 Subject: [PATCH 68/72] decouple: remove /api/auth/whoami handler (migrated to navi-admin) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-B of the 2-PR whoami migration. The route is now served by navi-admin :8427 via nginx (`^~ /api/auth/whoami` cutover verified live — edge responses carry navi-admin's X-Cache-Status: BYPASS), so recon's handler is edge-unreachable and safe to remove. - lib/api.py: delete the @app.route('/api/auth/whoami') api_auth_whoami handler + its dedicated section comment. It was the file tail (post-cleanup-#6), so api.py now ends on the metrics-history handler. Sequenced after PR-A (navi-backend, merged + deployed) and the nginx edge cutover, so the route never 404s. recon serves zero navi-facing auth-state endpoints now. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/lib/api.py b/lib/api.py index 3a7e5ca..e83a98f 100644 --- a/lib/api.py +++ b/lib/api.py @@ -2535,21 +2535,3 @@ def api_metrics_history(): return jsonify({'type': metric_type, 'hours': hours, 'points': points}) except Exception as e: return jsonify({'type': metric_type, 'hours': hours, 'points': [], 'error': str(e)}) - - -# ── Auth state endpoint ───────────────────────────────────────────────────── -# Returns current auth state for frontend consumption. -# This endpoint must be behind Caddy forward_auth to receive X-Authentik-* headers. -@app.route('/api/auth/whoami') -def api_auth_whoami(): - """Return auth state for frontend. Behind forward_auth, so headers are present when authenticated.""" - username = request.headers.get('X-Authentik-Username') - if username: - return jsonify({ - 'authenticated': True, - 'username': username, - }) - return jsonify({ - 'authenticated': False, - 'username': None, - }) From 21c0f11eff17d3de8f44dc771ff2bd55bb05c71d Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 13:57:14 -0600 Subject: [PATCH 69/72] decouple: remove scripts/overture_import.py (migrated to navi-backend) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-B of the Overture-import relocation. The ETL now lives in zvx-echo6/navi-backend at the same scripts/ path (PR-A, navi-backend 475739d: script ported verbatim + duckdb dep + docs; verified live — imports cleanly, overture PG reachable with ~20.9M rows). recon no longer produces overture data it doesn't consume. - DELETE scripts/overture_import.py. Context: cleanup #29 removed lib/overture.py (recon's only overture *reader*), leaving this ETL as recon's last orphan overture code path. PR-A moved the writer to the navi side; this removes recon's now-orphan copy. The `overture` PG database is unchanged — only the writer moved. OVERTURE_DB_* vars in /opt/recon/.env are now dead in recon (zero overture code paths remain) — flagged for out-of-band post-merge prune, same pattern as PADUS_DB_* (cleanup #5). Co-authored-by: Claude Opus 4.7 (1M context) --- scripts/overture_import.py | 350 ------------------------------------- 1 file changed, 350 deletions(-) delete mode 100644 scripts/overture_import.py diff --git a/scripts/overture_import.py b/scripts/overture_import.py deleted file mode 100644 index 0b6ba67..0000000 --- a/scripts/overture_import.py +++ /dev/null @@ -1,350 +0,0 @@ -#!/usr/bin/env python3 -"""Overture Maps Places → PostgreSQL import script (v2). - -Downloads Overture Places Parquet from S3 via DuckDB (public bucket, no credentials), -filters to North America bounding box, and inserts into local PostgreSQL with PostGIS. - -Usage: - cd /opt/recon && venv/bin/python scripts/overture_import.py - -Re-runnable (idempotent via UPSERT). -""" - -import json -import logging -import os -import re -import sys -import time - -import duckdb -import psycopg2 -import psycopg2.extras - -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%H:%M:%S' -) -log = logging.getLogger('overture_import') - -# --- Config --- -OVERTURE_RELEASE = '2026-04-15.0' -S3_PATH = f's3://overturemaps-us-west-2/release/{OVERTURE_RELEASE}/theme=places/type=place/*' - -# North America bounding box (generous — includes Hawaii, Puerto Rico, Canada) -BBOX = { - 'xmin': -170.0, - 'xmax': -50.0, - 'ymin': 15.0, - 'ymax': 85.0, -} - -BATCH_SIZE = 50_000 -OSM_RECORD_RE = re.compile(r'^([nwr])(\d+)@\d+$') - -DB_CONFIG = { - 'host': os.environ.get('OVERTURE_DB_HOST', 'localhost'), - 'port': int(os.environ.get('OVERTURE_DB_PORT', '5432')), - 'dbname': os.environ.get('OVERTURE_DB_NAME', 'overture'), - 'user': os.environ.get('OVERTURE_DB_USER', 'overture'), - 'password': os.environ.get('OVERTURE_DB_PASSWORD', ''), -} - - -def create_table(conn): - """Create places table and indexes if they don't exist.""" - with conn.cursor() as cur: - cur.execute(""" - CREATE TABLE IF NOT EXISTS places ( - id TEXT PRIMARY KEY, - geometry GEOMETRY(Point, 4326), - name TEXT, - basic_category TEXT, - confidence REAL, - phone TEXT, - website TEXT, - socials JSONB, - brand_name TEXT, - brand_wikidata TEXT, - osm_type CHAR(1), - osm_id BIGINT, - source_record_id TEXT, - raw_sources JSONB - ); - """) - cur.execute(""" - CREATE INDEX IF NOT EXISTS idx_places_osm - ON places(osm_type, osm_id) WHERE osm_type IS NOT NULL; - """) - cur.execute(""" - CREATE INDEX IF NOT EXISTS idx_places_geom - ON places USING GIST(geometry); - """) - cur.execute(""" - CREATE INDEX IF NOT EXISTS idx_places_name_trgm - ON places USING GIN(name gin_trgm_ops); - """) - conn.commit() - log.info('Table and indexes ready') - - -def parse_osm_ref(sources): - """Extract OSM type letter and ID from Overture sources array.""" - if not sources: - return None, None, None - for src in sources: - record_id = None - if isinstance(src, dict): - record_id = src.get('record_id', '') - elif hasattr(src, '__getitem__'): - # DuckDB struct — try attribute access - try: - record_id = src['record_id'] - except (KeyError, TypeError, IndexError): - pass - if not record_id: - continue - m = OSM_RECORD_RE.match(str(record_id)) - if m: - return m.group(1), int(m.group(2)), str(record_id) - return None, None, None - - -def run_import(): - """Main import: DuckDB reads S3 Parquet → PostgreSQL via chunked OFFSET/LIMIT.""" - log.info(f'Overture release: {OVERTURE_RELEASE}') - log.info(f'S3 path: {S3_PATH}') - log.info(f'Bounding box: {BBOX}') - - # Connect to PostgreSQL - conn = psycopg2.connect(**DB_CONFIG) - conn.autocommit = False - create_table(conn) - - # Set up DuckDB with httpfs and spatial for S3 access - duck = duckdb.connect() - duck.execute("INSTALL httpfs; LOAD httpfs;") - duck.execute("INSTALL spatial; LOAD spatial;") - duck.execute("SET s3_region='us-west-2';") - - # Use a materialized approach: DuckDB query → Arrow → iterate in Python - query = f""" - SELECT - id, - ST_X(geometry) AS lon, - ST_Y(geometry) AS lat, - names.primary AS name, - basic_category, - confidence, - phones, - websites, - socials, - brand, - sources - FROM read_parquet('{S3_PATH}', hive_partitioning=true) - WHERE bbox.xmin >= {BBOX['xmin']} - AND bbox.xmax <= {BBOX['xmax']} - AND bbox.ymin >= {BBOX['ymin']} - AND bbox.ymax <= {BBOX['ymax']} - """ - - log.info('Starting DuckDB query against S3 (this will take several minutes)...') - t_start = time.time() - - # Execute and fetch all as Arrow for efficient iteration - result_rel = duck.sql(query) - - upsert_sql = """ - INSERT INTO places (id, geometry, name, basic_category, confidence, - phone, website, socials, brand_name, brand_wikidata, - osm_type, osm_id, source_record_id, raw_sources) - VALUES %s - ON CONFLICT (id) DO UPDATE SET - geometry = EXCLUDED.geometry, - name = EXCLUDED.name, - basic_category = EXCLUDED.basic_category, - confidence = EXCLUDED.confidence, - phone = EXCLUDED.phone, - website = EXCLUDED.website, - socials = EXCLUDED.socials, - brand_name = EXCLUDED.brand_name, - brand_wikidata = EXCLUDED.brand_wikidata, - osm_type = EXCLUDED.osm_type, - osm_id = EXCLUDED.osm_id, - source_record_id = EXCLUDED.source_record_id, - raw_sources = EXCLUDED.raw_sources - """ - - template = """( - %(id)s, - ST_SetSRID(ST_MakePoint(%(lon)s, %(lat)s), 4326), - %(name)s, - %(basic_category)s, - %(confidence)s, - %(phone)s, - %(website)s, - %(socials)s::jsonb, - %(brand_name)s, - %(brand_wikidata)s, - %(osm_type)s, - %(osm_id)s, - %(source_record_id)s, - %(raw_sources)s::jsonb - )""" - - total = 0 - osm_refs = 0 - batch = [] - - log.info('DuckDB query executing, fetching results in chunks...') - - # Fetch in chunks using fetchmany on the relation - chunk_size = BATCH_SIZE - while True: - chunk = result_rel.fetchmany(chunk_size) - if not chunk: - break - - for row in chunk: - row_id = row[0] - lon = row[1] - lat = row[2] - name = row[3] - basic_cat = row[4] - conf = row[5] - phones = row[6] - websites = row[7] - socials_raw = row[8] - brand_raw = row[9] - sources_raw = row[10] - - if lon is None or lat is None: - continue - - # Phone: first element of VARCHAR[] - phone = None - if phones and len(phones) > 0: - phone = str(phones[0]) if phones[0] else None - - # Website: first element of VARCHAR[] - website = None - if websites and len(websites) > 0: - website = str(websites[0]) if websites[0] else None - - # Socials: VARCHAR[] → JSON array of strings - socials_json = None - if socials_raw and len(socials_raw) > 0: - socials_json = json.dumps([str(s) for s in socials_raw if s]) - - # Brand: struct with wikidata and names.primary - brand_name = None - brand_wikidata = None - if brand_raw: - try: - if isinstance(brand_raw, dict): - brand_wikidata = brand_raw.get('wikidata') - names_struct = brand_raw.get('names') - if names_struct and isinstance(names_struct, dict): - brand_name = names_struct.get('primary') - else: - # DuckDB struct — access by key - brand_wikidata = brand_raw['wikidata'] if 'wikidata' in dir(brand_raw) else None - try: - brand_wikidata = brand_raw[0] # wikidata is first field - names_struct = brand_raw[1] # names is second field - if names_struct: - brand_name = names_struct[0] # primary is first field - except (IndexError, TypeError): - pass - except Exception: - pass - - # Sources: parse OSM cross-reference - sources_list = None - if sources_raw: - if isinstance(sources_raw, (list, tuple)): - sources_list = [] - for s in sources_raw: - if isinstance(s, dict): - sources_list.append(s) - else: - # DuckDB struct tuple — convert - try: - sources_list.append({ - 'dataset': s[1] if len(s) > 1 else None, - 'record_id': s[3] if len(s) > 3 else None, - }) - except (TypeError, IndexError): - pass - - osm_type_letter, osm_id_val, source_record_id = parse_osm_ref(sources_list) - if osm_type_letter: - osm_refs += 1 - - raw_sources_json = json.dumps(sources_list) if sources_list else None - - batch.append({ - 'id': row_id, - 'lon': float(lon), - 'lat': float(lat), - 'name': name, - 'basic_category': basic_cat, - 'confidence': float(conf) if conf is not None else None, - 'phone': phone, - 'website': website, - 'socials': socials_json, - 'brand_name': brand_name, - 'brand_wikidata': brand_wikidata, - 'osm_type': osm_type_letter, - 'osm_id': osm_id_val, - 'source_record_id': source_record_id, - 'raw_sources': raw_sources_json, - }) - - if len(batch) >= BATCH_SIZE: - with conn.cursor() as cur: - psycopg2.extras.execute_values( - cur, upsert_sql, batch, - template=template, - page_size=BATCH_SIZE - ) - conn.commit() - total += len(batch) - elapsed = time.time() - t_start - rate = total / elapsed if elapsed > 0 else 0 - log.info(f'Inserted {total:,} rows ({osm_refs:,} OSM xrefs) ' - f'[{rate:.0f} rows/sec, {elapsed:.0f}s elapsed]') - batch = [] - - # Flush remaining - if batch: - with conn.cursor() as cur: - psycopg2.extras.execute_values( - cur, upsert_sql, batch, - template=template, - page_size=BATCH_SIZE - ) - conn.commit() - total += len(batch) - - duck.close() - - # Final stats - elapsed = time.time() - t_start - log.info(f'Import complete: {total:,} rows, {osm_refs:,} OSM cross-refs, ' - f'{elapsed:.0f}s total ({total/elapsed:.0f} rows/sec)') - - # Verify - with conn.cursor() as cur: - cur.execute("SELECT count(*) FROM places") - count = cur.fetchone()[0] - cur.execute("SELECT count(*) FROM places WHERE osm_type IS NOT NULL") - osm_count = cur.fetchone()[0] - log.info(f'Final table: {count:,} total rows, {osm_count:,} with OSM cross-references') - - conn.close() - - -if __name__ == '__main__': - run_import() From ac99723e514d9766d8c8d7994452cefd3d59c06a Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 19:25:53 -0600 Subject: [PATCH 70/72] decouple: remove /api/wiki-enrich + wiki_index read path (migrated to navi-places) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-B of decouple #4-READ. navi-places now reads its own wiki_index.db directly (navi-backend a8f9520, deployed + verified: Horseshoe Falls enrichment served from /var/lib/navi-backend/wiki_index.db; admin-info dropped the recon-wiki-enrich dependency). recon's endpoint is edge-unreachable-unused, safe to remove. - DELETE lib/wiki_enrich_api.py (the /api/wiki-enrich blueprint). - DELETE lib/place_detail.py (97-line survivor: lookup_wiki_index + _get_wiki_index_db) — its only consumer was wiki_enrich_api.py (verified zero non-test code consumers). Fully orphaned. - DELETE lib/wiki_enrich_api_test.py (tests the deleted endpoint). - api.py: drop the wiki_enrich_bp import + register_blueprint. Untouched (separate decouple): /api/wiki-rewrite (wiki_rewrite_api.py + wiki_rewrite.py), still navi-consumed. /opt/recon/data/wiki_index.db left in place (data; now a harmless dead file). Internal localhost migration — no nginx. Flag (doc follow-up, not fixed): deployment_config.py:10 + wiki_rewrite_api.py:6 both have stale in-prose references to the deleted place_detail. Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 4 -- lib/place_detail.py | 97 ------------------------------------- lib/wiki_enrich_api.py | 31 ------------ lib/wiki_enrich_api_test.py | 77 ----------------------------- 4 files changed, 209 deletions(-) delete mode 100644 lib/place_detail.py delete mode 100644 lib/wiki_enrich_api.py delete mode 100644 lib/wiki_enrich_api_test.py diff --git a/lib/api.py b/lib/api.py index e83a98f..63562f7 100644 --- a/lib/api.py +++ b/lib/api.py @@ -62,10 +62,6 @@ app.request_class = _LargeZimRequest from .netsyms_api import netsyms_bp app.register_blueprint(netsyms_bp) -# ── Wiki-enrich Blueprint (extraction #5 prep — HTTP wrapper over wiki_index) ── -from .wiki_enrich_api import wiki_enrich_bp -app.register_blueprint(wiki_enrich_bp) - # ── Wiki-rewrite Blueprint (extraction #5 prep — HTTP wrapper over rewrite_wiki_link) ── from .wiki_rewrite_api import wiki_rewrite_bp app.register_blueprint(wiki_rewrite_bp) diff --git a/lib/place_detail.py b/lib/place_detail.py deleted file mode 100644 index 6f6f1ba..0000000 --- a/lib/place_detail.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Wiki-index lookup for place enrichment. - -Provides lookup_wiki_index(wikidata_id, name, country_code) — a pure read of the -local wiki_index.db, used by the /api/wiki-enrich endpoint (navi-places -HTTP-fetches wiki enrichment instead of reading the 2.1 GB DB directly). -""" -import os -import sqlite3 - -from .utils import setup_logging - -logger = setup_logging('recon.place_detail') - - -# ── Wiki Index enrichment ─────────────────────────────────────────────── - -_wiki_index_conn = None - -def _get_wiki_index_db(): - global _wiki_index_conn - if _wiki_index_conn is not None: - return _wiki_index_conn - - db_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "wiki_index.db") - if not os.path.exists(db_path): - logger.debug(f"wiki_index.db not found at {db_path}") - return None - - _wiki_index_conn = sqlite3.connect(db_path, check_same_thread=False) - _wiki_index_conn.row_factory = sqlite3.Row - logger.info(f"Wiki index DB ready at {db_path}") - return _wiki_index_conn - - -def lookup_wiki_index(wikidata_id=None, name=None, country_code=None): - """Standalone wiki_index lookup, extracted for the /api/wiki-enrich endpoint - (extraction #5: navi-places HTTP-fetches wiki enrichment instead of reading - the 2.1 GB wiki_index.db directly). - - Mirrors the lookup that `_enrich_with_wiki_index` performs in-process: - by wikidata_id first, then a name + country_code fallback. Returns a dict of - wiki enrichment fields (only those present), or None if there is no match or - the wiki_index DB is unavailable. Pure DB read — no feature-flag gating - (callers decide whether to call) and never raises. - - NOTE: additive only — `_enrich_with_wiki_index` is intentionally left - untouched here; it can be DRY-refactored to delegate to this in a later PR. - """ - db = _get_wiki_index_db() - if not db: - return None - - try: - cur = db.cursor() - row = None - - if wikidata_id: - wid = wikidata_id - if isinstance(wid, str) and wid.startswith("http"): - wid = wid.split("/")[-1] - cur.execute( - "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE wikidata_id = ?", - (wid,) - ) - row = cur.fetchone() - - if not row and name and country_code: - cur.execute( - "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE place_name = ? AND country_code = ? LIMIT 1", - (name, country_code.lower()) - ) - row = cur.fetchone() - - if not row: - return None - - out = {} - if row["summary"]: - out["wiki_summary"] = row["summary"] - if row["wiki_population"]: - try: - out["wiki_population"] = int(row["wiki_population"]) - except (ValueError, TypeError): - out["wiki_population"] = row["wiki_population"] - if row["wikipedia_title"]: - title = row["wikipedia_title"].replace(" ", "_") - out["wiki_url"] = f"https://en.wikipedia.org/wiki/{title}" - if row["wikivoyage_title"]: - title = row["wikivoyage_title"].replace(" ", "_") - out["wikivoyage_url"] = f"https://en.wikivoyage.org/wiki/{title}" - - return out or None - - except Exception as e: - logger.debug(f"wiki_index lookup error: {e}") - return None diff --git a/lib/wiki_enrich_api.py b/lib/wiki_enrich_api.py deleted file mode 100644 index ff0f9c7..0000000 --- a/lib/wiki_enrich_api.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Wiki-enrich API — read-only HTTP wrapper over the wiki_index lookup. - -Extraction #5 prep: lets the (future) navi-places service fetch wiki enrichment -over HTTP instead of reading recon's 2.1 GB data/wiki_index.db directly. Additive -only — does not change place_detail's in-process `_enrich_with_wiki_index` path. - - GET /api/wiki-enrich?wikidata= (primary key) - GET /api/wiki-enrich?name=&country= (fallback key) - -Public (no auth), matching /api/place/*. 400 if no usable key; 404 on no match. -""" -from flask import Blueprint, request, jsonify - -from .place_detail import lookup_wiki_index - -wiki_enrich_bp = Blueprint('wiki_enrich', __name__) - - -@wiki_enrich_bp.route('/api/wiki-enrich') -def api_wiki_enrich(): - wikidata = (request.args.get('wikidata') or '').strip() or None - name = (request.args.get('name') or '').strip() or None - country = (request.args.get('country') or '').strip() or None - - if not wikidata and not (name and country): - return jsonify({'error': 'provide ?wikidata= or ?name=&country='}), 400 - - result = lookup_wiki_index(wikidata_id=wikidata, name=name, country_code=country) - if result is None: - return jsonify({'error': 'no wiki match'}), 404 - return jsonify(result) diff --git a/lib/wiki_enrich_api_test.py b/lib/wiki_enrich_api_test.py deleted file mode 100644 index 681e5cb..0000000 --- a/lib/wiki_enrich_api_test.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Tests for the /api/wiki-enrich endpoint (extraction #5 prep). - -Plain-assert style (matching the other lib *_test.py; recon's venv has no -pytest). Builds a minimal Flask app with only wiki_enrich_bp registered (avoids -importing the full recon app) and points place_detail's lazy wiki_index -connection at an in-memory fixture DB. Run with pytest, or directly: - python -m lib.wiki_enrich_api_test -""" -import sqlite3 - -from flask import Flask - -from lib import place_detail -from lib.wiki_enrich_api import wiki_enrich_bp - - -def _client(): - """Fresh in-memory wiki_index fixture + a minimal app with just the route.""" - conn = sqlite3.connect(":memory:", check_same_thread=False) - conn.row_factory = sqlite3.Row - conn.execute( - "CREATE TABLE wiki_places (wikidata_id TEXT, place_name TEXT, country_code TEXT, " - "summary TEXT, wiki_population TEXT, wikipedia_title TEXT, wikivoyage_title TEXT)" - ) - conn.execute( - "INSERT INTO wiki_places VALUES (?,?,?,?,?,?,?)", - ("Q830149", "Filer", "us", "A city in Idaho.", "2508", "Filer, Idaho", "Filer"), - ) - conn.commit() - # Point the lazy module-level connection at the fixture so - # _get_wiki_index_db()/lookup_wiki_index() use it (bypasses the file path). - place_detail._wiki_index_conn = conn - app = Flask(__name__) - app.register_blueprint(wiki_enrich_bp) - return app.test_client() - - -def test_wiki_enrich_hit_by_wikidata(): - resp = _client().get("/api/wiki-enrich?wikidata=Q830149") - assert resp.status_code == 200, resp.status_code - d = resp.get_json() - assert d["wiki_summary"] == "A city in Idaho." - assert d["wiki_population"] == 2508 # cast to int - assert d["wiki_url"] == "https://en.wikipedia.org/wiki/Filer,_Idaho" - assert d["wikivoyage_url"] == "https://en.wikivoyage.org/wiki/Filer" - - -def test_wiki_enrich_no_match_404(): - resp = _client().get("/api/wiki-enrich?wikidata=Q9999999") - assert resp.status_code == 404, resp.status_code - - -def test_wiki_enrich_name_country_fallback(): - resp = _client().get("/api/wiki-enrich?name=Filer&country=US") - assert resp.status_code == 200, resp.status_code - assert resp.get_json()["wiki_summary"] == "A city in Idaho." - - -def test_wiki_enrich_no_key_400(): - c = _client() - assert c.get("/api/wiki-enrich").status_code == 400 - # name without country is not a usable key - assert c.get("/api/wiki-enrich?name=Filer").status_code == 400 - - -if __name__ == "__main__": - failures = 0 - for _name, _fn in sorted(globals().items()): - if _name.startswith("test_") and callable(_fn): - try: - _fn() - print(f"PASS {_name}") - except Exception as exc: # noqa: BLE001 - failures += 1 - print(f"FAIL {_name}: {exc!r}") - print("OK" if failures == 0 else f"{failures} FAILED") - raise SystemExit(1 if failures else 0) From 6365fe67565ffabe67f5a0d3647f1aaa263bc983 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 21:21:22 -0600 Subject: [PATCH 71/72] decouple: remove /api/wiki-rewrite (migrated to navi-places) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-B of decouple #4-REWRITE — the LAST recon→navi decoupling step. navi-places now owns the Kiwix link-rewrite logic in-process (navi-backend PR-A 7103c27, deployed + verified: Twin Falls live route returns wiki_rewrites local/public from navi's own wiki_cache.db; zero outbound calls to recon /api/wiki-rewrite). - DELETE lib/wiki_rewrite.py (the Kiwix rewrite logic — ported to navi-places). - DELETE lib/wiki_rewrite_api.py (the /api/wiki-rewrite blueprint). - DELETE lib/wiki_rewrite_api_test.py (tests the deleted endpoint). - api.py: drop the wiki_rewrite_bp import + register_blueprint + section comment. Verified zero recon consumers: nothing in recon imports wiki_rewrite — it was purely an HTTP endpoint for navi-places. After this, recon services make and receive zero navi-ecosystem runtime calls; recon is a fully separate product. Out-of-band (post-deploy): DROP TABLE wiki_cache from /opt/recon/data/place_cache.db (table only — place_cache + google_api_calls stay). Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 5 - lib/wiki_rewrite.py | 324 ----------------------------------- lib/wiki_rewrite_api.py | 34 ---- lib/wiki_rewrite_api_test.py | 73 -------- 4 files changed, 436 deletions(-) delete mode 100644 lib/wiki_rewrite.py delete mode 100644 lib/wiki_rewrite_api.py delete mode 100644 lib/wiki_rewrite_api_test.py diff --git a/lib/api.py b/lib/api.py index 63562f7..576f4ee 100644 --- a/lib/api.py +++ b/lib/api.py @@ -62,11 +62,6 @@ app.request_class = _LargeZimRequest from .netsyms_api import netsyms_bp app.register_blueprint(netsyms_bp) -# ── Wiki-rewrite Blueprint (extraction #5 prep — HTTP wrapper over rewrite_wiki_link) ── -from .wiki_rewrite_api import wiki_rewrite_bp -app.register_blueprint(wiki_rewrite_bp) - - # ── Navigation Constants ── diff --git a/lib/wiki_rewrite.py b/lib/wiki_rewrite.py deleted file mode 100644 index d884635..0000000 --- a/lib/wiki_rewrite.py +++ /dev/null @@ -1,324 +0,0 @@ -""" -Wiki link rewriter — rewrites OSM wikipedia/wikidata/wikivoyage/appropedia -links to local Kiwix URLs where the article exists in a loaded ZIM. - -Falls back silently to public URLs when article is unavailable locally. -Caches positive results only in place_cache.db. - -Kiwix catalog is parsed from the OPDS Atom feed at startup and refreshed -hourly to pick up newly loaded ZIMs without a restart. - -Operations note: - - After loading a new ZIM, either restart RECON (forces fresh catalog - fetch) or wait up to 1 hour for automatic refresh. - - To invalidate the wiki cache (e.g. after ZIM update): - sqlite3 /opt/recon/data/place_cache.db "DELETE FROM wiki_cache;" -""" -import os -import re -import sqlite3 -import time -import xml.etree.ElementTree as ET -from urllib.parse import unquote, quote - -import requests as http_requests - -from .utils import setup_logging - -logger = setup_logging('recon.wiki_rewrite') - -# ── Configuration ─────────────────────────────────────────────────────── - -KIWIX_BASE = "http://localhost:8430" -KIWIX_PUBLIC_BASE = "https://wiki.echo6.co" -KIWIX_CATALOG_URL = f"{KIWIX_BASE}/catalog/v2/entries" -HEAD_TIMEOUT = 1.5 # seconds -CATALOG_REFRESH_INTERVAL = 3600 # 1 hour - -# OPDS Atom namespace -_ATOM_NS = "http://www.w3.org/2005/Atom" - -# ── ZIM catalog map ───────────────────────────────────────────────────── - -_zim_map = {} # source_type → content_path e.g. 'wikipedia' → 'wikipedia_en_all_maxi_2026-02' -_zim_map_ts = 0.0 # last refresh timestamp - -# Prefix-to-source-type mapping (order matters: longest prefix first) -_ZIM_PREFIX_MAP = [ - ('wikipedia_en_all', 'wikipedia'), - ('appropedia_en_all', 'appropedia'), - ('wikivoyage_en', 'wikivoyage'), - ('wikidata_en', 'wikidata'), -] - - -def _discover_zims(): - """Parse Kiwix OPDS Atom catalog to map source types to content paths.""" - global _zim_map, _zim_map_ts - - try: - resp = http_requests.get(KIWIX_CATALOG_URL, timeout=5) - if resp.status_code != 200: - logger.warning(f"Kiwix catalog returned HTTP {resp.status_code}") - return - - root = ET.fromstring(resp.content) - new_map = {} - - for entry in root.findall(f"{{{_ATOM_NS}}}entry"): - name_el = entry.find(f"{{{_ATOM_NS}}}name") - if name_el is None: - continue - book_name = name_el.text or "" - - # - content_path = None - for link in entry.findall(f"{{{_ATOM_NS}}}link"): - if link.get("type") == "text/html": - href = link.get("href", "") - if href.startswith("/content/"): - content_path = href[len("/content/"):] - break - - if not content_path: - continue - - # Match book name against known prefixes - for prefix, source_type in _ZIM_PREFIX_MAP: - if book_name.startswith(prefix): - new_map[source_type] = content_path - break - - _zim_map = new_map - _zim_map_ts = time.time() - logger.info(f"ZIM catalog refreshed: {new_map}") - - except Exception as e: - logger.warning(f"Failed to discover ZIMs from Kiwix catalog: {e}") - - -def _ensure_zim_map(): - """Lazy-load and refresh ZIM map if stale.""" - if not _zim_map or (time.time() - _zim_map_ts) > CATALOG_REFRESH_INTERVAL: - _discover_zims() - - -# ── Database (wiki_cache in place_cache.db) ───────────────────────────── - -_db_conn = None - - -def _get_db(): - """Return a module-level SQLite connection to place_cache.db (lazy init).""" - global _db_conn - if _db_conn is not None: - return _db_conn - - db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') - os.makedirs(db_dir, exist_ok=True) - db_path = os.path.join(db_dir, 'place_cache.db') - - _db_conn = sqlite3.connect(db_path, check_same_thread=False) - _db_conn.execute("PRAGMA journal_mode=WAL") - _db_conn.execute("PRAGMA synchronous=NORMAL") - _db_conn.execute(""" - CREATE TABLE IF NOT EXISTS wiki_cache ( - source_type TEXT NOT NULL, - article_id TEXT NOT NULL, - kiwix_url TEXT NOT NULL, - cached_at INTEGER NOT NULL, - PRIMARY KEY (source_type, article_id) - ) - """) - _db_conn.commit() - logger.info(f"Wiki cache table ready in {db_path}") - return _db_conn - - -# ── URL classification ────────────────────────────────────────────────── - -# Patterns for OSM wikipedia/wikidata tag values -_WIKI_TAG_RE = re.compile(r'^(?:en:)?(.+)$') # "en:Title" or just "Title" -_WIKI_URL_RE = re.compile(r'https?://en\.wikipedia\.org/wiki/(.+)') -_WIKIDATA_TAG_RE = re.compile(r'^(Q\d+)$') -_WIKIDATA_URL_RE = re.compile(r'https?://(?:www\.)?wikidata\.org/wiki/(Q\d+)') -_WIKIVOYAGE_URL_RE = re.compile(r'https?://en\.wikivoyage\.org/wiki/(.+)') -_APPROPEDIA_URL_RE = re.compile(r'https?://(?:www\.)?appropedia\.org/(?:wiki/)?(.+)') - - -def _normalize_article_id(article_id): - """Normalize article ID to MediaWiki/Kiwix convention: spaces → underscores.""" - return article_id.replace(' ', '_') - - -def classify_wiki_link(tag_name, value): - """ - Classify an OSM extratag value into (source_type, article_id) or None. - - tag_name: the extratags key ('wikipedia', 'wikidata', etc.) - value: the raw tag value from OSM - - Article IDs are normalized to MediaWiki convention (spaces → underscores). - """ - if not value or not isinstance(value, str): - return None - - value = value.strip() - - if tag_name == 'wikidata': - m = _WIKIDATA_TAG_RE.match(value) - if m: - return ('wikidata', m.group(1)) - m = _WIKIDATA_URL_RE.match(value) - if m: - return ('wikidata', m.group(1)) - return None - - if tag_name == 'wikipedia': - # URL form: https://en.wikipedia.org/wiki/Title - m = _WIKI_URL_RE.match(value) - if m: - return ('wikipedia', _normalize_article_id(unquote(m.group(1)))) - # Tag form: "en:Title" or "Title" - m = _WIKI_TAG_RE.match(value) - if m: - return ('wikipedia', _normalize_article_id(m.group(1))) - return None - - if tag_name == 'wikivoyage': - m = _WIKIVOYAGE_URL_RE.match(value) - if m: - return ('wikivoyage', _normalize_article_id(unquote(m.group(1)))) - # Plain tag: "en:Title" or "Title" - m = _WIKI_TAG_RE.match(value) - if m: - return ('wikivoyage', _normalize_article_id(m.group(1))) - return None - - if tag_name == 'appropedia': - m = _APPROPEDIA_URL_RE.match(value) - if m: - return ('appropedia', _normalize_article_id(unquote(m.group(1)))) - return ('appropedia', _normalize_article_id(value)) - - return None - - -# ── URL builders ──────────────────────────────────────────────────────── - -def build_kiwix_url(source_type, article_id): - """Build a public Kiwix URL. Returns None if source_type not in ZIM map.""" - _ensure_zim_map() - content_path = _zim_map.get(source_type) - if not content_path: - return None - return f"{KIWIX_PUBLIC_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" - - -_PUBLIC_URL_TEMPLATES = { - 'wikipedia': "https://en.wikipedia.org/wiki/{id}", - 'wikidata': "https://www.wikidata.org/wiki/{id}", - 'wikivoyage': "https://en.wikivoyage.org/wiki/{id}", - 'appropedia': "https://www.appropedia.org/wiki/{id}", -} - - -def build_public_url(source_type, article_id): - """Build the canonical public URL for a wiki article.""" - tmpl = _PUBLIC_URL_TEMPLATES.get(source_type) - if not tmpl: - return None - return tmpl.format(id=quote(article_id, safe='/:@!$&\'()*+,;=')) - - -# ── Kiwix availability check ─────────────────────────────────────────── - -def check_kiwix_has_article(source_type, article_id): - """ - Check if an article exists in local Kiwix. - - Returns (bool, url): - - (True, kiwix_public_url) if article exists locally - - (False, None) if not found or Kiwix unavailable - - Only positive results are cached. - """ - # Check cache first - db = _get_db() - row = db.execute( - "SELECT kiwix_url FROM wiki_cache WHERE source_type=? AND article_id=?", - (source_type, article_id) - ).fetchone() - if row: - return (True, row[0]) - - # Build local HEAD URL - _ensure_zim_map() - content_path = _zim_map.get(source_type) - if not content_path: - return (False, None) - - head_url = f"{KIWIX_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" - - try: - resp = http_requests.head(head_url, timeout=HEAD_TIMEOUT, allow_redirects=True) - if resp.status_code == 200: - kiwix_url = build_kiwix_url(source_type, article_id) - # Cache positive result - now = int(time.time()) - db.execute(""" - INSERT OR REPLACE INTO wiki_cache (source_type, article_id, kiwix_url, cached_at) - VALUES (?, ?, ?, ?) - """, (source_type, article_id, kiwix_url, now)) - db.commit() - return (True, kiwix_url) - else: - return (False, None) - except Exception as e: - logger.debug(f"Kiwix HEAD failed for {source_type}/{article_id}: {e}") - return (False, None) - - -# ── Primary entry point ──────────────────────────────────────────────── - -def rewrite_wiki_link(tag_name, value): - """ - Rewrite an OSM wiki tag value to a local Kiwix URL if available. - - Returns (url, 'local'|'public') or (None, None) if unrecognized. - """ - classified = classify_wiki_link(tag_name, value) - if not classified: - return (value, 'original') - - source_type, article_id = classified - - # Try local Kiwix - found, kiwix_url = check_kiwix_has_article(source_type, article_id) - if found and kiwix_url: - return (kiwix_url, 'local') - - # Fall back to public URL - public_url = build_public_url(source_type, article_id) - if public_url: - return (public_url, 'public') - - return (value, 'original') - - -# ── Discovery stubs (disabled, for future activation) ─────────────────── - -def discover_wikivoyage_article(name, category, lat, lon): - """ - Discover a related Wikivoyage article for a place. - Enabled by has_wiki_discovery. Currently returns None. - """ - return None - - -def discover_appropedia_article(name, category): - """ - Discover a related Appropedia article for a place. - Enabled by has_wiki_discovery. Currently returns None. - """ - return None diff --git a/lib/wiki_rewrite_api.py b/lib/wiki_rewrite_api.py deleted file mode 100644 index ae1d52e..0000000 --- a/lib/wiki_rewrite_api.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Wiki-rewrite API — read-only HTTP wrapper over wiki_rewrite.rewrite_wiki_link. - -Extraction #5 prep: lets the (future) navi-places service rewrite OSM wiki tags -to local Kiwix URLs over HTTP instead of importing recon's wiki_rewrite module -(which talks to Kiwix and the wiki_cache table in /opt/recon/data/place_cache.db). -Additive only — does not change place_detail's in-process `_enrich_wiki_links`. - - GET /api/wiki-rewrite?tag=&value= - -Public (no auth), matching /api/place/* and /api/wiki-enrich. 400 on missing -value or unknown tag. No 404 — an unclassifiable value returns the original -value with status "original" (mirrors rewrite_wiki_link). -""" -from flask import Blueprint, request, jsonify - -from .wiki_rewrite import rewrite_wiki_link - -wiki_rewrite_bp = Blueprint('wiki_rewrite', __name__) - -_KNOWN_TAGS = {'wikipedia', 'wikidata', 'wikivoyage', 'appropedia'} - - -@wiki_rewrite_bp.route('/api/wiki-rewrite') -def api_wiki_rewrite(): - tag = (request.args.get('tag') or '').strip().lower() - value = (request.args.get('value') or '').strip() - - if not value: - return jsonify({'error': 'value is required'}), 400 - if tag not in _KNOWN_TAGS: - return jsonify({'error': f"tag must be one of {sorted(_KNOWN_TAGS)}"}), 400 - - url, status = rewrite_wiki_link(tag, value) - return jsonify({'url': url, 'status': status}) diff --git a/lib/wiki_rewrite_api_test.py b/lib/wiki_rewrite_api_test.py deleted file mode 100644 index 2bc50f4..0000000 --- a/lib/wiki_rewrite_api_test.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Tests for the /api/wiki-rewrite endpoint (extraction #5 prep). - -Plain-assert style (recon's venv has no pytest). Builds a minimal Flask app -with only wiki_rewrite_bp registered. Mocks `wiki_rewrite.check_kiwix_has_article` -to control the local-Kiwix-hit vs. fallback paths without touching Kiwix or the -wiki_cache DB. classify_wiki_link (pure regex) runs for real. Run with pytest, -or directly: python -m lib.wiki_rewrite_api_test -""" -from flask import Flask - -from lib import wiki_rewrite -from lib.wiki_rewrite_api import wiki_rewrite_bp - - -def _client(kiwix_hit): - """kiwix_hit: (found_bool, url) returned by a stubbed check_kiwix_has_article.""" - wiki_rewrite.check_kiwix_has_article = lambda source_type, article_id: kiwix_hit - app = Flask(__name__) - app.register_blueprint(wiki_rewrite_bp) - return app.test_client() - - -def test_local_kiwix_hit(): - url = "https://wiki.echo6.co/content/wikipedia/Filer,_Idaho" - c = _client((True, url)) - resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer, Idaho") - assert resp.status_code == 200, resp.status_code - d = resp.get_json() - assert d["status"] == "local" - assert d["url"] == url - - -def test_public_fallback_when_not_in_kiwix(): - c = _client((False, None)) # not in Kiwix -> canonical public URL - resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer") - assert resp.status_code == 200, resp.status_code - d = resp.get_json() - assert d["status"] == "public" - assert d["url"] == "https://en.wikipedia.org/wiki/Filer" - - -def test_unclassifiable_returns_original(): - # 'wikidata' requires a Q-id; a non-matching value -> classify None -> original. - c = _client((False, None)) - resp = c.get("/api/wiki-rewrite?tag=wikidata&value=not-a-qid") - assert resp.status_code == 200, resp.status_code - d = resp.get_json() - assert d["status"] == "original" - assert d["url"] == "not-a-qid" - - -def test_missing_value_400(): - c = _client((False, None)) - assert c.get("/api/wiki-rewrite?tag=wikipedia").status_code == 400 - - -def test_unknown_tag_400(): - c = _client((False, None)) - assert c.get("/api/wiki-rewrite?tag=facebook&value=x").status_code == 400 - - -if __name__ == "__main__": - failures = 0 - for _name, _fn in sorted(globals().items()): - if _name.startswith("test_") and callable(_fn): - try: - _fn() - print(f"PASS {_name}") - except Exception as exc: # noqa: BLE001 - failures += 1 - print(f"FAIL {_name}: {exc!r}") - print("OK" if failures == 0 else f"{failures} FAILED") - raise SystemExit(1 if failures else 0) From e840a119dd8a2f68ba994ba7fdc099c504800384 Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 23 May 2026 23:09:49 -0600 Subject: [PATCH 72/72] cleanup: drop dead deployment_config references + orphaned deleted_contacts template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tidies stale references left behind by the navi extraction + decoupling work. - lib/deployment_config.py: the consumer-catalog docstring listed four in-process consumers that were all extracted/removed across cleanups #4/#5/#6/#27 (/api/landclass gate, google_places.py, place_detail.py, offroute/router.py). Replaced the stale 4-bullet list with an accurate note: recon has no remaining caller of get_deployment_config() today; the module is retained per cleanup #1. - lib/api.py: removed the now-dead `from .deployment_config import get_deployment_config` import (its only caller was the /api/landclass handler removed in #5 — zero call sites remain). - templates/knowledge/deleted_contacts.html: deleted — orphaned since cleanup #3 removed the contacts/dashboard routes; zero callers in recon. No functional change (the removed import was unused; the template unrendered). Co-authored-by: Claude Opus 4.7 (1M context) --- lib/api.py | 1 - lib/deployment_config.py | 12 ++--- templates/knowledge/deleted_contacts.html | 56 ----------------------- 3 files changed, 6 insertions(+), 63 deletions(-) delete mode 100644 templates/knowledge/deleted_contacts.html diff --git a/lib/api.py b/lib/api.py index 576f4ee..a0697bf 100644 --- a/lib/api.py +++ b/lib/api.py @@ -24,7 +24,6 @@ from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB -from .deployment_config import get_deployment_config logger = setup_logging('recon.api') diff --git a/lib/deployment_config.py b/lib/deployment_config.py index 83cc864..ab6aa17 100644 --- a/lib/deployment_config.py +++ b/lib/deployment_config.py @@ -4,12 +4,12 @@ Deployment profile loader. Reads RECON_PROFILE env var (default: "home"), loads the matching YAML from config/profiles/.yaml, and caches the parsed dict in memory. -Provides get_deployment_config() for in-process consumers of the profile: - - lib/api.py:api_landclass — the has_landclass feature-flag gate - - lib/google_places.py — Google Places enrichment config - - lib/place_detail.py — place-detail enrichment config (×4 call sites) - - lib/offroute/router.py — profile.offroute.* (osm_pbf_path / postgis_dsn / - densify_interval_m) +Exposes get_deployment_config() as the in-process accessor for the profile. + +Note: its former consumers (the /api/landclass gate, google_places, +place_detail, offroute/router) were all extracted to navi-* services or removed +across cleanups #4–#6/#27 — recon has no remaining caller of +get_deployment_config() today; the module is retained per cleanup #1. (The former /api/config HTTP endpoint that served this dict to the frontend was removed once navi-config (:8422) took over that route.) """ diff --git a/templates/knowledge/deleted_contacts.html b/templates/knowledge/deleted_contacts.html deleted file mode 100644 index 58a9ff5..0000000 --- a/templates/knowledge/deleted_contacts.html +++ /dev/null @@ -1,56 +0,0 @@ -{% extends "base.html" %} -{% block content %} -

    Deleted Contacts

    -{% if not contacts %} -

    No deleted contacts.

    -{% else %} - - - {% for c in contacts %} - - - - - - - - - {% endfor %} -
    LabelNameCategoryPhoneDeleted AtActions
    {{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} - - -
    -{% endif %} -{% endblock %} -{% block scripts %} - -{% endblock %}