From 095bf8c2af35c1d4d0dabb7dd1492e808f6cf485 Mon Sep 17 00:00:00 2001
From: Matt
Date: Wed, 22 Apr 2026 04:08:12 +0000
Subject: [PATCH] Add Google Places (New) tertiary enrichment for business POIs

Fills opening_hours, phone, and website gaps when OSM + Overture data is
incomplete. Only fires for business-class POIs (amenity, shop, tourism,
leisure, office, craft). Daily API call cap with SQLite tracking.
cache_put now preserves google columns across cache refreshes.

Co-Authored-By: Claude Opus 4.6
---
 config/profiles/home.yaml        |   3 +-
 config/profiles/minimal_pi.yaml  |   1 +
 config/profiles/regional_pi.yaml |   1 +
 lib/google_places.py             | 397 +++++++++++++++++++++++++++++++
 lib/place_detail.py              | 132 +++++++++-
 5 files changed, 527 insertions(+), 7 deletions(-)
 create mode 100644 lib/google_places.py

diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml
index 99430a8..f44a58b 100644
--- a/config/profiles/home.yaml
+++ b/config/profiles/home.yaml
@@ -22,7 +22,7 @@ traffic:
 
 place_details:
   local_source: "nominatim"
-  local_bbox: [-117.2, 41.98, -111.04, 49.0]
+  local_bbox: [-125.0, 31.3, -104.0, 49.0]
   fallback_source: "overpass"
 
 services:
@@ -40,6 +40,7 @@ features:
   has_landclass: false
   has_address_book_write: false
   has_overture_enrichment: true
+  has_google_places_enrichment: true
 
 defaults:
   center: [42.5736, -114.6066]
diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml
index 07a61d5..108fdfd 100644
--- a/config/profiles/minimal_pi.yaml
+++ b/config/profiles/minimal_pi.yaml
@@ -35,6 +35,7 @@ features:
   has_landclass: false
   has_address_book_write: true
   has_overture_enrichment: false
+  has_google_places_enrichment: false
 
 defaults:
   center: [44.0, -114.0]
diff --git a/config/profiles/regional_pi.yaml b/config/profiles/regional_pi.yaml
index 291ee81..eaf7956 100644
--- a/config/profiles/regional_pi.yaml
+++ b/config/profiles/regional_pi.yaml
@@ -40,6 +40,7 @@ features:
   has_landclass: true
   has_address_book_write: true
   has_overture_enrichment: false
+  has_google_places_enrichment: false
 
 defaults:
   center: [44.0, -114.0]
diff --git a/lib/google_places.py b/lib/google_places.py
new file mode 100644
index 0000000..8272b81
--- /dev/null
+++ b/lib/google_places.py
@@ -0,0 +1,397 @@
+"""
+Google Places (New) API client for tertiary enrichment.
+
+Searches for business POIs and fetches details (opening hours, phone, website)
+when OSM + Overture data is incomplete. Uses field masks to minimize cost.
+
+API docs: https://developers.google.com/maps/documentation/places/web-service
+"""
+import json
+import os
+import sqlite3
+import time
+from datetime import date, timezone, datetime
+
+import requests
+
+from .utils import setup_logging
+
+logger = setup_logging('recon.google_places')
+
+API_BASE = 'https://places.googleapis.com/v1'
+DEFAULT_DAILY_CAP = 500
+REQUEST_TIMEOUT = 3  # seconds
+
+# Google day index → OSM abbreviation
+_DAY_ABBR = ['Su', 'Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa']
+
+_db_conn = None
+
+
+def _get_db():
+    """Return a module-level SQLite connection (lazy init)."""
+    global _db_conn
+    if _db_conn is not None:
+        return _db_conn
+
+    db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
+    db_path = os.path.join(db_dir, 'place_cache.db')
+    _db_conn = sqlite3.connect(db_path, check_same_thread=False)
+    _db_conn.execute("PRAGMA journal_mode=WAL")
+    _db_conn.execute("PRAGMA synchronous=NORMAL")
+    # Ensure google_api_calls table exists
+    _db_conn.execute("""
+        CREATE TABLE IF NOT EXISTS google_api_calls (
+            call_date TEXT PRIMARY KEY,
+            call_count INTEGER NOT NULL DEFAULT 0
+        )
+    """)
+    _db_conn.commit()
+    return _db_conn
+
+
+def _get_api_key():
+    """Return the Google Places API key from environment."""
+    key = os.environ.get('GOOGLE_PLACES_API_KEY')
+    if not key:
+        logger.error("GOOGLE_PLACES_API_KEY not set in environment")
+    return key
+
+
+def _get_daily_cap():
+    """Return the daily API call cap (configurable via deployment config)."""
+    try:
+        from .deployment_config import get_deployment_config
+        config = get_deployment_config()
+        return config.get('google_places', {}).get('daily_cap', DEFAULT_DAILY_CAP)
+    except Exception:
+        return DEFAULT_DAILY_CAP
+
+
+# ── Daily call counter ──────────────────────────────────────────────────
+
+def check_daily_cap():
+    """Return True if under daily cap, False if limit reached."""
+    db = _get_db()
+    today = date.today().isoformat()
+    row = db.execute(
+        "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,)
+    ).fetchone()
+    current = row[0] if row else 0
+    cap = _get_daily_cap()
+    if current >= cap:
+        logger.info(f"google_places: daily_cap_reached count={current} cap={cap}")
+        return False
+    return True
+
+
+def get_daily_count():
+    """Return today's API call count."""
+    db = _get_db()
+    today = date.today().isoformat()
+    row = db.execute(
+        "SELECT call_count FROM google_api_calls WHERE call_date = ?", (today,)
+    ).fetchone()
+    return row[0] if row else 0
+
+
+def increment_call_counter():
+    """Atomically increment today's API call counter."""
+    db = _get_db()
+    today = date.today().isoformat()
+    db.execute("""
+        INSERT INTO google_api_calls (call_date, call_count) VALUES (?, 1)
+        ON CONFLICT(call_date) DO UPDATE SET call_count = call_count + 1
+    """, (today,))
+    db.commit()
+
+
+def _set_daily_count_to_cap():
+    """Set today's counter to the cap value (soft-stop on quota error)."""
+    db = _get_db()
+    today = date.today().isoformat()
+    cap = _get_daily_cap()
+    db.execute("""
+        INSERT INTO google_api_calls (call_date, call_count) VALUES (?, ?)
+        ON CONFLICT(call_date) DO UPDATE SET call_count = ?
+    """, (today, cap, cap))
+    db.commit()
+
+
+# ── Google Places cache (on place_cache table) ─────────────────────────
+
+def cache_get_google(osm_type, osm_id):
+    """Return (google_place_id, google_data_dict) or (None, None)."""
+    db = _get_db()
+    row = db.execute(
+        "SELECT google_place_id, google_data FROM place_cache WHERE osm_type=? AND osm_id=?",
+        (osm_type, osm_id)
+    ).fetchone()
+    if row and row[0]:
+        data = None
+        if row[1]:
+            try:
+                data = json.loads(row[1])
+            except (json.JSONDecodeError, TypeError):
+                pass
+        return row[0], data
+    return None, None
+
+
+def cache_put_google(osm_type, osm_id, place_id, data):
+    """Store Google Places data for a cache entry (UPSERT on google columns)."""
+    db = _get_db()
+    now = int(time.time())
+    db.execute("""
+        INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at, google_place_id, google_data, google_fetched_at)
+        VALUES (?, ?, '', 'pending', 0, ?, ?, ?)
+        ON CONFLICT(osm_type, osm_id) DO UPDATE SET
+            google_place_id = excluded.google_place_id,
+            google_data = excluded.google_data,
+            google_fetched_at = excluded.google_fetched_at
+    """, (osm_type, osm_id, place_id, json.dumps(data) if data else None, now))
+    db.commit()
+
+
+# ── API calls ───────────────────────────────────────────────────────────
+
+def search_place(name, lat, lon, radius_m=200):
+    """
+    Search Google Places (New) for a business by name + location.
+    Returns the Google Place ID of the best match, or None.
+    """
+    key = _get_api_key()
+    if not key:
+        return None
+
+    if not check_daily_cap():
+        return None
+
+    try:
+        resp = requests.post(
+            f'{API_BASE}/places:searchText',
+            headers={
+                'Content-Type': 'application/json',
+                'X-Goog-Api-Key': key,
+                'X-Goog-FieldMask': 'places.id,places.displayName,places.location',
+            },
+            json={
+                'textQuery': name,
+                'locationBias': {
+                    'circle': {
+                        'center': {'latitude': lat, 'longitude': lon},
+                        'radius': float(radius_m),
+                    }
+                },
+                'maxResultCount': 1,
+            },
+            timeout=REQUEST_TIMEOUT,
+        )
+
+        increment_call_counter()
+
+        if resp.status_code == 429:
+            logger.warning("google_places: action=search place=%s result=rate_limited", name)
+            _set_daily_count_to_cap()
+            return None
+
+        if resp.status_code == 403:
+            logger.error("google_places: action=search place=%s result=forbidden (invalid key?)", name)
+            return None
+
+        if resp.status_code != 200:
+            logger.warning("google_places: action=search place=%s result=error status=%d", name, resp.status_code)
+            return None
+
+        data = resp.json()
+        places = data.get('places', [])
+        if not places:
+            logger.info("google_places: action=search place=%s result=miss", name)
+            return None
+
+        place_id = places[0].get('id')
+        display = places[0].get('displayName', {}).get('text', '?')
+        logger.info("google_places: action=search place=%s result=hit google_name=%s id=%s", name, display, place_id)
+        return place_id
+
+    except requests.exceptions.Timeout:
+        logger.warning("google_places: action=search place=%s result=timeout", name)
+        return None
+    except Exception as e:
+        logger.error("google_places: action=search place=%s result=error err=%s", name, e)
+        return None
+
+
+def get_place_details(place_id):
+    """
+    Fetch details for a Google Place ID.
+    Returns dict with {opening_hours, phone_number, website} or None.
+    """
+    key = _get_api_key()
+    if not key:
+        return None
+
+    if not check_daily_cap():
+        return None
+
+    try:
+        resp = requests.get(
+            f'{API_BASE}/places/{place_id}',
+            headers={
+                'X-Goog-Api-Key': key,
+                'X-Goog-FieldMask': 'regularOpeningHours,internationalPhoneNumber,websiteUri',
+            },
+            timeout=REQUEST_TIMEOUT,
+        )
+
+        increment_call_counter()
+
+        if resp.status_code == 429:
+            logger.warning("google_places: action=details id=%s result=rate_limited", place_id)
+            _set_daily_count_to_cap()
+            return None
+
+        if resp.status_code != 200:
+            logger.warning("google_places: action=details id=%s result=error status=%d", place_id, resp.status_code)
+            return None
+
+        data = resp.json()
+        result = {
+            'opening_hours': None,
+            'opening_hours_raw': None,
+            'phone_number': None,
+            'website': None,
+        }
+
+        # Phone
+        phone = data.get('internationalPhoneNumber')
+        if phone:
+            result['phone_number'] = phone.replace(' ', '').replace('-', '')
+
+        # Website
+        result['website'] = data.get('websiteUri')
+
+        # Opening hours
+        hours = data.get('regularOpeningHours')
+        if hours:
+            # Try OSM-compatible format from periods
+            periods = hours.get('periods', [])
+            if periods:
+                osm_str = _periods_to_osm(periods)
+                if osm_str:
+                    result['opening_hours'] = osm_str
+
+            # Fallback: weekday descriptions (human-readable)
+            if not result['opening_hours']:
+                descriptions = hours.get('weekdayDescriptions')
+                if descriptions:
+                    result['opening_hours_raw'] = descriptions
+
+        logger.info("google_places: action=details id=%s result=hit hours=%s phone=%s website=%s",
+                    place_id,
+                    'yes' if result['opening_hours'] or result['opening_hours_raw'] else 'no',
+                    'yes' if result['phone_number'] else 'no',
+                    'yes' if result['website'] else 'no')
+        return result
+
+    except requests.exceptions.Timeout:
+        logger.warning("google_places: action=details id=%s result=timeout", place_id)
+        return None
+    except Exception as e:
+        logger.error("google_places: action=details id=%s result=error err=%s", place_id, e)
+        return None
+
+
+# ── Opening hours conversion ────────────────────────────────────────────
+
+def _periods_to_osm(periods):
+    """
+    Convert Google Places periods array to OSM opening_hours string.
+
+    Google periods: [{"open": {"day": 0-6, "hour": H, "minute": M},
+                      "close": {"day": 0-6, "hour": H, "minute": M}}, ...]
+    Where day 0 = Sunday.
+
+    OSM format: "Mo-Fr 06:00-23:00; Sa-Su 07:00-23:00"
+    """
+    if not periods:
+        return None
+
+    # Check for 24/7: single period with no close, or open 00:00 close 00:00 next day
+    if len(periods) == 1:
+        p = periods[0]
+        o = p.get('open', {})
+        c = p.get('close')
+        if c is None and o.get('hour', 0) == 0 and o.get('minute', 0) == 0:
+            return '24/7'
+
+    # Build a map: day_index → "HH:MM-HH:MM"
+    day_hours = {}  # day_index → time_range string
+    for p in periods:
+        o = p.get('open', {})
+        c = p.get('close', {})
+        day = o.get('day', 0)
+        open_time = f"{o.get('hour', 0):02d}:{o.get('minute', 0):02d}"
+
+        if c:
+            close_time = f"{c.get('hour', 0):02d}:{c.get('minute', 0):02d}"
+            # Handle midnight closing (00:00 means end of day)
+            if close_time == '00:00':
+                close_time = '24:00'
+        else:
+            close_time = '24:00'
+
+        time_range = f"{open_time}-{close_time}"
+
+        # A day can have multiple periods (e.g., lunch break)
+        if day in day_hours:
+            day_hours[day] = day_hours[day] + ',' + time_range
+        else:
+            day_hours[day] = time_range
+
+    if not day_hours:
+        return None
+
+    # Check if all 7 days have same hours
+    unique_ranges = set(day_hours.values())
+    if len(day_hours) == 7 and len(unique_ranges) == 1:
+        hours = unique_ranges.pop()
+        if hours == '00:00-24:00':
+            return '24/7'
+        return hours  # implicit "every day"
+
+    # Group consecutive days with same hours
+    # Reorder to OSM convention: Mo(1) Tu(2) We(3) Th(4) Fr(5) Sa(6) Su(0)
+    osm_day_order = [1, 2, 3, 4, 5, 6, 0]
+    groups = []
+    current_days = []
+    current_hours = None
+
+    for day_idx in osm_day_order:
+        hours = day_hours.get(day_idx)
+        if hours == current_hours:
+            current_days.append(day_idx)
+        else:
+            if current_days and current_hours:
+                groups.append((current_days, current_hours))
+            current_days = [day_idx]
+            current_hours = hours
+
+    if current_days and current_hours:
+        groups.append((current_days, current_hours))
+
+    if not groups:
+        return None
+
+    # Format each group
+    parts = []
+    for days, hours in groups:
+        if len(days) == 1:
+            day_str = _DAY_ABBR[days[0]]
+        elif len(days) == 2:
+            day_str = f"{_DAY_ABBR[days[0]]},{_DAY_ABBR[days[1]]}"
+        else:
+            day_str = f"{_DAY_ABBR[days[0]]}-{_DAY_ABBR[days[-1]]}"
+        parts.append(f"{day_str} {hours}")
+
+    return '; '.join(parts)
diff --git a/lib/place_detail.py b/lib/place_detail.py
index 8ca2781..9c71b3b 100644
--- a/lib/place_detail.py
+++ b/lib/place_detail.py
@@ -73,13 +73,17 @@ def cache_get(osm_type, osm_id):
 
 
 def cache_put(osm_type, osm_id, data, source):
-    """Store a place detail result in the cache."""
+    """Store a place detail result in the cache (preserves google columns)."""
     db = _get_db()
-    db.execute(
-        "INSERT OR REPLACE INTO place_cache (osm_type, osm_id, data, source, cached_at) "
-        "VALUES (?, ?, ?, ?, ?)",
-        (osm_type, osm_id, json.dumps(data), source, int(time.time()))
-    )
+    now = int(time.time())
+    db.execute("""
+        INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at)
+        VALUES (?, ?, ?, ?, ?)
+        ON CONFLICT(osm_type, osm_id) DO UPDATE SET
+            data = excluded.data,
+            source = excluded.source,
+            cached_at = excluded.cached_at
+    """, (osm_type, osm_id, json.dumps(data), source, now))
     db.commit()
 
 
@@ -154,6 +158,120 @@ def _enrich_with_overture(result, osm_type, osm_id):
     return result
 
 
+
+# ── Google Places enrichment (tertiary, gap-fill only) ──────────────
+
+# Business POI classes eligible for Google enrichment
+_BUSINESS_CLASSES = {'amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'}
+
+# Fields Google can fill
+_GOOGLE_GAP_FIELDS = ('opening_hours', 'phone', 'website')
+
+
+def _enrich_with_google(result, osm_type, osm_id):
+    """
+    Tertiary enrichment via Google Places (New) API.
+    Only fires for business-type POIs when opening_hours, phone, or website
+    are still missing after OSM + Overture enrichment.
+    Fills only empty fields — never overwrites existing values.
+    """
+    # Check feature flag
+    try:
+        from .deployment_config import get_deployment_config
+        deploy_config = get_deployment_config()
+        features = deploy_config.get('features', {})
+        if not features.get('has_google_places_enrichment', False):
+            return result
+    except Exception:
+        return result
+
+    # Only enrich business-type POIs
+    poi_class = result.get('class', '')
+    if poi_class not in _BUSINESS_CLASSES:
+        return result
+
+    # Check if any gap fields are missing
+    extratags = result.get('extratags', {})
+    gaps = [f for f in _GOOGLE_GAP_FIELDS if not extratags.get(f)]
+    if not gaps:
+        logger.debug(f"google_places: skip {osm_type}/{osm_id} — no gaps")
+        return result
+
+    try:
+        from . import google_places
+    except ImportError:
+        logger.debug("google_places module not available")
+        return result
+
+    # Check Google cache first
+    cached_pid, cached_data = google_places.cache_get_google(osm_type, osm_id)
+    if cached_pid and cached_data:
+        _apply_google_data(result, cached_data, gaps)
+        result.setdefault('sources', {})['google_places'] = {
+            'place_id': cached_pid,
+            'source': 'cache',
+        }
+        logger.debug(f"google_places: cache hit for {osm_type}/{osm_id}")
+        return result
+
+    # Skip if already looked up and found nothing (place ID cached, no data)
+    if cached_pid is not None:
+        return result
+
+    # Daily cap check
+    if not google_places.check_daily_cap():
+        return result
+
+    # Search for the place
+    name = result.get('name', '')
+    centroid = result.get('centroid', {})
+    lat = centroid.get('lat')
+    lon = centroid.get('lon')
+    if not name or lat is None or lon is None:
+        return result
+
+    place_id = google_places.search_place(name, lat, lon)
+    if not place_id:
+        # Cache the miss to avoid repeated lookups
+        google_places.cache_put_google(osm_type, osm_id, '__miss__', None)
+        return result
+
+    # Get details
+    details = google_places.get_place_details(place_id)
+    if not details:
+        google_places.cache_put_google(osm_type, osm_id, place_id, None)
+        return result
+
+    # Cache the result
+    google_places.cache_put_google(osm_type, osm_id, place_id, details)
+
+    # Apply to result
+    _apply_google_data(result, details, gaps)
+    result.setdefault('sources', {})['google_places'] = {
+        'place_id': place_id,
+        'source': 'api',
+        'daily_count': google_places.get_daily_count(),
+    }
+
+    return result
+
+
+def _apply_google_data(result, google_data, gaps):
+    """Apply Google Places data to fill gap fields only."""
+    extratags = result.get('extratags', {})
+    if 'opening_hours' in gaps:
+        osm_hours = google_data.get('opening_hours')
+        if osm_hours:
+            extratags['opening_hours'] = osm_hours
+        elif google_data.get('opening_hours_raw'):
+            extratags['opening_hours_raw'] = google_data['opening_hours_raw']
+    if 'phone' in gaps and google_data.get('phone_number'):
+        extratags['phone'] = google_data['phone_number']
+    if 'website' in gaps and google_data.get('website'):
+        extratags['website'] = google_data['website']
+    result['extratags'] = extratags
+
+
 # ── Nominatim parsing ───────────────────────────────────────────────────
 
 # Nominatim address array uses rank_address to indicate what each entry is.
@@ -441,6 +559,7 @@ def get_place_detail(osm_type, osm_id):
 
     if nominatim_result:
         nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id)
+        nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id)
         cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local')
         return nominatim_result, 200
 
@@ -472,6 +591,7 @@ def get_place_detail(osm_type, osm_id):
 
     if overpass_result:
         overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id)
+        overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id)
         cache_put(osm_type, osm_id, overpass_result, 'overpass')
         return overpass_result, 200
 