From 999cf37626142efa4b468fe8373e80ca2c077e9e Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Apr 2026 23:30:59 +0000 Subject: [PATCH] Fix: Gemini "null" string bug in pdf_processor metadata voting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same fix as text_processor — Gemini sometimes returns the literal string "null" instead of JSON null for empty metadata fields. The voting logic and Gemini extraction now both treat "null" strings as None. Co-Authored-By: Claude Opus 4.6 --- lib/processors/pdf_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/processors/pdf_processor.py b/lib/processors/pdf_processor.py index 5d7a57f..b5a17dd 100644 --- a/lib/processors/pdf_processor.py +++ b/lib/processors/pdf_processor.py @@ -181,7 +181,7 @@ def _extract_gemini_metadata(pages_text, config): for field in ('title', 'author', 'edition', 'year'): val = data.get(field) - if val and isinstance(val, str) and val.strip(): + if val and isinstance(val, str) and val.strip() and val.strip().lower() != "null": result[field] = val.strip() return result @@ -223,7 +223,7 @@ def _vote_metadata(source_a, source_b, source_c): values = {} for name, src in sources.items(): val = src.get(field) - if val: + if val and str(val).strip().lower() != "null": values[name] = val if not values: