Fix: Gemini "null" string bug in pdf_processor metadata voting

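Same fix as in text_processor: Gemini sometimes returns the literal
string "null" instead of JSON null for empty metadata fields. Both
_extract_gemini_metadata and _vote_metadata now skip such values
(compared case-insensitively, after stripping whitespace), so a
"null" string is handled the same way as a missing field.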

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-15 23:30:59 +00:00
commit 999cf37626

View file

@@ -181,7 +181,7 @@ def _extract_gemini_metadata(pages_text, config):
for field in ('title', 'author', 'edition', 'year'):
val = data.get(field)
if val and isinstance(val, str) and val.strip():
if val and isinstance(val, str) and val.strip() and val.strip().lower() != "null":
result[field] = val.strip()
return result
@@ -223,7 +223,7 @@ def _vote_metadata(source_a, source_b, source_c):
values = {}
for name, src in sources.items():
val = src.get(field)
if val:
if val and str(val).strip().lower() != "null":
values[name] = val
if not values: