mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Extract _full_zim_cleanup helper, add SIGHUP + scrape_jobs cleanup
- Extract shared _full_zim_cleanup(source_id) from api_kiwix_remove
- Add SIGHUP to kiwix-serve after kiwix-manage remove
- Delete linked scrape_jobs rows during ZIM removal
- Update api_scraper_delete to do full ZIM cleanup when applicable
- Set chromium_path for single-file browser crawl support
- Add status.db to .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
45c3bb8d56
commit
f0b160ef7c
3 changed files with 87 additions and 12 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -27,3 +27,4 @@ recon.db
|
|||
|
||||
# Kiwix binary tools (installed from tarball)
|
||||
bin/
|
||||
status.db
|
||||
|
|
|
|||
|
|
@ -487,7 +487,7 @@ scraper:
|
|||
# SingleFile CLI settings (browser crawl mode)
|
||||
singlefile:
|
||||
executable: single-file
|
||||
chromium_path: "" # Auto-detected from Playwright if empty
|
||||
chromium_path: "/usr/bin/chromium-browser"
|
||||
crawl_max_depth: 10
|
||||
|
||||
# Stream B: New Library Pipeline
|
||||
|
|
|
|||
96
lib/api.py
96
lib/api.py
|
|
@ -2060,23 +2060,24 @@ def api_kiwix_upload():
|
|||
|
||||
|
||||
|
||||
@app.route('/api/kiwix/remove/<int:source_id>', methods=['POST'])
|
||||
def api_kiwix_remove(source_id):
|
||||
"""Remove a ZIM source: delete vectors, DB records, library entry, and file."""
|
||||
def _full_zim_cleanup(source_id):
|
||||
"""Full ZIM cleanup: Qdrant vectors, DB records, kiwix-manage, SIGHUP, file delete.
|
||||
Returns dict with results. Caller handles cache refresh."""
|
||||
import subprocess
|
||||
import signal
|
||||
import requests as req
|
||||
|
||||
db = StatusDB()
|
||||
conn = db._get_conn()
|
||||
row = conn.execute("SELECT * FROM zim_sources WHERE id = ?", (source_id,)).fetchone()
|
||||
if not row:
|
||||
return jsonify({'error': 'Source not found'}), 404
|
||||
return None
|
||||
|
||||
zim_source = dict(row)
|
||||
zim_filename = zim_source['zim_filename']
|
||||
zim_path = zim_source['zim_path']
|
||||
zim_title = zim_source.get('title', zim_filename)
|
||||
results = {'vectors_deleted': 0, 'docs_deleted': 0, 'file_deleted': False}
|
||||
results = {'vectors_deleted': 0, 'docs_deleted': 0, 'file_deleted': False, 'scrape_jobs_deleted': 0}
|
||||
|
||||
# Step 1: Find all document hashes for this ZIM source
|
||||
doc_hashes = [r['hash'] for r in conn.execute(
|
||||
|
|
@ -2135,7 +2136,6 @@ def api_kiwix_remove(source_id):
|
|||
|
||||
# Step 4: Remove from kiwix-serve library
|
||||
try:
|
||||
# Get the book ID from library.xml
|
||||
subprocess.run(
|
||||
['/opt/recon/bin/kiwix-manage', '/mnt/kiwix/library.xml', 'remove', zim_filename.replace('.zim', '')],
|
||||
capture_output=True, text=True, timeout=10
|
||||
|
|
@ -2143,6 +2143,16 @@ def api_kiwix_remove(source_id):
|
|||
except Exception as e:
|
||||
logger.warning(f"kiwix-manage remove failed: {e}")
|
||||
|
||||
# Step 4b: SIGHUP kiwix-serve to reload library
|
||||
try:
|
||||
result = subprocess.run(['pidof', 'kiwix-serve'], capture_output=True, text=True, timeout=5)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
pid = int(result.stdout.strip().split()[0])
|
||||
os.kill(pid, signal.SIGHUP)
|
||||
logger.info(f"Sent SIGHUP to kiwix-serve (pid {pid})")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to signal kiwix-serve: {e}")
|
||||
|
||||
# Step 5: Delete the ZIM file
|
||||
if os.path.isfile(zim_path):
|
||||
try:
|
||||
|
|
@ -2152,13 +2162,37 @@ def api_kiwix_remove(source_id):
|
|||
logger.warning(f"ZIM file delete failed: {e}")
|
||||
results['file_deleted'] = False
|
||||
|
||||
# Step 6: Delete any linked scrape_jobs rows
|
||||
try:
|
||||
res = conn.execute("DELETE FROM scrape_jobs WHERE zim_source_id = ?", (source_id,))
|
||||
conn.commit()
|
||||
results['scrape_jobs_deleted'] = res.rowcount
|
||||
except Exception as e:
|
||||
logger.warning(f"scrape_jobs cleanup failed: {e}")
|
||||
|
||||
logger.info(f"Full ZIM cleanup for source {source_id} ('{zim_title}'): {results}")
|
||||
return results
|
||||
|
||||
|
||||
@app.route('/api/kiwix/remove/<int:source_id>', methods=['POST'])
def api_kiwix_remove(source_id):
    """Remove a ZIM source: delete vectors, DB records, library entry, and file.

    Looks up the ``zim_sources`` row for ``source_id``, delegates the actual
    removal to ``_full_zim_cleanup``, then rebuilds the cached Kiwix source
    list.

    Returns:
        A JSON response ``{'ok': True, 'results': <cleanup dict>}`` on
        success, or ``({'error': ...}, 404)`` when the source row does not
        exist (either up front or by the time cleanup runs).
    """
    db = StatusDB()
    conn = db._get_conn()
    row = conn.execute("SELECT * FROM zim_sources WHERE id = ?", (source_id,)).fetchone()
    if not row:
        return jsonify({'error': 'Source not found'}), 404

    # Capture the display title *before* cleanup: the final log line needs it,
    # and this function no longer gets it from anywhere else (the local of the
    # same name lives inside _full_zim_cleanup). Without this the log call
    # below raised NameError after the cleanup had already been performed.
    zim_source = dict(row)
    zim_title = zim_source.get('title', zim_source['zim_filename'])

    results = _full_zim_cleanup(source_id)
    if results is None:
        # The helper re-queries the row and returns None when it is missing.
        return jsonify({'error': 'Source not found during cleanup'}), 404

    # Refresh cache (best effort: a stale cache must not fail the removal,
    # but the failure should be visible in the logs).
    try:
        _cache['kiwix_sources'] = _build_kiwix_sources()
    except Exception as e:
        logger.warning(f"kiwix_sources cache refresh failed: {e}")

    logger.info(f"Removed ZIM source '{zim_title}': {results}")
    return jsonify({'ok': True, 'results': results})
|
||||
|
||||
|
||||
|
|
@ -2375,20 +2409,60 @@ def api_scraper_retry(job_id):
|
|||
|
||||
def _cleanup_orphan_zim(zim_filename):
    """Best-effort cleanup of a ZIM file that has no ``zim_sources`` row.

    Deletes ``/mnt/kiwix/<zim_filename>`` if it exists, asks ``kiwix-manage``
    to remove the matching library entry, then sends SIGHUP to kiwix-serve so
    it reloads its library. Each step logs a warning on failure instead of
    raising, so one failing step never blocks the others.
    """
    import subprocess
    import signal

    # NOTE(review): zim_filename is joined onto /mnt/kiwix unchecked; confirm
    # it can never contain path separators or '..' before trusting this delete.
    zim_path = os.path.join('/mnt/kiwix', zim_filename)
    if os.path.isfile(zim_path):
        try:
            os.remove(zim_path)
            logger.info(f"Deleted orphan ZIM file: {zim_path}")
        except Exception as e:
            logger.warning(f"Failed to delete orphan ZIM file {zim_path}: {e}")

    # NOTE(review): nesting was lost in the rendered diff; the library removal
    # and SIGHUP are assumed to run whether or not the file was on disk.
    try:
        subprocess.run(
            ['/opt/recon/bin/kiwix-manage', '/mnt/kiwix/library.xml', 'remove',
             zim_filename.replace('.zim', '')],
            capture_output=True, text=True, timeout=10
        )
    except Exception as e:
        logger.warning(f"kiwix-manage remove failed for orphan: {e}")

    try:
        result = subprocess.run(['pidof', 'kiwix-serve'], capture_output=True, text=True, timeout=5)
        if result.returncode == 0 and result.stdout.strip():
            # pidof may print several PIDs; only the first one is signalled.
            pid = int(result.stdout.strip().split()[0])
            os.kill(pid, signal.SIGHUP)
            logger.info(f"Sent SIGHUP to kiwix-serve (pid {pid})")
    except Exception as e:
        logger.warning(f"Failed to signal kiwix-serve: {e}")


@app.route('/api/scraper/delete/<int:job_id>', methods=['POST'])
def api_scraper_delete(job_id):
    """Delete a scrape job and clean up any associated ZIM artifacts.

    A job that is currently running is rejected with HTTP 400. If the job is
    linked to a ``zim_sources`` row, the full ZIM cleanup helper runs and the
    Kiwix source cache is rebuilt. If it only carries a ``zim_filename``, the
    orphan file and library entry are removed on a best-effort basis.

    Returns:
        ``{'ok': True, 'zim_cleanup': <dict or None>}`` on success, where
        ``zim_cleanup`` is the result of ``_full_zim_cleanup`` (``None`` when
        no linked source was cleaned up); ``({'error': ...}, 404)`` when the
        job does not exist; ``({'error': ...}, 400)`` when it is running.
    """
    db = StatusDB()
    job = db.get_scrape_job(job_id)
    if not job:
        return jsonify({'error': 'Job not found'}), 404

    if job['status'] == 'running':
        return jsonify({'error': 'Cannot delete a running job \u2014 cancel it first'}), 400

    zim_cleanup_results = None

    if job.get('zim_source_id'):
        # If the job has a linked zim_source, do full cleanup
        zim_cleanup_results = _full_zim_cleanup(job['zim_source_id'])
        try:
            _cache['kiwix_sources'] = _build_kiwix_sources()
        except Exception as e:
            # Best effort: a stale cache must not fail the delete.
            logger.warning(f"kiwix_sources cache refresh failed: {e}")
    elif job.get('zim_filename'):
        # No zim_source row, but there may be an orphan file + library entry
        _cleanup_orphan_zim(job['zim_filename'])

    # Delete the scrape_jobs row (may already be gone if _full_zim_cleanup deleted it)
    conn = db._get_conn()
    conn.execute("DELETE FROM scrape_jobs WHERE id = ?", (job_id,))
    conn.commit()

    logger.info(f"Scraper job {job_id} deleted (zim_cleanup={zim_cleanup_results})")
    return jsonify({'ok': True, 'zim_cleanup': zim_cleanup_results})
|
||||
|
||||
|
||||
@app.route('/api/scraper/clear-failed', methods=['POST'])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue