mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 14:44:54 +02:00
Fix Zimit CLI: add subcommand, correct flag names, fix container cleanup
- Must pass `zimit` as command after image name (entrypoint execs args)
- --url → --seeds, --name removed, --lang → --zim-lang, --workers → -w
- Remove --rm so docker logs work after exit, manually rm container

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8945c82e3f
commit
76076fc4ab
1 changed file with 12 additions and 5 deletions
|
|
@@ -148,17 +148,17 @@ def _crawl_zimit(job, config, stop_event, db):
|
|||
description = f"{category} — mirror of {domain}"
|
||||
|
||||
docker_cmd = [
|
||||
'docker', 'run', '--rm',
|
||||
'docker', 'run',
|
||||
'--name', container_name,
|
||||
'-v', f'{tmp_dir}:/output',
|
||||
docker_image,
|
||||
'--url', url,
|
||||
'--name', _sanitize_filename(domain),
|
||||
'--lang', language,
|
||||
'zimit',
|
||||
'--seeds', url,
|
||||
'--zim-lang', language,
|
||||
'--title', title,
|
||||
'--description', description[:80],
|
||||
'--output', '/output',
|
||||
'--workers', str(docker_workers),
|
||||
'-w', str(docker_workers),
|
||||
]
|
||||
|
||||
logger.info(f"Job {job_id}: Zimit crawl starting — {url}")
|
||||
|
|
@@ -228,6 +228,9 @@ def _crawl_zimit(job, config, stop_event, db):
|
|||
error_msg += f": {log_text[-500:]}"
|
||||
except Exception:
|
||||
pass
|
||||
# Remove container (no --rm flag, so we clean up manually)
|
||||
subprocess.run(['docker', 'rm', '-f', container_name],
|
||||
capture_output=True, timeout=10)
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
return 0, None, error_msg
|
||||
|
||||
|
|
@@ -235,6 +238,10 @@ def _crawl_zimit(job, config, stop_event, db):
|
|||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
return 0, None, f"Zimit error: {e}"
|
||||
|
||||
# Remove container (no --rm flag, so we clean up manually after getting logs)
|
||||
subprocess.run(['docker', 'rm', '-f', container_name],
|
||||
capture_output=True, timeout=10)
|
||||
|
||||
# Find the output ZIM file
|
||||
zim_files = _glob.glob(os.path.join(tmp_dir, '*.zim'))
|
||||
if not zim_files:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue