Fix Zimit CLI: add subcommand, correct flag names, fix container cleanup

- Pass `zimit` as the command after the image name (the image's entrypoint execs its arguments)
- Zimit flags: `--url` → `--seeds`, Zimit's `--name` removed (Docker's `--name` for the container is kept), `--lang` → `--zim-lang`, `--workers` → `-w`
- Remove `--rm` from `docker run` so `docker logs` still works after the container exits; remove the container manually with `docker rm -f` afterwards

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-19 14:13:34 +00:00
commit 76076fc4ab

View file

@@ -148,17 +148,17 @@ def _crawl_zimit(job, config, stop_event, db):
description = f"{category} — mirror of {domain}" description = f"{category} — mirror of {domain}"
docker_cmd = [ docker_cmd = [
'docker', 'run', '--rm', 'docker', 'run',
'--name', container_name, '--name', container_name,
'-v', f'{tmp_dir}:/output', '-v', f'{tmp_dir}:/output',
docker_image, docker_image,
'--url', url, 'zimit',
'--name', _sanitize_filename(domain), '--seeds', url,
'--lang', language, '--zim-lang', language,
'--title', title, '--title', title,
'--description', description[:80], '--description', description[:80],
'--output', '/output', '--output', '/output',
'--workers', str(docker_workers), '-w', str(docker_workers),
] ]
logger.info(f"Job {job_id}: Zimit crawl starting — {url}") logger.info(f"Job {job_id}: Zimit crawl starting — {url}")
@@ -228,6 +228,9 @@ def _crawl_zimit(job, config, stop_event, db):
error_msg += f": {log_text[-500:]}" error_msg += f": {log_text[-500:]}"
except Exception: except Exception:
pass pass
# Remove container (no --rm flag, so we clean up manually)
subprocess.run(['docker', 'rm', '-f', container_name],
capture_output=True, timeout=10)
shutil.rmtree(tmp_dir, ignore_errors=True) shutil.rmtree(tmp_dir, ignore_errors=True)
return 0, None, error_msg return 0, None, error_msg
@@ -235,6 +238,10 @@ def _crawl_zimit(job, config, stop_event, db):
shutil.rmtree(tmp_dir, ignore_errors=True) shutil.rmtree(tmp_dir, ignore_errors=True)
return 0, None, f"Zimit error: {e}" return 0, None, f"Zimit error: {e}"
# Remove container (no --rm flag, so we clean up manually after getting logs)
subprocess.run(['docker', 'rm', '-f', container_name],
capture_output=True, timeout=10)
# Find the output ZIM file # Find the output ZIM file
zim_files = _glob.glob(os.path.join(tmp_dir, '*.zim')) zim_files = _glob.glob(os.path.join(tmp_dir, '*.zim'))
if not zim_files: if not zim_files: