Migration: consolidate Echo6 docs to cortex with full infrastructure cleanup sync
- Documents recent infrastructure cleanup (8 CTs destroyed, 35 DNS records removed, Headscale cleanup) - Adds 24 new runbooks covering Authentik, PeerTube, Meshtastic, RECON, Proxmox, Mailcow, Internet Archive, GPU routing - Adds project documentation for headscale, vaultwarden, peertube, matrix, mmud, advbbs, arr stack - Updates services.md, environment.md, caddy.md, authentik.md to match live infrastructure - Removes 4 deprecated runbook duplicates (canonical versions live in projects/) - Adds .gitignore for binary archives and editor temp files Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
89834796ff
commit
e9231ac24a
93 changed files with 51223 additions and 254 deletions
177
runbooks/pg-backup.md
Normal file
177
runbooks/pg-backup.md
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
# PostgreSQL Backup (Docker)
|
||||
|
||||
Automated pg_dump backups for any Docker-hosted PostgreSQL instance. Retention, integrity check, and restore testing included.
|
||||
|
||||
---
|
||||
|
||||
## Inputs
|
||||
|
||||
```
|
||||
CONTAINER_NAME= # Docker container name (e.g., "matrix-postgres")
|
||||
DB_NAME= # Database to back up (e.g., "synapse")
|
||||
DB_USER= # Database user (e.g., "synapse")
|
||||
BACKUP_DIR= # Host directory for backups (e.g., "/opt/matrix/backups")
|
||||
RETENTION_DAYS=14 # Days to keep backups
|
||||
CRON_SCHEDULE="0 3 * * *" # Daily at 3AM
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Create Backup Directory
|
||||
|
||||
```bash
|
||||
mkdir -p ${BACKUP_DIR}
|
||||
chmod 700 ${BACKUP_DIR}
|
||||
mkdir -p $(dirname ${BACKUP_DIR})/scripts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Create Backup Script
|
||||
|
||||
Create `$(dirname ${BACKUP_DIR})/scripts/pg_backup.sh`:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# --- Configuration (edit per service) ---
|
||||
CONTAINER_NAME="${CONTAINER_NAME}"
|
||||
DB_NAME="${DB_NAME}"
|
||||
DB_USER="${DB_USER}"
|
||||
BACKUP_DIR="${BACKUP_DIR}"
|
||||
RETENTION_DAYS=${RETENTION_DAYS}
|
||||
|
||||
# --- Derived ---
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
BACKUP_FILE="${BACKUP_DIR}/${DB_NAME}_${TIMESTAMP}.sql.gz"
|
||||
LOG_FILE="${BACKUP_DIR}/backup.log"
|
||||
|
||||
log() {
|
||||
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "${LOG_FILE}"
|
||||
}
|
||||
|
||||
# --- Backup ---
|
||||
log "Starting backup of ${DB_NAME} from ${CONTAINER_NAME}"
|
||||
|
||||
docker exec ${CONTAINER_NAME} pg_dump \
|
||||
-U ${DB_USER} \
|
||||
-d ${DB_NAME} \
|
||||
--format=plain \
|
||||
--no-owner \
|
||||
--no-privileges \
|
||||
| gzip > "${BACKUP_FILE}"
|
||||
|
||||
if [ $? -eq 0 ] && [ -s "${BACKUP_FILE}" ]; then
|
||||
SIZE=$(du -h "${BACKUP_FILE}" | cut -f1)
|
||||
log "Backup successful: ${BACKUP_FILE} (${SIZE})"
|
||||
else
|
||||
log "ERROR: Backup failed or produced empty file"
|
||||
rm -f "${BACKUP_FILE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --- Retention ---
|
||||
DELETED=$(find ${BACKUP_DIR} -name "${DB_NAME}_*.sql.gz" -mtime +${RETENTION_DAYS} -print -delete | wc -l)
|
||||
log "Retention cleanup: removed ${DELETED} backups older than ${RETENTION_DAYS} days"
|
||||
|
||||
# --- Integrity ---
|
||||
if gzip -t "${BACKUP_FILE}" 2>/dev/null; then
|
||||
log "Integrity check: PASS"
|
||||
else
|
||||
log "ERROR: Integrity check FAILED — backup is corrupt"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log "Backup complete."
|
||||
```
|
||||
|
||||
Make executable:
|
||||
|
||||
```bash
|
||||
chmod +x $(dirname ${BACKUP_DIR})/scripts/pg_backup.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Test Manually
|
||||
|
||||
```bash
|
||||
$(dirname ${BACKUP_DIR})/scripts/pg_backup.sh
|
||||
ls -lh ${BACKUP_DIR}/*.sql.gz
|
||||
cat ${BACKUP_DIR}/backup.log
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Schedule via Cron
|
||||
|
||||
```bash
|
||||
(crontab -l 2>/dev/null; echo "${CRON_SCHEDULE} $(dirname ${BACKUP_DIR})/scripts/pg_backup.sh >> ${BACKUP_DIR}/cron.log 2>&1") | crontab -
|
||||
crontab -l | grep pg_backup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Test Restore (Non-Destructive)
|
||||
|
||||
```bash
|
||||
# Create throwaway test database
|
||||
docker exec ${CONTAINER_NAME} psql -U ${DB_USER} -c "CREATE DATABASE ${DB_NAME}_restore_test;"
|
||||
|
||||
# Restore latest backup into it
|
||||
LATEST=$(ls -t ${BACKUP_DIR}/${DB_NAME}_*.sql.gz | head -1)
|
||||
gunzip -c "${LATEST}" | docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME}_restore_test
|
||||
|
||||
# Spot-check (adjust table names per service)
|
||||
docker exec ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME}_restore_test -c "\dt" | head -20
|
||||
|
||||
# Cleanup
|
||||
docker exec ${CONTAINER_NAME} psql -U ${DB_USER} -c "DROP DATABASE ${DB_NAME}_restore_test;"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Emergency Restore
|
||||
|
||||
```bash
|
||||
# 1. Stop the application container (not postgres)
|
||||
docker stop <app-container>
|
||||
|
||||
# 2. Drop and recreate
|
||||
docker exec ${CONTAINER_NAME} psql -U ${DB_USER} -c "DROP DATABASE ${DB_NAME};"
|
||||
docker exec ${CONTAINER_NAME} psql -U ${DB_USER} -c "CREATE DATABASE ${DB_NAME} OWNER ${DB_USER};"
|
||||
|
||||
# 3. Restore
|
||||
LATEST=$(ls -t ${BACKUP_DIR}/${DB_NAME}_*.sql.gz | head -1)
|
||||
gunzip -c "${LATEST}" | docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME}
|
||||
|
||||
# 4. Restart app
|
||||
docker start <app-container>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Monitoring Hook (Optional)
|
||||
|
||||
Add to your monitoring stack:
|
||||
|
||||
```bash
|
||||
LATEST_AGE=$(( $(date +%s) - $(stat -c %Y $(ls -t ${BACKUP_DIR}/${DB_NAME}_*.sql.gz | head -1)) ))
|
||||
if [ ${LATEST_AGE} -gt 90000 ]; then
|
||||
echo "WARNING: Latest ${DB_NAME} backup is more than 25 hours old"
|
||||
fi
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Checklist
|
||||
|
||||
```
|
||||
□ Manual backup produces valid .sql.gz
|
||||
□ Backup log shows success
|
||||
□ Gzip integrity check passes
|
||||
□ Cron job installed
|
||||
□ Test restore succeeds
|
||||
□ Test database cleaned up
|
||||
```
|
||||
Loading…
Add table
Add a link
Reference in a new issue