Fix restart loop issue - use PID file for clean bot restart

- Use PID file to track bot process instead of pkill pattern matching
- Add 3-second debounce after restart to prevent signal storms
- Properly distinguish bot process from config tool process
- Clean up PID file on bot exit
This commit is contained in:
Matt 2025-12-15 14:01:14 -07:00
commit 0d29ad49c2

View file

@@ -98,14 +98,22 @@ ttyd -W -p 7682 \
trap "kill %1 2>/dev/null; kill %2 2>/dev/null" EXIT trap "kill %1 2>/dev/null; kill %2 2>/dev/null" EXIT
# Restart watcher - monitors for restart signal from config tool # Restart watcher - monitors for restart signal from config tool
BOT_PID_FILE="/tmp/meshai_bot.pid"
( (
while true; do while true; do
if [ -f /tmp/meshai_restart ]; then if [ -f /tmp/meshai_restart ]; then
rm -f /tmp/meshai_restart rm -f /tmp/meshai_restart
echo "Restart signal received, restarting bot..." echo "Restart signal received, restarting bot..."
pkill -f "python.*meshai.*--config" --signal 0 2>/dev/null || true # Don't kill config tool # Kill bot using PID file
pkill -f "python -m meshai --config-file" || true if [ -f "$BOT_PID_FILE" ]; then
sleep 1 BOT_PID=$(cat "$BOT_PID_FILE")
if kill -0 "$BOT_PID" 2>/dev/null; then
kill "$BOT_PID" 2>/dev/null || true
echo "Sent TERM to bot (PID $BOT_PID)"
fi
fi
# Debounce - wait before checking for more signals
sleep 3
fi fi
sleep 2 sleep 2
done done
@@ -114,7 +122,11 @@ trap "kill %1 2>/dev/null; kill %2 2>/dev/null" EXIT
# Start the bot in a loop - retry on failure # Start the bot in a loop - retry on failure
echo "Starting MeshAI..." echo "Starting MeshAI..."
while true; do while true; do
python -m meshai --config-file "$MESHAI_CONFIG" || true python -m meshai --config-file "$MESHAI_CONFIG" &
BOT_PID=$!
echo "$BOT_PID" > "$BOT_PID_FILE"
wait $BOT_PID || true
rm -f "$BOT_PID_FILE"
echo "Bot exited. Check config at http://localhost:7682. Retrying in 5s..." echo "Bot exited. Check config at http://localhost:7682. Retrying in 5s..."
sleep 5 sleep 5
done done