Fix WAL checkpoint: copy -wal/-shm, checkpoint before export/merge/restore

This commit is contained in:
2026-04-20 19:40:08 +09:00
parent d7cbf7833f
commit f13e73b733
2 changed files with 29 additions and 3 deletions

Binary file not shown.

32
sync.sh
View File

@@ -14,6 +14,9 @@ echo "[$(date '+%H:%M:%S')] Sync from $HOSTNAME..."
rm -f "$SYNC_DIR/state_merged.db"
# ── Step 1: Export local state.db (via temp dir to avoid lock) ────────────
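# IMPORTANT: Hermes uses WAL mode. We must checkpoint the WAL before copying;
# otherwise committed transactions that have not yet been checkpointed (and so
# exist only in -wal) are lost. We also copy -wal/-shm as a safety net (after a
# successful checkpoint SQLite has nothing left to replay from them).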
python3 << PYEOF
import sqlite3, os, shutil, tempfile
@@ -23,15 +26,29 @@ tmpdir = tempfile.mkdtemp(prefix='hs_exp_')
try:
tmp_db = os.path.join(tmpdir, 'db')
shutil.copy2(local_db, tmp_db)
# Checkpoint WAL so all changes are flushed to the main db file
src = sqlite3.connect(local_db)
jm = src.execute('PRAGMA journal_mode').fetchone()[0]
if jm == 'wal':
src.execute('PRAGMA wal_checkpoint(TRUNCATE)')
src.close()
# Copy main db + wal + shm as a bundle
shutil.copy2(local_db, tmp_db)
if os.path.exists(local_db + '-wal'):
shutil.copy2(local_db + '-wal', tmp_db + '-wal')
if os.path.exists(local_db + '-shm'):
shutil.copy2(local_db + '-shm', tmp_db + '-shm')
# Verify by reading counts
test = sqlite3.connect(tmp_db)
r = test.execute('SELECT COUNT(*) FROM sessions').fetchone()[0]
m = test.execute('SELECT COUNT(*) FROM messages').fetchone()[0]
test.close()
shutil.copy2(tmp_db, export_db)
print(f'Exported: {r}s/{m}m')
print(f'Exported ({jm}): {r}s/{m}m')
finally:
shutil.rmtree(tmpdir, ignore_errors=True)
PYEOF
@@ -123,11 +140,15 @@ try:
# Copy to temp file first (avoids WAL lock issues with open connections)
tmp_copy = os.path.join(tmpdir, name)
shutil.copy2(db_file, tmp_copy)
# Also copy the -wal and -shm sidecar files if they exist
if os.path.exists(db_file + '-wal'):
shutil.copy2(db_file + '-wal', tmp_copy + '-wal')
if os.path.exists(db_file + '-shm'):
shutil.copy2(db_file + '-shm', tmp_copy + '-shm')
src = sqlite3.connect(tmp_copy)
# Checkpoint any WAL before reading (ensures consistent state)
if src.execute('PRAGMA journal_mode').fetchone()[0] == 'wal':
src.execute('PRAGMA wal_checkpoint(TRUNCATE)')
s_cnt = src.execute('SELECT COUNT(*) FROM sessions').fetchone()[0]
m_cnt = src.execute('SELECT COUNT(*) FROM messages').fetchone()[0]
@@ -147,6 +168,8 @@ try:
os.remove(tmp_copy)
if os.path.exists(tmp_copy + '-wal'):
os.remove(tmp_copy + '-wal')
if os.path.exists(tmp_copy + '-shm'):
os.remove(tmp_copy + '-shm')
conn.commit()
conn.close()
@@ -192,6 +215,9 @@ else:
shutil.copy2(merged_path, tmp_db)
test = sqlite3.connect(tmp_db)
# Ensure the merged db is checkpointed (it is expected to use DELETE journal mode, but checkpoint anyway to be safe)
if test.execute('PRAGMA journal_mode').fetchone()[0] == 'wal':
test.execute('PRAGMA wal_checkpoint(TRUNCATE)')
r = test.execute('SELECT COUNT(*) FROM sessions').fetchone()[0]
m = test.execute('SELECT COUNT(*) FROM messages').fetchone()[0]
test.close()