From 46b2afc56b79b9dac1d99e2f6324574a56df5f34 Mon Sep 17 00:00:00 2001 From: Kewe63 Date: Wed, 13 May 2026 17:35:44 +0300 Subject: [PATCH] fix(state): use TRUNCATE WAL checkpoint to prevent unbounded WAL growth PASSIVE checkpoint never shrinks the WAL file, causing state.db-wal to grow without bound. Change to TRUNCATE in _try_wal_checkpoint() and close() so the WAL is truncated regularly. Fixes #24034 --- hermes_state.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/hermes_state.py b/hermes_state.py index fef4a0d18..ca7ea5bd0 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -595,17 +595,27 @@ class SessionDB: ) def _try_wal_checkpoint(self) -> None: - """Best-effort PASSIVE WAL checkpoint. Never blocks, never raises. + """Best-effort TRUNCATE WAL checkpoint. Never raises. - Flushes committed WAL frames back into the main DB file for any - frames that no other connection currently needs. Keeps the WAL - from growing unbounded when many processes hold persistent + Flushes committed WAL frames back into the main DB file and + truncates the WAL file to zero bytes. Keeps the WAL from + growing unbounded when many processes hold persistent connections. + + PASSIVE checkpoint was previously used here, but it never + truncates the WAL file — the file stays at its high-water + mark until an explicit TRUNCATE is called (which only + happened inside the infrequent vacuum()). + + TRUNCATE may block writers briefly while checkpointing, but + _try_wal_checkpoint is called off the hot path (every 50 + writes) and already runs under ``self._lock``, so the + additional hold time is negligible. """ try: with self._lock: result = self._conn.execute( - "PRAGMA wal_checkpoint(PASSIVE)" + "PRAGMA wal_checkpoint(TRUNCATE)" ).fetchone() if result and result[1] > 0: logger.debug( @@ -618,13 +628,13 @@ class SessionDB: def close(self): """Close the database connection. - Attempts a PASSIVE WAL checkpoint first so that exiting processes - help keep the WAL file from growing unbounded. + Attempts a TRUNCATE WAL checkpoint first so that exiting processes + help shrink the WAL file. """ with self._lock: if self._conn: try: - self._conn.execute("PRAGMA wal_checkpoint(PASSIVE)") + self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") except Exception: pass self._conn.close()