fix(state): index tool_calls and tool_name in FTS5 for session_search

The FTS5 virtual tables (messages_fts, messages_fts_trigram) previously
only indexed the content column via external content mode. Tool calls
and tool names stored in the tool_calls (JSON) and tool_name columns
were invisible to FTS5 search.

Root cause: FTS5 triggers only INSERTed new.content into the index.

Changes:
- Switch FTS5 tables from external content (content=messages) to inline
  mode so that trigger-inserted content is both indexed and stored
- Update all 6 FTS5 triggers to concatenate content, tool_name, and
  tool_calls when indexing new messages
- Extend the short-CJK LIKE fallback to also search tool_name and
  tool_calls columns

Closes: #16751
This commit is contained in:
Bakey Dev.
2026-04-28 15:01:55 +08:00
committed by Teknium
parent 7d884f81c4
commit cfcad80ee1

View File

@ -102,13 +102,14 @@ CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestam
FTS_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
content,
content=messages,
content_rowid=id
content
);
CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN
INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
INSERT INTO messages_fts(rowid, content) VALUES (
new.id,
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN
@ -117,7 +118,10 @@ END;
CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
INSERT INTO messages_fts(rowid, content) VALUES (
new.id,
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
);
END;
"""
@ -128,13 +132,14 @@ END;
FTS_TRIGRAM_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
content,
content=messages,
content_rowid=id,
tokenize='trigram'
);
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
INSERT INTO messages_fts_trigram(rowid, content) VALUES (
new.id,
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
@ -143,7 +148,10 @@ END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
INSERT INTO messages_fts_trigram(rowid, content) VALUES (
new.id,
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
);
END;
"""
@ -1534,8 +1542,8 @@ class SessionDB:
# Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
# Fall back to LIKE substring search.
escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
like_where = ["m.content LIKE ? ESCAPE '\\'"]
like_params: list = [f"%{escaped}%"]
like_where = ["(m.content LIKE ? ESCAPE '\\' OR m.tool_name LIKE ? ESCAPE '\\' OR m.tool_calls LIKE ? ESCAPE '\\')"]
like_params: list = [f"%{escaped}%", f"%{escaped}%", f"%{escaped}%"]
if source_filter is not None:
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
like_params.extend(source_filter)