perf(desktop): make session-id search SQL-bounded, not O(n)
search_sessions_by_id previously fetched up to 10k sessions via list_sessions_rich and filtered them in Python — O(n) per keystroke. Push the id match into SQL instead. - list_sessions_rich gains an optional id_query param: a case-insensitive LIKE pushed into the outer WHERE, matched against each surfaced row's id AND every id in its forward compression chain (via the existing chain CTE). Searching a compression root id or a tip id both resolve to the same projected conversation. LIKE wildcards in the needle are escaped. - search_sessions_by_id now fetches only matching rows (limit*4) and ranks exact > prefix > substring in Python over that small set. - web_server /api/sessions/search: route ID matches and content matches through one lineage-keyed dedup helper so an id-hit and a content-hit on the same conversation collapse to a single result (the contributor's version keyed ID hits by raw sid and content hits by root, which could double-list a compression tip). - command-center haystack also matches _lineage_root_id for parity. E2E verified against a real DB: exact match over 3000+ sessions materializes 1 row in Python (was ~3000), 5ms; root-id resolves to tip; LIKE-wildcard escaping holds. Follow-up to @0xharryriddle's feat(desktop): search sessions by id.
This commit is contained in:
@ -1628,46 +1628,6 @@ async def search_sessions(q: str = "", limit: int = 20):
|
||||
db = SessionDB()
|
||||
try:
|
||||
safe_limit = max(1, min(int(limit or 20), 100))
|
||||
seen: dict = {}
|
||||
|
||||
def add_result(sid: str, payload: dict) -> None:
|
||||
if sid and sid not in seen and len(seen) < safe_limit:
|
||||
seen[sid] = payload
|
||||
|
||||
# Direct ID matches first: users often paste a session id from CLI,
|
||||
# logs, or another Hermes surface. FTS can't find those unless the
|
||||
# id happens to appear in message text.
|
||||
for row in db.search_sessions_by_id(q, limit=safe_limit, include_archived=True):
|
||||
sid = row.get("id")
|
||||
preview = (row.get("preview") or "").strip()
|
||||
snippet = preview or f"Session ID: {sid}"
|
||||
add_result(
|
||||
sid,
|
||||
{
|
||||
"session_id": sid,
|
||||
"snippet": snippet,
|
||||
"role": None,
|
||||
"source": row.get("source"),
|
||||
"model": row.get("model"),
|
||||
"session_started": row.get("started_at"),
|
||||
},
|
||||
)
|
||||
|
||||
# Auto-add prefix wildcards so partial words match
|
||||
# e.g. "nimb" → "nimb*" matches "nimby"
|
||||
# Preserve quoted phrases and existing wildcards as-is
|
||||
import re
|
||||
terms = []
|
||||
for token in re.findall(r'"[^"]*"|\S+', q.strip()):
|
||||
if token.startswith('"') or token.endswith("*"):
|
||||
terms.append(token)
|
||||
else:
|
||||
terms.append(token + "*")
|
||||
prefix_query = " ".join(terms)
|
||||
# Over-fetch so lineage dedup can still surface `limit` distinct
|
||||
# conversations even when several hits collapse onto one root.
|
||||
fetch_limit = max(safe_limit * 5, 50)
|
||||
matches = db.search_messages(query=prefix_query, limit=fetch_limit)
|
||||
|
||||
# Walk parent_session_id to the compression root, memoized so a
|
||||
# chain of compression segments only costs one walk. We deliberately
|
||||
@ -1739,25 +1699,71 @@ async def search_sessions(q: str = "", limit: int = 20):
|
||||
tip_cache[root_id] = tip
|
||||
return tip
|
||||
|
||||
# Keep the best (first / most relevant) hit per compression root.
|
||||
# `seen` already holds the direct ID matches collected above; the
|
||||
# content matches extend it without clobbering them.
|
||||
for m in matches:
|
||||
raw_sid = m["session_id"]
|
||||
# Both ID matches and content matches share one keyspace, keyed by
|
||||
# compression lineage root, so an id-hit and a content-hit on the
|
||||
# same logical conversation collapse to a single result. The first
|
||||
# hit for a lineage wins; ID matches run first and take priority.
|
||||
seen: dict = {}
|
||||
|
||||
def add_lineage_result(raw_sid: str, payload: dict) -> None:
|
||||
if not raw_sid:
|
||||
return
|
||||
root = compression_root(raw_sid)
|
||||
if root in seen:
|
||||
continue
|
||||
if root in seen or len(seen) >= safe_limit:
|
||||
return
|
||||
payload = dict(payload)
|
||||
payload["session_id"] = lineage_tip(root)
|
||||
payload["lineage_root"] = root
|
||||
seen[root] = payload
|
||||
|
||||
# Direct ID matches first: users often paste a session id from CLI,
|
||||
# logs, or another Hermes surface. FTS can't find those unless the
|
||||
# id happens to appear in message text. search_sessions_by_id is
|
||||
# SQL-bounded, so this stays cheap even with thousands of sessions.
|
||||
for row in db.search_sessions_by_id(q, limit=safe_limit, include_archived=True):
|
||||
sid = row.get("id")
|
||||
preview = (row.get("preview") or "").strip()
|
||||
snippet = preview or f"Session ID: {sid}"
|
||||
add_lineage_result(
|
||||
sid,
|
||||
{
|
||||
"snippet": snippet,
|
||||
"role": None,
|
||||
"source": row.get("source"),
|
||||
"model": row.get("model"),
|
||||
"session_started": row.get("started_at"),
|
||||
},
|
||||
)
|
||||
|
||||
# Auto-add prefix wildcards so partial words match
|
||||
# e.g. "nimb" → "nimb*" matches "nimby"
|
||||
# Preserve quoted phrases and existing wildcards as-is
|
||||
import re
|
||||
terms = []
|
||||
for token in re.findall(r'"[^"]*"|\S+', q.strip()):
|
||||
if token.startswith('"') or token.endswith("*"):
|
||||
terms.append(token)
|
||||
else:
|
||||
terms.append(token + "*")
|
||||
prefix_query = " ".join(terms)
|
||||
# Over-fetch so lineage dedup can still surface `limit` distinct
|
||||
# conversations even when several hits collapse onto one root.
|
||||
fetch_limit = max(safe_limit * 5, 50)
|
||||
matches = db.search_messages(query=prefix_query, limit=fetch_limit)
|
||||
|
||||
for m in matches:
|
||||
if len(seen) >= safe_limit:
|
||||
break
|
||||
seen[root] = {
|
||||
"session_id": lineage_tip(root),
|
||||
"lineage_root": root,
|
||||
"snippet": m.get("snippet", ""),
|
||||
"role": m.get("role"),
|
||||
"source": m.get("source"),
|
||||
"model": m.get("model"),
|
||||
"session_started": m.get("session_started"),
|
||||
}
|
||||
add_lineage_result(
|
||||
m["session_id"],
|
||||
{
|
||||
"snippet": m.get("snippet", ""),
|
||||
"role": m.get("role"),
|
||||
"source": m.get("source"),
|
||||
"model": m.get("model"),
|
||||
"session_started": m.get("session_started"),
|
||||
},
|
||||
)
|
||||
return {"results": list(seen.values())}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
Reference in New Issue
Block a user