perf(desktop): make session-id search SQL-bounded, not O(n)

search_sessions_by_id previously fetched up to 10k sessions via list_sessions_rich and filtered them in Python — O(n) per keystroke. Push the id match into SQL instead.

- list_sessions_rich gains an optional id_query param: a case-insensitive LIKE pushed into the outer WHERE, matched against each surfaced row's id AND every id in its forward compression chain (via the existing chain CTE). Searching a compression root id or a tip id both resolve to the same projected conversation. LIKE wildcards in the needle are escaped.
- search_sessions_by_id now fetches only matching rows (up to limit*4) and ranks exact > prefix > substring in Python over that small set.
- web_server /api/sessions/search: route ID matches and content matches through one lineage-keyed dedup helper so an id-hit and a content-hit on the same conversation collapse to a single result (the contributor's version keyed ID hits by raw sid and content hits by root, which could double-list a compression tip).
- command-center haystack also matches _lineage_root_id for parity.

E2E verified against a real DB: exact match over 3000+ sessions materializes 1 row in Python (was ~3000), 5ms; root-id resolves to tip; LIKE-wildcard escaping holds.

Follow-up to @0xharryriddle's feat(desktop): search sessions by id.
2026-06-04 06:05:22 -07:00
parent 9ecc331be8
commit 580d924097
4 changed files with 133 additions and 73 deletions
--- a/apps/desktop/src/app/command-center/index.tsx
+++ b/apps/desktop/src/app/command-center/index.tsx
@ -156,7 +156,7 @@ export function CommandCenterView({ initialSection, onClose, onDeleteSession, on
    }

    return sorted.filter(session => {
-      const haystack = `${sessionTitle(session)} ${session.id}`.toLowerCase()
+      const haystack = `${sessionTitle(session)} ${session.id} ${session._lineage_root_id ?? ''}`.toLowerCase()

      return haystack.includes(needle)
    })
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -1628,46 +1628,6 @@ async def search_sessions(q: str = "", limit: int = 20):
        db = SessionDB()
        try:
            safe_limit = max(1, min(int(limit or 20), 100))
-            seen: dict = {}
-
-            def add_result(sid: str, payload: dict) -> None:
-                if sid and sid not in seen and len(seen) < safe_limit:
-                    seen[sid] = payload
-
-            # Direct ID matches first: users often paste a session id from CLI,
-            # logs, or another Hermes surface. FTS can't find those unless the
-            # id happens to appear in message text.
-            for row in db.search_sessions_by_id(q, limit=safe_limit, include_archived=True):
-                sid = row.get("id")
-                preview = (row.get("preview") or "").strip()
-                snippet = preview or f"Session ID: {sid}"
-                add_result(
-                    sid,
-                    {
-                        "session_id": sid,
-                        "snippet": snippet,
-                        "role": None,
-                        "source": row.get("source"),
-                        "model": row.get("model"),
-                        "session_started": row.get("started_at"),
-                    },
-                )
-
-            # Auto-add prefix wildcards so partial words match
-            # e.g. "nimb" → "nimb*" matches "nimby"
-            # Preserve quoted phrases and existing wildcards as-is
-            import re
-            terms = []
-            for token in re.findall(r'"[^"]*"|\S+', q.strip()):
-                if token.startswith('"') or token.endswith("*"):
-                    terms.append(token)
-                else:
-                    terms.append(token + "*")
-            prefix_query = " ".join(terms)
-            # Over-fetch so lineage dedup can still surface `limit` distinct
-            # conversations even when several hits collapse onto one root.
-            fetch_limit = max(safe_limit * 5, 50)
-            matches = db.search_messages(query=prefix_query, limit=fetch_limit)

            # Walk parent_session_id to the compression root, memoized so a
            # chain of compression segments only costs one walk. We deliberately
@ -1739,25 +1699,71 @@ async def search_sessions(q: str = "", limit: int = 20):
                tip_cache[root_id] = tip
                return tip

-            # Keep the best (first / most relevant) hit per compression root.
-            # `seen` already holds the direct ID matches collected above; the
-            # content matches extend it without clobbering them.
-            for m in matches:
-                raw_sid = m["session_id"]
+            # Both ID matches and content matches share one keyspace, keyed by
+            # compression lineage root, so an id-hit and a content-hit on the
+            # same logical conversation collapse to a single result. The first
+            # hit for a lineage wins; ID matches run first and take priority.
+            seen: dict = {}
+
+            def add_lineage_result(raw_sid: str, payload: dict) -> None:
+                if not raw_sid:
+                    return
                root = compression_root(raw_sid)
-                if root in seen:
-                    continue
+                if root in seen or len(seen) >= safe_limit:
+                    return
+                payload = dict(payload)
+                payload["session_id"] = lineage_tip(root)
+                payload["lineage_root"] = root
+                seen[root] = payload
+
+            # Direct ID matches first: users often paste a session id from CLI,
+            # logs, or another Hermes surface. FTS can't find those unless the
+            # id happens to appear in message text. search_sessions_by_id is
+            # SQL-bounded, so this stays cheap even with thousands of sessions.
+            for row in db.search_sessions_by_id(q, limit=safe_limit, include_archived=True):
+                sid = row.get("id")
+                preview = (row.get("preview") or "").strip()
+                snippet = preview or f"Session ID: {sid}"
+                add_lineage_result(
+                    sid,
+                    {
+                        "snippet": snippet,
+                        "role": None,
+                        "source": row.get("source"),
+                        "model": row.get("model"),
+                        "session_started": row.get("started_at"),
+                    },
+                )
+
+            # Auto-add prefix wildcards so partial words match
+            # e.g. "nimb" → "nimb*" matches "nimby"
+            # Preserve quoted phrases and existing wildcards as-is
+            import re
+            terms = []
+            for token in re.findall(r'"[^"]*"|\S+', q.strip()):
+                if token.startswith('"') or token.endswith("*"):
+                    terms.append(token)
+                else:
+                    terms.append(token + "*")
+            prefix_query = " ".join(terms)
+            # Over-fetch so lineage dedup can still surface `limit` distinct
+            # conversations even when several hits collapse onto one root.
+            fetch_limit = max(safe_limit * 5, 50)
+            matches = db.search_messages(query=prefix_query, limit=fetch_limit)
+
+            for m in matches:
                if len(seen) >= safe_limit:
                    break
-                seen[root] = {
-                    "session_id": lineage_tip(root),
-                    "lineage_root": root,
-                    "snippet": m.get("snippet", ""),
-                    "role": m.get("role"),
-                    "source": m.get("source"),
-                    "model": m.get("model"),
-                    "session_started": m.get("session_started"),
-                }
+                add_lineage_result(
+                    m["session_id"],
+                    {
+                        "snippet": m.get("snippet", ""),
+                        "role": m.get("role"),
+                        "source": m.get("source"),
+                        "model": m.get("model"),
+                        "session_started": m.get("session_started"),
+                    },
+                )
            return {"results": list(seen.values())}
        finally:
            db.close()
--- a/hermes_state.py
+++ b/hermes_state.py
@ -1565,6 +1565,7 @@ class SessionDB:
        order_by_last_active: bool = False,
        include_archived: bool = False,
        archived_only: bool = False,
+        id_query: str = None,
    ) -> List[Dict[str, Any]]:
        """List sessions with preview (first user message) and last active timestamp.

@ -1626,6 +1627,16 @@ class SessionDB:
            where_clauses.append("s.archived = 0")

        where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
+
+        # Optional session-id filter, pushed into SQL so callers (Desktop
+        # session-id search) don't have to fetch every row and filter in
+        # Python. ``id_query`` is matched as a case-insensitive substring
+        # against each surfaced row's id AND every id in its forward
+        # compression chain — so searching a compression *root* id or a *tip*
+        # id both resolve to the same projected conversation. Only used in the
+        # order_by_last_active path (which builds the chain CTE); other callers
+        # pass id_query=None.
+        id_needle = (id_query or "").strip().lower()
        if order_by_last_active:
            # Compute effective_last_active by walking each surfaced session's
            # compression-continuation chain forward in SQL and taking the MAX
@ -1638,6 +1649,28 @@ class SessionDB:
            # compression-continuation edges using the same criteria as
            # get_compression_tip (parent.end_reason='compression' AND
            # child.started_at >= parent.ended_at).
+            outer_where = where_sql
+            id_params: List[Any] = []
+            if id_needle:
+                # Admit a surfaced row if its own id or any id in its forward
+                # compression chain matches the needle. LIKE with a leading
+                # wildcard can't use an index, but the chain membership and
+                # the small result set keep this bounded — far cheaper than
+                # fetching every session and scanning in Python.
+                id_clause = (
+                    "EXISTS (SELECT 1 FROM chain cq"
+                    "        WHERE cq.root_id = s.id"
+                    "          AND LOWER(cq.cur_id) LIKE ? ESCAPE '\\')"
+                )
+                like_pattern = (
+                    "%"
+                    + id_needle.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+                    + "%"
+                )
+                id_params = [like_pattern]
+                outer_where = (
+                    f"{where_sql} AND {id_clause}" if where_sql else f"WHERE {id_clause}"
+                )
            query = f"""
                WITH RECURSIVE chain(root_id, cur_id) AS (
                    SELECT s.id, s.id FROM sessions s {where_sql}
@ -1674,12 +1707,13 @@ class SessionDB:
                    COALESCE(cm.effective_last_active, s.started_at) AS _effective_last_active
                FROM sessions s
                LEFT JOIN chain_max cm ON cm.root_id = s.id
-                {where_sql}
+                {outer_where}
                ORDER BY _effective_last_active DESC, s.started_at DESC, s.id DESC
                LIMIT ? OFFSET ?
            """
-            # WHERE params apply twice (CTE seed + outer select).
-            params = params + params + [limit, offset]
+            # WHERE params apply twice (CTE seed + outer select); the id filter
+            # only applies to the outer select.
+            params = params + params + id_params + [limit, offset]
        else:
            query = f"""
                SELECT s.*,
@ -3025,12 +3059,18 @@ class SessionDB:
        if not needle or limit <= 0:
            return []

-        scan_limit = max(limit, 10_000)
-        sessions = self.list_sessions_rich(
-            limit=scan_limit,
+        # SQL-bounded: list_sessions_rich pushes the id LIKE filter into the
+        # query (matching the row's own id AND any id in its forward
+        # compression chain), so we only materialize matching rows instead of
+        # scanning every session. Fetch a small multiple of `limit` so the
+        # in-Python exact/prefix/substring ranking below has enough candidates
+        # to order, then truncate.
+        candidates = self.list_sessions_rich(
+            limit=max(limit * 4, limit),
            offset=0,
            include_archived=include_archived,
            order_by_last_active=True,
+            id_query=needle,
        )

        def score(row: Dict[str, Any]) -> int:
@ -3042,14 +3082,11 @@ class SessionDB:
                return 1
            return 2

-        matches = [
-            (score(row), idx, row)
-            for idx, row in enumerate(sessions)
-            if needle in str(row.get("id") or "").lower()
-            or needle in str(row.get("_lineage_root_id") or "").lower()
-        ]
-        matches.sort(key=lambda item: (item[0], item[1]))
-        return [row for _, _, row in matches[:limit]]
+        ranked = sorted(
+            enumerate(candidates),
+            key=lambda item: (score(item[1]), item[0]),
+        )
+        return [row for _, row in ranked[:limit]]

    def search_sessions(
        self,
--- a/tests/hermes_cli/test_web_server_session_search.py
+++ b/tests/hermes_cli/test_web_server_session_search.py
@ -4,11 +4,18 @@ from hermes_cli import web_server


 class _FakeSessionDB:
+    """Fake backing the /api/sessions/search endpoint.
+
+    The endpoint surfaces direct session-id matches first, then FTS message
+    matches, deduping both by compression lineage root. This fake has no
+    compression chains (get_session returns no parent), so each session is its
+    own lineage root.
+    """
+
    closed = False

    def search_sessions_by_id(self, query, limit=20, include_archived=True):
        assert query == "20260603"
-        assert limit == 2
        assert include_archived is True
        return [
            {
@ -22,7 +29,6 @@ class _FakeSessionDB:

    def search_messages(self, query, limit=20):
        assert query == "20260603*"
-        assert limit == 2
        return [
            {
                "session_id": "20260603_090200_exact",
@ -42,6 +48,13 @@ class _FakeSessionDB:
            },
        ]

+    def get_session(self, session_id):
+        # No compression chains in this fixture — every session is its own root.
+        return {"id": session_id, "parent_session_id": None}
+
+    def get_compression_tip(self, session_id):
+        return session_id
+
    def close(self):
        self.closed = True

@ -51,10 +64,13 @@ def test_desktop_session_search_merges_id_matches_before_content_matches(monkeyp

    response = asyncio.run(web_server.search_sessions(q="20260603", limit=2))

+    # ID match surfaces first; the content hit on the SAME session is deduped
+    # by lineage root (not double-listed); the unrelated content hit follows.
    assert response == {
        "results": [
            {
                "session_id": "20260603_090200_exact",
+                "lineage_root": "20260603_090200_exact",
                "snippet": "ID match preview",
                "role": None,
                "source": "cli",
@ -63,6 +79,7 @@ def test_desktop_session_search_merges_id_matches_before_content_matches(monkeyp
            },
            {
                "session_id": "content_session",
+                "lineage_root": "content_session",
                "snippet": "content hit",
                "role": "assistant",
                "source": "desktop",