feat(cli): ranked fuzzy search in the curses model picker

Wires the salvaged search helpers into the shared curses menu driver and turns on type-to-filter for the CLI model pickers (the 100+ model lists that previously required scrolling).

- Search lives in the shared `_run_curses_menu` driver behind a `searchable` flag + `search_labels`, so both `curses_radiolist` and `curses_single_select` get it without per-menu duplication. `/` opens the filter, BACKSPACE edits, Ctrl+U clears, ESC clears the filter then cancels. Returned values are always original item indices.
- `_filter_indices` RANKS matches (best-first) via a Python port of the TS scorer in ui-tui/src/lib/fuzzy.ts and web/src/lib/fuzzy.ts. The port produces exactly the same scores as the TS original: same per-char bonuses, prefix (+8) and exact (+20) bonuses, camelCase/word-boundary detection (matching on the lowercased target, boundary detection on the original case), and the -len*0.01 length tiebreak — so the CLI, TUI, and WebUI rank results identically. A cross-language parity test pins the exact scores.
- `_prompt_model_selection` (the canonical picker across the model flows) and the custom-provider model list pass `searchable=True`.
- Split `_decode_menu_key` out of `read_menu_key` so the search loop can peek at the raw key (to catch `/`) before nav decoding.
- ESC during active search now clears the query (restores the full list) so a no-match filter can't strand the user; printable-key capture is restricted to ASCII to avoid Latin-1 mojibake.
- Update two setup-menu tests whose mock signatures predate the new `searchable` kwarg; add ranked-scorer + parity + state-machine tests.
2026-06-01 23:18:25 +05:30
parent 53f598e7a2
commit 0fdab53ef0
5 changed files with 375 additions and 33 deletions
--- a/tests/hermes_cli/test_curses_ui_fuzzy_rank.py
+++ b/tests/hermes_cli/test_curses_ui_fuzzy_rank.py
@ -0,0 +1,127 @@
+"""Tests for the ranked fuzzy scorer used by the searchable curses pickers."""
+from hermes_cli.curses_ui import (
+    _SearchState,
+    _filter_indices,
+    _fuzzy_score,
+    _handle_active_search_key,
+    _is_boundary,
+    _token_score,
+)
+
+
+class _FakeCurses:  # key-code holder passed as the first argument of _handle_active_search_key
+    KEY_BACKSPACE = 263
+    KEY_DOWN = 258
+    KEY_ENTER = 343
+
+
+def test_fuzzy_score_matches_subsequence():
+    assert _fuzzy_score("gpt-4o", "g4o") is not None
+    assert _fuzzy_score("gpt-4o", "4o") is not None
+    assert _fuzzy_score("gpt-4o", "o4g") is None
+    assert _fuzzy_score("gpt-4o", "xyz") is None
+
+
+def test_scorer_matches_typescript_reference():
+    """Score parity with ui-tui/web fuzzy.ts. These exact values are produced
+    by the TS fuzzyScoreMulti for the same inputs (verified via a cross-language
+    harness); keep the Python port byte-identical so all three surfaces rank
+    consistently. If you change the scoring constants, update the TS copies too.
+    """
+    cases = {
+        ("gpt-4o", "g4o"): 15.94,
+        ("gpt-4o", "gpt"): 28.94,
+        ("claude-sonnet-4", "sonnet"): 33.85,
+        ("claude-sonnet-4", "clad snnt"): 30.70,
+        ("GptO", "gpto"): 57.96,  # camelCase boundary on the original-case 'O'
+    }
+    for (label, query), expected in cases.items():
+        score = _fuzzy_score(label, query)
+        assert score is not None
+        assert round(score, 2) == expected, f"{label!r}/{query!r}: {score} != {expected}"
+
+
+def test_is_boundary_camelcase_and_separators():
+    assert _is_boundary("gpt-4o", 0) is True       # start
+    assert _is_boundary("gpt-4o", 4) is True        # after '-'
+    assert _is_boundary("gpt-4o", 2) is False       # mid-word
+    assert _is_boundary("GptO", 3) is True          # lower->upper transition
+
+
+def test_token_score_takes_orig_and_lower():
+    # Exact match (lower == token) earns the +20 bonus over a prefix.
+    exact = _token_score("sonnet", "sonnet", "sonnet")
+    prefix = _token_score("sonnet-x", "sonnet-x", "sonnet")
+    assert exact is not None and prefix is not None
+    assert exact > prefix
+
+
+def test_esc_clears_query_and_signals_changed():
+    # Esc during active search clears the filter (restores full list) and
+    # signals `changed` so the driver resets scroll/cursor.
+    search = _SearchState(active=True, query="gpt")
+    handled, confirm, changed = _handle_active_search_key(_FakeCurses, 27, search)
+    assert (handled, confirm, changed) == (True, False, True)
+    assert search.active is False
+    assert search.query == ""
+
+    # Esc with no query: still stops search, but nothing changed.
+    search2 = _SearchState(active=True, query="")
+    assert _handle_active_search_key(_FakeCurses, 27, search2) == (True, False, False)
+
+
+def test_high_byte_keys_ignored():
+    # Bytes 128-255 must NOT append Latin-1 mojibake to the query.
+    search = _SearchState(active=True, query="ab")
+    handled, _, changed = _handle_active_search_key(_FakeCurses, 200, search)
+    assert (handled, changed) == (False, False)
+    assert search.query == "ab"
+
+
+def test_fuzzy_score_empty_query_is_zero():
+    assert _fuzzy_score("anything", "") == 0
+    assert _fuzzy_score("anything", "   ") == 0
+
+
+def test_fuzzy_score_prefix_beats_scattered():
+    prefix = _fuzzy_score("gpt-4o-mini", "gpt")
+    scattered = _fuzzy_score("a-g-p-t", "gpt")
+    assert prefix is not None and scattered is not None
+    assert prefix > scattered
+
+
+def test_fuzzy_score_exact_and_shorter_rank_higher():
+    exact = _fuzzy_score("sonnet", "sonnet")
+    longer = _fuzzy_score("sonnet-extended", "sonnet")
+    assert exact is not None and longer is not None
+    # Same prefix match, but the shorter id wins on the length tiebreak.
+    assert exact > longer
+
+
+def test_filter_indices_ranks_best_first():
+    models = ["gpt-4o", "gpt-4o-mini", "claude-sonnet-4", "claude-haiku", "o1-preview"]
+
+    # g4o matches both gpt-4o variants; the shorter exact-ish one ranks first.
+    ranked = _filter_indices(models, "g4o")
+    assert [models[i] for i in ranked] == ["gpt-4o", "gpt-4o-mini"]
+
+    # son4 surfaces the sonnet model.
+    assert [models[i] for i in _filter_indices(models, "son4")] == ["claude-sonnet-4"]
+
+    # Multi-token AND.
+    assert [models[i] for i in _filter_indices(models, "clad snnt")] == ["claude-sonnet-4"]
+
+    # No match drops everything.
+    assert _filter_indices(models, "zzz") == []
+
+
+def test_filter_indices_blank_query_preserves_order():
+    models = ["b", "a", "c"]
+    assert _filter_indices(models, "") == [0, 1, 2]
+    assert _filter_indices(models, "   ") == [0, 1, 2]
+
+
+def test_filter_indices_stable_for_equal_scores():
+    # Identical labels score identically; original order is the tiebreak.
+    items = ["ab", "ab", "ab"]
+    assert _filter_indices(items, "ab") == [0, 1, 2]
--- a/tests/hermes_cli/test_setup_menu_curses_migration.py
+++ b/tests/hermes_cli/test_setup_menu_curses_migration.py
@ -13,7 +13,7 @@ def test_prompt_model_selection_uses_curses_radiolist():

    seen = {}

-    def _fake(title, items, *, selected=0, cancel_returns=None, description=None):
+    def _fake(title, items, *, selected=0, cancel_returns=None, description=None, searchable=False):
        seen["title"] = title
        seen["items"] = items
        return 1  # pick second model
@ -67,7 +67,7 @@ def test_model_selection_with_pricing_passes_description():

    seen = {}

-    def _fake(title, items, *, selected=0, cancel_returns=None, description=None):
+    def _fake(title, items, *, selected=0, cancel_returns=None, description=None, searchable=False):
        seen["description"] = description
        return len(items) - 1  # Skip