feat(cli): ranked fuzzy search in the curses model picker
Wires the salvaged search helpers into the shared curses menu driver and turns on type-to-filter for the CLI model pickers (the 100+ model lists that previously required scrolling).

- Search lives in the shared `_run_curses_menu` driver behind a `searchable` flag + `search_labels`, so both `curses_radiolist` and `curses_single_select` get it without per-menu duplication. `/` opens the filter, BACKSPACE edits, Ctrl+U clears, ESC clears the filter then cancels. Returned values are always original item indices.
- `_filter_indices` RANKS matches (best-first) via a Python port of the TS scorer in ui-tui/src/lib/fuzzy.ts and web/src/lib/fuzzy.ts. The port is byte-identical in score: same per-char bonuses, prefix (+8) and exact (+20) bonuses, camelCase/word-boundary detection (matching on the lowercased target, boundary on the original case), and the -len*0.01 length tiebreak — so the CLI, TUI, and WebUI rank results identically. A cross-language parity test pins the exact scores.
- `_prompt_model_selection` (the canonical picker across the model flows) and the custom-provider model list pass `searchable=True`.
- Split `_decode_menu_key` out of `read_menu_key` so the search loop can peek the raw key (catch `/`) before nav decoding.
- ESC during active search now clears the query (restores the full list) so a no-match filter can't strand the user; printable-key capture is restricted to ASCII to avoid Latin-1 mojibake.
- Update two setup-menu tests whose mock signatures predate the new `searchable` kwarg; add ranked-scorer + parity + state-machine tests.
This commit is contained in:
127
tests/hermes_cli/test_curses_ui_fuzzy_rank.py
Normal file
127
tests/hermes_cli/test_curses_ui_fuzzy_rank.py
Normal file
@ -0,0 +1,127 @@
|
||||
"""Tests for the ranked fuzzy scorer used by the searchable curses pickers."""
|
||||
from hermes_cli.curses_ui import (
|
||||
_SearchState,
|
||||
_filter_indices,
|
||||
_fuzzy_score,
|
||||
_handle_active_search_key,
|
||||
_is_boundary,
|
||||
_token_score,
|
||||
)
|
||||
|
||||
|
||||
class _FakeCurses:
|
||||
KEY_BACKSPACE = 263
|
||||
KEY_DOWN = 258
|
||||
KEY_ENTER = 343
|
||||
|
||||
|
||||
def test_fuzzy_score_matches_subsequence():
    """Queries that are in-order subsequences score; the others give None."""
    label = "gpt-4o"

    for hit in ("g4o", "4o"):
        assert _fuzzy_score(label, hit) is not None

    # Right characters in the wrong order, and characters not present at all.
    for miss in ("o4g", "xyz"):
        assert _fuzzy_score(label, miss) is None
|
||||
|
||||
|
||||
def test_scorer_matches_typescript_reference():
    """Pin the Python scorer to the values produced by ui-tui/web fuzzy.ts.

    The expected numbers below are what the TS fuzzyScoreMulti produces for
    the same inputs (verified via a cross-language harness). The Python port
    must stay byte-identical so all three surfaces rank consistently. If you
    change the scoring constants, update the TS copies too.
    """
    reference = (
        ("gpt-4o", "g4o", 15.94),
        ("gpt-4o", "gpt", 28.94),
        ("claude-sonnet-4", "sonnet", 33.85),
        ("claude-sonnet-4", "clad snnt", 30.70),
        # camelCase boundary on the original-case 'O'
        ("GptO", "gpto", 57.96),
    )

    for label, query, expected in reference:
        score = _fuzzy_score(label, query)
        assert score is not None
        assert round(score, 2) == expected, f"{label!r}/{query!r}: {score} != {expected}"
|
||||
|
||||
|
||||
def test_is_boundary_camelcase_and_separators():
    """Check boundary detection at the start, after '-', mid-word and on camelCase."""
    expectations = (
        ("gpt-4o", 0, True),   # start
        ("gpt-4o", 4, True),   # after '-'
        ("gpt-4o", 2, False),  # mid-word
        ("GptO", 3, True),     # lower->upper transition
    )

    for text, index, expected in expectations:
        assert _is_boundary(text, index) is expected
|
||||
|
||||
|
||||
def test_token_score_takes_orig_and_lower():
    """An exact token match must outscore a prefix-only match."""
    token = "sonnet"

    # Exact match (lower == token) earns the +20 bonus over a prefix.
    exact_score = _token_score(token, token, token)
    prefix_score = _token_score("sonnet-x", "sonnet-x", token)

    assert exact_score is not None
    assert prefix_score is not None
    assert exact_score > prefix_score
|
||||
|
||||
|
||||
def test_esc_clears_query_and_signals_changed():
    """Esc ends an active search; `changed` is reported only if a query existed."""
    esc_key = 27

    # Esc during active search clears the filter (restores full list) and
    # signals `changed` so the driver resets scroll/cursor.
    with_query = _SearchState(active=True, query="gpt")
    handled, confirm, changed = _handle_active_search_key(_FakeCurses, esc_key, with_query)
    assert (handled, confirm, changed) == (True, False, True)
    assert with_query.active is False
    assert with_query.query == ""

    # Esc with no query: still stops search, but nothing changed.
    without_query = _SearchState(active=True, query="")
    outcome = _handle_active_search_key(_FakeCurses, esc_key, without_query)
    assert outcome == (True, False, False)
|
||||
|
||||
|
||||
def test_high_byte_keys_ignored():
    """A key code of 200 is neither handled nor appended to the query."""
    # Bytes 128-255 must NOT append Latin-1 mojibake to the query.
    state = _SearchState(active=True, query="ab")

    outcome = _handle_active_search_key(_FakeCurses, 200, state)
    handled, _confirm, changed = outcome

    assert (handled, changed) == (False, False)
    assert state.query == "ab"
|
||||
|
||||
|
||||
def test_fuzzy_score_empty_query_is_zero():
    """An empty or whitespace-only query scores 0 rather than None."""
    for blank in ("", " "):
        assert _fuzzy_score("anything", blank) == 0
|
||||
|
||||
|
||||
def test_fuzzy_score_prefix_beats_scattered():
    """A contiguous prefix hit must outrank the same letters spread apart."""
    query = "gpt"
    contiguous = _fuzzy_score("gpt-4o-mini", query)
    spread_out = _fuzzy_score("a-g-p-t", query)

    assert contiguous is not None
    assert spread_out is not None
    assert contiguous > spread_out
|
||||
|
||||
|
||||
def test_fuzzy_score_exact_and_shorter_rank_higher():
    """The label equal to the query must outrank a longer label sharing the prefix."""
    query = "sonnet"
    short_score = _fuzzy_score("sonnet", query)
    long_score = _fuzzy_score("sonnet-extended", query)

    assert short_score is not None
    assert long_score is not None
    # Same prefix match, but the shorter id wins on the length tiebreak.
    assert short_score > long_score
|
||||
|
||||
|
||||
def test_filter_indices_ranks_best_first():
    """Filtering returns only matching items, ordered best match first."""
    models = ["gpt-4o", "gpt-4o-mini", "claude-sonnet-4", "claude-haiku", "o1-preview"]

    def matched(query):
        # Map the returned original indices back to labels for readability.
        return [models[i] for i in _filter_indices(models, query)]

    # g4o matches both gpt-4o variants; the shorter exact-ish one ranks first.
    assert matched("g4o") == ["gpt-4o", "gpt-4o-mini"]

    # son4 surfaces the sonnet model.
    assert matched("son4") == ["claude-sonnet-4"]

    # Multi-token AND.
    assert matched("clad snnt") == ["claude-sonnet-4"]

    # No match drops everything.
    assert _filter_indices(models, "zzz") == []
|
||||
|
||||
|
||||
def test_filter_indices_blank_query_preserves_order():
    """A blank query returns every index in the original order."""
    models = ["b", "a", "c"]
    every_index = list(range(len(models)))

    for blank in ("", " "):
        assert _filter_indices(models, blank) == every_index
|
||||
|
||||
|
||||
def test_filter_indices_stable_for_equal_scores():
    """Equal-scoring items come back in their original order."""
    # Identical labels score identically; original order is the tiebreak.
    duplicates = ["ab"] * 3
    ranked = _filter_indices(duplicates, "ab")
    assert ranked == [0, 1, 2]
|
||||
@ -13,7 +13,7 @@ def test_prompt_model_selection_uses_curses_radiolist():
|
||||
|
||||
seen = {}
|
||||
|
||||
def _fake(title, items, *, selected=0, cancel_returns=None, description=None):
|
||||
def _fake(title, items, *, selected=0, cancel_returns=None, description=None, searchable=False):
|
||||
seen["title"] = title
|
||||
seen["items"] = items
|
||||
return 1 # pick second model
|
||||
@ -67,7 +67,7 @@ def test_model_selection_with_pricing_passes_description():
|
||||
|
||||
seen = {}
|
||||
|
||||
def _fake(title, items, *, selected=0, cancel_returns=None, description=None):
|
||||
def _fake(title, items, *, selected=0, cancel_returns=None, description=None, searchable=False):
|
||||
seen["description"] = description
|
||||
return len(items) - 1 # Skip
|
||||
|
||||
|
||||
Reference in New Issue
Block a user