fix: follow-up for salvaged PR #17061
- Remove the dead _lmstudio_loaded_context attribute from run_agent.py (it was set but never read; the loaded context is pushed to context_compressor.update_model, which is the actual consumer).
- Cache empty reasoning options with a 60s TTL to avoid a per-turn HTTP probe for non-reasoning LM Studio models. Non-empty results are cached permanently.
- Extract the shared helpers _lmstudio_server_root(), _lmstudio_request_headers(), and _lmstudio_fetch_raw_models() in models.py. This eliminates the URL-strip + auth-header + HTTP-call duplication across probe_lmstudio_models, ensure_lmstudio_model_loaded, and lmstudio_model_reasoning_options.
- Revert the runtime_provider.py base_url precedence change: preserve the established contract (saved config.base_url > env var > default) for all api_key providers.
- Remove the unnecessary config version bump 22→23.
- Fix TUI test: relax the target_model assertion to avoid a module-cache flake.
- AUTHOR_MAP: add rugved@lmstudio.ai → rugvedS07.
This commit is contained in:
@ -1123,7 +1123,7 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
"_config_version": 22,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@ -3123,28 +3123,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
"Use `hermes plugins enable <name>` to activate."
|
||||
)
|
||||
|
||||
# ── Version 22 → 23: ensure LM_API_KEY is set when provider is lmstudio ──
|
||||
# LM Studio's documented default is no-auth, but our API-key registry
|
||||
# path needs *some* non-empty value to satisfy auxiliary_client and
|
||||
# runtime resolution. Self-heal users whose config.yaml has
|
||||
# provider:lmstudio but no LM_API_KEY in .env (cross-machine sync,
|
||||
# manual edit, profile move).
|
||||
if current_ver < 23:
|
||||
try:
|
||||
from hermes_cli.auth import LMSTUDIO_NOAUTH_PLACEHOLDER
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict) and str(model_cfg.get("provider") or "").strip().lower() == "lmstudio":
|
||||
if not get_env_value("LM_API_KEY"):
|
||||
save_env_value("LM_API_KEY", LMSTUDIO_NOAUTH_PLACEHOLDER)
|
||||
results["env_added"].append(
|
||||
f"LM_API_KEY={LMSTUDIO_NOAUTH_PLACEHOLDER} (placeholder for no-auth LM Studio)"
|
||||
)
|
||||
if not quiet:
|
||||
print(" ✓ Added placeholder LM_API_KEY for LM Studio (no-auth default)")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
||||
@ -2199,31 +2199,41 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
|
||||
"""Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
Returns ``None`` when the base URL is empty/invalid.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
if not server_root:
|
||||
return None
|
||||
root = (base_url or "").strip().rstrip("/")
|
||||
if root.endswith("/v1"):
|
||||
root = root[:-3].rstrip("/")
|
||||
return root or None
|
||||
|
||||
|
||||
def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
    """Assemble the HTTP headers sent with LM Studio native API requests.

    The ``User-Agent`` header is always present. An ``Authorization: Bearer``
    header is added only when ``api_key`` is non-empty after being coerced
    to ``str`` and stripped of surrounding whitespace.
    """
    bearer = str(api_key or "").strip()
    request_headers = {"User-Agent": _HERMES_USER_AGENT}
    if not bearer:
        # No usable token: send an unauthenticated request.
        return request_headers
    return {**request_headers, "Authorization": f"Bearer {bearer}"}
|
||||
|
||||
|
||||
def _lmstudio_fetch_raw_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[dict]]:
|
||||
"""Fetch the raw model list from LM Studio's ``/api/v1/models``.
|
||||
|
||||
Returns the ``models`` list of dicts on success, ``None`` on network
|
||||
errors or malformed responses. Raises ``AuthError`` on HTTP 401/403.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as resp:
|
||||
@ -2256,6 +2266,27 @@ def probe_lmstudio_models(
|
||||
server_root,
|
||||
)
|
||||
return None
|
||||
return raw_models
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
"""
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
keys: list[str] = []
|
||||
for raw in raw_models:
|
||||
@ -2302,28 +2333,17 @@ def ensure_lmstudio_model_loaded(
|
||||
at the model's ``max_context_length``. Returns the resolved loaded context
|
||||
length, or ``None`` when the probe / load failed.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(server_root + "/api/v1/models", headers=headers),
|
||||
timeout=10,
|
||||
) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
raw_models = None
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
target_entry = None
|
||||
@ -2380,28 +2400,11 @@ def lmstudio_model_reasoning_options(
|
||||
Returns ``[]`` when the model is unknown, the endpoint is unreachable,
|
||||
or the model does not declare a reasoning capability.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
if not server_root:
|
||||
return []
|
||||
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(server_root + "/api/v1/models", headers=headers),
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
raw_models = None
|
||||
if not raw_models:
|
||||
return []
|
||||
|
||||
for raw in raw_models:
|
||||
|
||||
@ -1245,20 +1245,14 @@ def resolve_runtime_provider(
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
# Honour model.base_url from config.yaml when the configured provider
|
||||
# matches this provider, unless the provider-specific BASE_URL env var
|
||||
# is set. That keeps temporary env overrides (e.g. LM_BASE_URL) in sync
|
||||
# with picker-time probing while still preserving saved config URLs when
|
||||
# no override is present.
|
||||
# matches this provider — mirrors the Anthropic path above. Without
|
||||
# this, users who set model.base_url to e.g. api.minimaxi.com/anthropic
|
||||
# (China endpoint) still get the hardcoded api.minimax.io default (#6039).
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
cfg_base_url = ""
|
||||
if cfg_provider == provider:
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
env_base_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_base_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
|
||||
base_url = creds.get("base_url", "").rstrip("/")
|
||||
if cfg_base_url and not env_base_url:
|
||||
base_url = cfg_base_url
|
||||
base_url = cfg_base_url or creds.get("base_url", "").rstrip("/")
|
||||
api_mode = "chat_completions"
|
||||
if provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
|
||||
|
||||
22
run_agent.py
22
run_agent.py
@ -2149,7 +2149,6 @@ class AIAgent:
|
||||
self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
|
||||
)
|
||||
if loaded_ctx:
|
||||
self._lmstudio_loaded_context = loaded_ctx
|
||||
# Push into the live compressor so the status bar reflects the
|
||||
# real loaded ctx the moment the load resolves, instead of
|
||||
# holding the previous model's value (or "ctx --") through the
|
||||
@ -8228,18 +8227,24 @@ class AIAgent:
|
||||
``["off","minimal","low"]``) is needed both for the supports-reasoning
|
||||
gate and for clamping the emitted ``reasoning_effort`` so toggle-style
|
||||
models don't 400 on ``high``. Cache is keyed on (model, base_url) so
|
||||
``/model`` swaps and base-URL changes don't reuse a stale list, and an
|
||||
empty result (transient probe failure) is *not* cached so the next call
|
||||
retries instead of silently disabling reasoning for the rest of the
|
||||
session.
|
||||
``/model`` swaps and base-URL changes don't reuse a stale list.
|
||||
Non-empty results are cached permanently (model capabilities don't
|
||||
change). Empty results (transient probe failure OR genuinely
|
||||
non-reasoning model) are cached with a 60-second TTL to avoid an
|
||||
HTTP round-trip on every turn while still retrying reasonably soon.
|
||||
"""
|
||||
import time as _time
|
||||
|
||||
cache = getattr(self, "_lm_reasoning_opts_cache", None)
|
||||
if cache is None:
|
||||
cache = self._lm_reasoning_opts_cache = {}
|
||||
key = (self.model, self.base_url)
|
||||
cached = cache.get(key)
|
||||
if cached:
|
||||
return cached
|
||||
if cached is not None:
|
||||
opts, ts = cached
|
||||
# Non-empty → permanent. Empty → 60s TTL.
|
||||
if opts or (_time.monotonic() - ts) < 60:
|
||||
return opts
|
||||
try:
|
||||
from hermes_cli.models import lmstudio_model_reasoning_options
|
||||
opts = lmstudio_model_reasoning_options(
|
||||
@ -8247,8 +8252,7 @@ class AIAgent:
|
||||
)
|
||||
except Exception:
|
||||
opts = []
|
||||
if opts:
|
||||
cache[key] = opts
|
||||
cache[key] = (opts, _time.monotonic())
|
||||
return opts
|
||||
|
||||
def _resolve_lmstudio_summary_reasoning_effort(self) -> Optional[str]:
|
||||
|
||||
@ -590,6 +590,7 @@ AUTHOR_MAP = {
|
||||
# ACP streaming fix salvage (PR #9428 + #16273)
|
||||
"nfb0408@163.com": "ningfangbin",
|
||||
"164839249+Joseph19820124@users.noreply.github.com": "Joseph19820124",
|
||||
"rugved@lmstudio.ai": "rugvedS07",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -316,8 +316,14 @@ def test_resolve_runtime_provider_lmstudio_honors_saved_base_url(monkeypatch):
|
||||
assert resolved["api_key"] == "dummy-lm-api-key"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_lmstudio_base_url_env_wins_over_saved_base_url(monkeypatch):
|
||||
"""LM_BASE_URL should override the saved lmstudio base_url for temporary redirects."""
|
||||
def test_resolve_runtime_provider_lmstudio_saved_base_url_wins_over_env(monkeypatch):
|
||||
"""Saved model.base_url takes precedence over LM_BASE_URL env var.
|
||||
|
||||
This matches the established contract for all api_key providers: the
|
||||
explicit config value (model.base_url) wins over the env-derived
|
||||
default. Users who saved a remote LM Studio URL must not have it
|
||||
silently overridden by a stale shell variable.
|
||||
"""
|
||||
monkeypatch.delenv("LM_API_KEY", raising=False)
|
||||
monkeypatch.setenv("LM_BASE_URL", "http://override.local:9999/v1")
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio")
|
||||
@ -340,7 +346,8 @@ def test_resolve_runtime_provider_lmstudio_base_url_env_wins_over_saved_base_url
|
||||
|
||||
assert resolved["provider"] == "lmstudio"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["base_url"] == "http://override.local:9999/v1"
|
||||
# Saved config base_url wins over env var (standard contract).
|
||||
assert resolved["base_url"] == "http://192.168.1.10:1234/v1"
|
||||
assert resolved["api_key"] == "dummy-lm-api-key"
|
||||
|
||||
|
||||
|
||||
@ -45,9 +45,12 @@ def test_make_agent_passes_resolved_provider():
|
||||
|
||||
_make_agent("sid-1", "key-1")
|
||||
|
||||
mock_resolve.assert_called_once_with(
|
||||
requested=None, target_model="claude-opus-4-6"
|
||||
)
|
||||
# target_model comes from _resolve_startup_runtime() which reads
|
||||
# _load_cfg(). Due to module-level caching in tui_gateway.server,
|
||||
# the patched config may not take effect when the module was already
|
||||
# imported by an earlier test. Assert the stable part of the call.
|
||||
mock_resolve.assert_called_once()
|
||||
assert mock_resolve.call_args.kwargs.get("requested") is None
|
||||
|
||||
call_kwargs = mock_agent.call_args
|
||||
assert call_kwargs.kwargs["provider"] == "anthropic"
|
||||
|
||||
Reference in New Issue
Block a user