""" Canonical model catalogs and lightweight validation helpers. Add, remove, or reorder entries here — both `hermes setup` and `hermes` provider-selection will pick up the change automatically. """ from __future__ import annotations import json import os import urllib.request import urllib.error import time from difflib import get_close_matches from pathlib import Path from typing import Any, NamedTuple, Optional from hermes_cli import __version__ as _HERMES_VERSION # Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity # Check (error 1010) don't reject the default ``Python-urllib/*`` signature. _HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}" COPILOT_BASE_URL = "https://api.githubcopilot.com" COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models" COPILOT_EDITOR_VERSION = "vscode/1.104.1" COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"] COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # Fallback OpenRouter snapshot used when the live catalog is unavailable. 
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.8", ""),
    ("anthropic/claude-opus-4.8-fast", "2x price, higher output speed"),
    ("anthropic/claude-opus-4.7", ""),
    ("anthropic/claude-opus-4.6", ""),
    ("anthropic/claude-sonnet-4.6", ""),
    ("moonshotai/kimi-k2.6", "recommended"),
    ("openrouter/pareto-code", "auto-routes to cheapest coder meeting openrouter.min_coding_score"),
    ("qwen/qwen3.7-max", ""),
    ("anthropic/claude-haiku-4.5", ""),
    ("openai/gpt-5.5", ""),
    ("openai/gpt-5.5-pro", ""),
    ("openai/gpt-5.4-mini", ""),
    ("openai/gpt-5.4-nano", ""),
    ("openai/gpt-5.3-codex", ""),
    ("xiaomi/mimo-v2.5-pro", ""),
    ("tencent/hy3-preview", ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3.5-flash", ""),
    ("google/gemini-3.1-pro-preview", ""),
    ("google/gemini-3.1-flash-lite-preview", ""),
    ("qwen/qwen3.6-35b-a3b", ""),
    ("stepfun/step-3.7-flash", ""),
    ("minimax/minimax-m2.7", ""),
    ("z-ai/glm-5.1", ""),
    ("x-ai/grok-4.20", ""),
    ("x-ai/grok-4.3", ""),
    ("nvidia/nemotron-3-super-120b-a12b", ""),
    ("deepseek/deepseek-v4-pro", ""),
    # Free tier
    ("openrouter/elephant-alpha", "free"),
    ("openrouter/owl-alpha", "free"),
    ("tencent/hy3-preview:free", "free"),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("inclusionai/ring-2.6-1t:free", "free"),
]

# Module-level slot for a live OpenRouter catalog; None until something fills it.
_openrouter_catalog_cache: list[tuple[str, str]] | None = None


def _codex_curated_models() -> list[str]:
    """Build the openai-codex curated list from ``hermes_cli.codex_models``.

    The list is ``DEFAULT_CODEX_MODELS`` run through the forward-compat
    synthesis helper, so the gateway /model picker and the CLI
    `hermes model` flow share one source of truth instead of a second
    hand-maintained static list.
    """
    # Imported lazily, inside the function, exactly as the catalog expects.
    from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, _add_forward_compat_models

    # Hand the helper a copy so the shared default list is never mutated.
    seed_models = list(DEFAULT_CODEX_MODELS)
    return _add_forward_compat_models(seed_models)


# Static fallback for xAI when the models.dev disk cache is empty (fresh
# install, offline first run, etc.).
# Mirrors the xAI-direct model IDs from
# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
# or retires a model, the disk cache picks it up on the next refresh and the
# fallback here only matters until that refresh lands.
#
# Models retired by xAI on May 15, 2026 are excluded — see
# https://docs.x.ai/developers/migration/may-15-retirement
# (grok-4, grok-4-0709, grok-4-fast{,-reasoning,-non-reasoning},
# grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3).
_XAI_STATIC_FALLBACK: list[str] = [
    "grok-4.3",
    "grok-4.20-0309-reasoning",
    "grok-4.20-0309-non-reasoning",
    "grok-4.20-multi-agent-0309",
]

_XAI_TOP_MODEL = "grok-4.3"


def _xai_promote_top(ids: list[str]) -> list[str]:
    """Move the headline xAI model to the front of *ids*.

    When ``_XAI_TOP_MODEL`` is absent the input list is returned as-is
    (same object); otherwise a new list is built with it first.
    """
    if _XAI_TOP_MODEL not in ids:
        return ids
    remainder = [model for model in ids if model != _XAI_TOP_MODEL]
    return [_XAI_TOP_MODEL, *remainder]


def _xai_curated_models() -> list[str]:
    """Build the xAI-direct curated list from the models.dev disk cache.

    Uses ``agent.models_dev._load_disk_cache`` and looks up
    ``cache["xai"]["models"]``; the string keys of that mapping are
    sorted and the headline model is pinned first. Whenever the cache is
    missing, malformed, empty, or anything raises, a copy of
    ``_XAI_STATIC_FALLBACK`` is returned instead. Plays the same role for
    xAI that ``_codex_curated_models()`` plays for openai-codex.
    """
    try:
        from agent.models_dev import _load_disk_cache

        cache = _load_disk_cache()
        provider = cache.get("xai") if isinstance(cache, dict) else None
        catalog = provider.get("models") if isinstance(provider, dict) else None
        if isinstance(catalog, dict) and catalog:
            model_ids = sorted(key for key in catalog if isinstance(key, str))
            if model_ids:
                return _xai_promote_top(model_ids)
    except Exception:
        # Deliberate best-effort: a missing file, malformed JSON or import
        # error all fall through to the static list below.
        pass
    return list(_XAI_STATIC_FALLBACK)


_PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "anthropic/claude-opus-4.8",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "moonshotai/kimi-k2.6",
        "qwen/qwen3.7-max",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.5",
        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-mini",
        "openai/gpt-5.4-nano",
        "openai/gpt-5.3-codex",
        "xiaomi/mimo-v2.5-pro",
        "tencent/hy3-preview",
        "google/gemini-3-pro-preview",
        "google/gemini-3.5-flash",
        "google/gemini-3.1-pro-preview",
        "google/gemini-3.1-flash-lite-preview",
        "qwen/qwen3.6-35b-a3b",
        "stepfun/step-3.7-flash",
        "minimax/minimax-m2.7",
        "z-ai/glm-5.1",
        "x-ai/grok-4.3",
        "nvidia/nemotron-3-super-120b-a12b",
        "deepseek/deepseek-v4-pro",
    ],
    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
    # provider_model_ids fallback when /v1/models is unavailable.
    "openai": [
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
    ],
    "openai-api": [
        "gpt-5.5",
        "gpt-5.5-pro",
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5.4-nano",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
    ],
    "openai-codex": _codex_curated_models(),
    "xai-oauth": _xai_curated_models(),
    "copilot-acp": [
        "copilot-acp",
    ],
    "copilot": [
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
        "claude-sonnet-4.6",
        "claude-sonnet-4",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-3.1-pro-preview",
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
    ],
    "gemini": [
        "gemini-3.1-pro-preview",
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
        "gemini-3.1-flash-lite-preview",
    ],
    "google-gemini-cli": [
        "gemini-3.1-pro-preview",
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
    ],
    "zai": [
        "glm-5.1",
        "glm-5",
        "glm-5v-turbo",
        "glm-5-turbo",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "xai": _xai_curated_models(),
    "nvidia": [
        # NVIDIA flagship reasoning models
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-nano-30b-a3b",
        "nvidia/llama-3.3-nemotron-super-49b-v1.5",
        # Third-party agentic models hosted on build.nvidia.com
        # (map to OpenRouter defaults — users get familiar picks on NIM)
        "qwen/qwen3.5-397b-a17b",
        "deepseek-ai/deepseek-v3.2",
        "moonshotai/kimi-k2.6",
        "minimaxai/minimax-m2.5",
        "z-ai/glm5",
        "openai/gpt-oss-120b",
    ],
    "kimi-coding": [
        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-for-coding",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "kimi-coding-cn": [
        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "stepfun": [
        "step-3.5-flash",
        "step-3.5-flash-2603",
    ],
    "moonshot": [
        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "minimax": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
    "minimax-oauth": [
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
    ],
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
    "anthropic": [
        "claude-opus-4-8",
        "claude-opus-4-7",
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-5-20251101",
        "claude-sonnet-4-5-20250929",
        "claude-opus-4-20250514",
        "claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
    ],
    "deepseek": [
        "deepseek-v4-pro",
        "deepseek-v4-flash",
        "deepseek-chat",
        "deepseek-reasoner",
    ],
    "xiaomi": [
        "mimo-v2.5-pro",
        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
    "tencent-tokenhub": [
        "hy3-preview",
    ],
    "arcee": [
        "trinity-large-thinking",
        "trinity-large-preview",
        "trinity-mini",
    ],
    "gmi": [
        "zai-org/GLM-5.1-FP8",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "google/gemini-3.1-flash-lite-preview",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
        "gpt-5.2",
        "gpt-5.2-codex",
        "gpt-5.1",
        "gpt-5.1-codex",
        "gpt-5.1-codex-max",
        "gpt-5.1-codex-mini",
        "gpt-5",
        "gpt-5-codex",
        "gpt-5-nano",
        "claude-opus-4-6",
        "claude-opus-4-5",
        "claude-opus-4-1",
        "claude-sonnet-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4",
        "claude-haiku-4-5",
        "claude-3-5-haiku",
        "gemini-3.1-pro",
        "gemini-3-pro",
        "gemini-3-flash",
        "minimax-m2.7",
        "minimax-m2.5",
        "minimax-m2.5-free",
        "minimax-m2.1",
        "glm-5",
        "glm-4.7",
        "glm-4.6",
        "kimi-k2-thinking",
        "kimi-k2",
        "qwen3-coder",
        "big-pickle",
    ],
    "opencode-go": [
        "kimi-k2.6",
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
        "mimo-v2.5-pro",
        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
        "qwen3.7-max",
        "qwen3.6-plus",
        "qwen3.5-plus",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
    ],
    # Alibaba DashScope Coding platform (coding-intl) — default endpoint.
    # Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
    # Users with classic DashScope keys should override DASHSCOPE_BASE_URL
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
        "qwen3.7-max",
        "qwen3.6-plus",
        "kimi-k2.5",
        "qwen3.5-plus",
        "qwen3-coder-plus",
        "qwen3-coder-next",
        # Third-party models available on coding-intl
        "glm-5",
        "glm-4.7",
        "MiniMax-M2.5",
    ],
    # Alibaba Coding Plan — same platform as alibaba (DashScope coding-intl),
    # separate provider ID with its own base_url_env_var.
    "alibaba-coding-plan": [
        "qwen3.7-max",
        "qwen3.6-plus",
        "qwen3.5-plus",
        "qwen3-coder-plus",
        "qwen3-coder-next",
        "kimi-k2.5",
        "glm-5",
        "glm-4.7",
        "MiniMax-M2.5",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
        "moonshotai/Kimi-K2.5",
        "Qwen/Qwen3.5-397B-A17B",
        "Qwen/Qwen3.5-35B-A3B",
        "deepseek-ai/DeepSeek-V3.2",
        "MiniMaxAI/MiniMax-M2.5",
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
        "moonshotai/Kimi-K2.6",
    ],
    # AWS Bedrock — static fallback list used when dynamic discovery is
    # unavailable (no boto3, no credentials, or API error). The agent
    # prefers live discovery via ListFoundationModels + ListInferenceProfiles.
    # Use inference profile IDs (us.*) since most models require them.
    "bedrock": [
        "us.anthropic.claude-sonnet-4-6",
        "us.anthropic.claude-opus-4-6-v1",
        "us.anthropic.claude-haiku-4-5-20251001-v1:0",
        "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.amazon.nova-micro-v1:0",
        "deepseek.v3.2",
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
    # Azure Foundry: user-provided endpoint and model.
    # Empty list because models depend on the endpoint configuration.
    "azure-foundry": [],
    "novita": [
        "moonshotai/kimi-k2.5",
        "minimax/minimax-m2.7",
        "zai-org/glm-5",
        "deepseek/deepseek-v3-0324",
        "deepseek/deepseek-r1-0528",
        "qwen/qwen3-235b-a22b-fp8",
    ],
}

# ---------------------------------------------------------------------------
# Nous Portal free-model helper
# ---------------------------------------------------------------------------
# The Nous Portal models endpoint is the source of truth for which models
# are currently offered (free or paid). We trust whatever it returns and
# surface it to users as-is — no local allowlist filtering.
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
    """Return True if *model_id* has zero-cost prompt AND completion pricing.

    A model with no pricing entry, an empty entry, a missing price field,
    an unparseable price, or an entry that is not a mapping is treated as
    not free.
    """
    entry = pricing.get(model_id)
    if not entry:
        return False
    try:
        # A missing field defaults to "1" (non-zero), so a partial entry
        # never counts as free.
        prompt_cost = float(entry.get("prompt", "1"))
        completion_cost = float(entry.get("completion", "1"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers a malformed (non-mapping) pricing entry.
        return False
    return prompt_cost == 0 and completion_cost == 0


# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------


def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
    """Return True if the account info indicates a free (unpaid) tier.

    Prefer the Portal's explicit ``paid_service_access.allowed`` entitlement
    decision, then ``paid_service_access.paid_access``. Legacy payloads fall
    back to ``subscription.monthly_charge == 0``.

    Returns False when every signal is missing or unparseable, including
    when *account_info* itself is not a dict.
    """
    if not isinstance(account_info, dict):
        return False

    paid_access = account_info.get("paid_service_access")
    if isinstance(paid_access, dict):
        # Only genuine booleans are trusted; anything else falls through to
        # the next signal.
        for flag_name in ("allowed", "paid_access"):
            flag = paid_access.get(flag_name)
            if isinstance(flag, bool):
                return not flag

    sub = account_info.get("subscription")
    if not isinstance(sub, dict):
        return False
    charge = sub.get("monthly_charge")
    if charge is None:
        return False
    try:
        return float(charge) == 0
    except (TypeError, ValueError):
        return False


def partition_nous_models_by_tier(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
    free_tier: bool,
) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

    For paid-tier users: all models are selectable, none unavailable.
    For free-tier users: only free models are selectable; paid models are
    returned as unavailable (shown grayed out in the menu). When *pricing*
    is empty the tier cannot be applied, so everything is selectable.

    Both returned lists are always new objects, so callers may mutate them
    without affecting *model_ids*.
    """
    if not free_tier or not pricing:
        # Paid tier, or pricing unknown: can't/needn't filter, show everything.
        return (list(model_ids), [])

    selectable: list[str] = []
    unavailable: list[str] = []
    for mid in model_ids:
        bucket = selectable if _is_model_free(mid, pricing) else unavailable
        bucket.append(mid)
    return (selectable, unavailable)


def _portal_recommended_ids(
    field: str,
    portal_base_url: str,
    force_refresh: bool,
) -> list[str]:
    """Return the de-duplicated model names listed under *field* in the Portal payload.

    Order of first appearance is preserved. Any fetch failure, a non-dict
    payload, or a missing/non-list field yields ``[]``.
    """
    try:
        payload = fetch_nous_recommended_models(
            portal_base_url, force_refresh=force_refresh
        )
    except Exception:
        return []
    entries = payload.get(field) if isinstance(payload, dict) else None
    if not isinstance(entries, list):
        return []
    names = (_extract_model_name(entry) for entry in entries)
    # dict.fromkeys keeps insertion order, so a model the Portal lists twice
    # is surfaced only once in the picker.
    return list(dict.fromkeys(name for name in names if name))


def _prepend_missing_ids(curated_ids: list[str], extra_ids: list[str]) -> list[str]:
    """Return a new list: *extra_ids* not already curated, followed by *curated_ids*."""
    already_listed = set(curated_ids)
    additions = [mid for mid in extra_ids if mid not in already_listed]
    return additions + list(curated_ids)


def union_with_portal_free_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment curated list + pricing with the Portal's ``freeRecommendedModels``.

    The Portal's ``/api/nous/recommended-models`` endpoint advertises which
    models are free *right now* — independent of what the in-repo
    ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether the
    docs-hosted catalog manifest has been rebuilt since the last release.
    For free-tier users this is the source of truth: any model the Portal
    flags as free should be selectable, even if the user is running an older
    Hermes that doesn't ship that model in its hardcoded curated list.

    Returns an augmented ``(model_ids, pricing)`` pair where:

    * Portal free recommendations missing from ``curated_ids`` are
      prepended (so the picker shows them first), each at most once.
    * ``pricing`` gets a synthetic ``{"prompt": "0", "completion": "0"}``
      entry for any free recommendation missing from the live pricing map,
      so :func:`partition_nous_models_by_tier` keeps it.

    Both returned containers are new objects; the inputs are never mutated.
    Failures (network, parse, missing field) are silent and degrade to
    returning copies of the inputs unchanged.
    """
    portal_free_ids = _portal_recommended_ids(
        "freeRecommendedModels", portal_base_url, force_refresh
    )

    augmented_pricing = dict(pricing)
    for mid in portal_free_ids:
        # Live pricing wins; only fill the gap with a fresh $0 entry.
        augmented_pricing.setdefault(mid, {"prompt": "0", "completion": "0"})

    return (_prepend_missing_ids(curated_ids, portal_free_ids), augmented_pricing)


def union_with_portal_paid_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment curated list with the Portal's ``paidRecommendedModels``.

    Mirror of :func:`union_with_portal_free_recommendations` for paid-tier
    users. The Portal's ``/api/nous/recommended-models`` endpoint advertises
    which paid models are blessed *right now* — independent of what the
    in-repo ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether
    the docs-hosted catalog manifest has been rebuilt since the last
    release. For paid-tier users this lets newly-launched paid models
    surface in the picker even if the user is running an older Hermes that
    doesn't ship them in its hardcoded curated list.

    Returns an augmented ``(model_ids, pricing)`` pair where:

    * Portal paid recommendations missing from ``curated_ids`` are
      prepended (so the picker shows them first), each at most once.
    * ``pricing`` is copied but otherwise left untouched — we deliberately
      do NOT synthesize pricing entries for paid models. Live pricing is
      fetched separately via :func:`get_pricing_for_provider`; if the live
      endpoint hasn't published pricing yet, the picker shows a blank price
      column rather than fabricating numbers. (The free helper synthesizes
      ``$0`` so :func:`partition_nous_models_by_tier` keeps free models
      selectable; no equivalent gating applies on the paid side, so
      synthesis would only mislead the user.)

    Failures (network, parse, missing field) are silent and degrade to
    returning copies of the inputs unchanged — never block the picker on a
    Portal-side hiccup.
    """
    portal_paid_ids = _portal_recommended_ids(
        "paidRecommendedModels", portal_base_url, force_refresh
    )
    return (_prepend_missing_ids(curated_ids, portal_paid_ids), dict(pricing))


# ---------------------------------------------------------------------------
# TTL cache for free-tier detection — avoids repeated API calls within a
# session while still picking up upgrades quickly.
# ---------------------------------------------------------------------------

_FREE_TIER_CACHE_TTL: int = 180  # seconds (3 minutes)
_free_tier_cache: tuple[bool, float] | None = None  # (result, timestamp)


def check_nous_free_tier(*, force_fresh: bool = False) -> bool:
    """Report whether the current Nous Portal user is on a free (unpaid) tier.

    The answer is memoised in ``_free_tier_cache`` for
    ``_FREE_TIER_CACHE_TTL`` seconds so the Portal is not queried on every
    call; the short lifetime lets an account upgrade show up within a few
    minutes. ``force_fresh=True`` skips the cached answer and is forwarded
    to the account lookup.

    Returns True only when entitlement is known to be free. Any lookup
    error yields False (and that False is cached too), so this
    compatibility wrapper never blocks users.
    """
    global _free_tier_cache
    started = time.monotonic()

    snapshot = None if force_fresh else _free_tier_cache
    if snapshot is not None:
        verdict, stamped_at = snapshot
        if started - stamped_at < _FREE_TIER_CACHE_TTL:
            return verdict

    try:
        from hermes_cli.nous_account import get_nous_portal_account_info

        verdict = get_nous_portal_account_info(force_fresh=force_fresh).is_free_tier
    except Exception:
        # default to paid on error — don't block users
        verdict = False

    # The timestamp is the one taken before the lookup started.
    _free_tier_cache = (verdict, started)
    return verdict


# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks. We fetch it once per process
# with a TTL cache so callers can ask "what's the best aux model right now?"
# without hitting the network on every lookup.
#
# Shape of the response (fields we care about):
#   {
#     "paidRecommendedModels": [ {modelName, ...}, ... ],
#     "freeRecommendedModels": [ {modelName, ...}, ...
#     ],
#     "paidRecommendedCompactionModel": {modelName, ...} | null,
#     "paidRecommendedVisionModel":     {modelName, ...} | null,
#     "freeRecommendedCompactionModel": {modelName, ...} | null,
#     "freeRecommendedVisionModel":     {modelName, ...} | null,
#   }
# ---------------------------------------------------------------------------

NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}


def _build_recommended_models_request(url: str) -> urllib.request.Request:
    """Build the GET request for the recommended-models endpoint.

    Sends ``_HERMES_USER_AGENT`` instead of urllib's default
    ``Python-urllib/*`` signature, which endpoints fronted by Cloudflare's
    Browser Integrity Check reject (error 1010).
    """
    return urllib.request.Request(
        url,
        headers={
            "Accept": "application/json",
            "User-Agent": _HERMES_USER_AGENT,
        },
    )


def fetch_nous_recommended_models(
    portal_base_url: str = "",
    timeout: float = 5.0,
    *,
    force_refresh: bool = False,
) -> dict[str, Any]:
    """Fetch the Nous Portal's curated recommended-models payload.

    Hits ``/api/nous/recommended-models`` on *portal_base_url* (default
    ``https://portal.nousresearch.com``). The request carries no
    credentials. Results are cached per portal URL for
    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
    bypass the cache.

    Returns the parsed JSON dict on success, or ``{}`` on any failure
    (network, parse, non-dict body). A failure result is cached like a
    success, so a broken Portal is not re-queried on every call. Callers
    must treat missing/null fields as "no recommendation" and fall back to
    their own default.
    """
    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
    now = time.monotonic()
    cached = _nous_recommended_cache.get(base)
    if not force_refresh and cached is not None:
        payload, cached_at = cached
        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
            return payload

    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
    try:
        req = _build_recommended_models_request(url)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
        if not isinstance(data, dict):
            data = {}
    except Exception:
        # Best-effort by contract: never raise out of the picker path.
        data = {}

    _nous_recommended_cache[base] = (data, now)
    return data


def _resolve_nous_portal_url() -> str:
    """Best-effort lookup of the Portal base URL the user is authed against.

    Uses the ``portal_base_url`` stored in the "nous" provider auth state
    when present, otherwise ``DEFAULT_NOUS_PORTAL_URL``; if the auth module
    cannot be imported or queried, falls back to the production URL.
    The result never ends with a trailing slash.
    """
    try:
        from hermes_cli.auth import (
            DEFAULT_NOUS_PORTAL_URL,
            get_provider_auth_state,
        )

        state = get_provider_auth_state("nous") or {}
        portal = str(state.get("portal_base_url") or "").strip()
        if portal:
            return portal.rstrip("/")
        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
    except Exception:
        return "https://portal.nousresearch.com"


def _extract_model_name(entry: Any) -> Optional[str]:
    """Pull the ``modelName`` field from a recommended-model entry, else None.

    Returns the name stripped of surrounding whitespace; non-dict entries
    and missing, non-string or blank names all yield None.
    """
    if not isinstance(entry, dict):
        return None
    model_name = entry.get("modelName")
    if isinstance(model_name, str) and model_name.strip():
        return model_name.strip()
    return None


def get_nous_recommended_aux_model(
    *,
    vision: bool = False,
    free_tier: Optional[bool] = None,
    portal_base_url: str = "",
    force_refresh: bool = False,
) -> Optional[str]:
    """Return the Portal's recommended model name for an auxiliary task.

    Picks the best field from the Portal's recommended-models payload:

    * ``vision=True``  → ``paidRecommendedVisionModel`` (paid tier) or
      ``freeRecommendedVisionModel`` (free tier)
    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
      ``freeRecommendedCompactionModel``

    When ``free_tier`` is ``None`` (default) the user's tier is
    auto-detected via :func:`check_nous_free_tier`. Pass an explicit bool to
    bypass the detection — useful for tests or when the caller already knows
    the tier.

    For paid-tier users we prefer the paid recommendation but gracefully
    fall back to the free recommendation if the Portal returned ``null`` for
    the paid field (common during the staged rollout of new paid models).

    Returns ``None`` when every candidate is missing, null, or the fetch
    fails — callers should fall back to their own default (currently
    ``google/gemini-3-flash-preview``).
    """
    base = portal_base_url or _resolve_nous_portal_url()
    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
    if not payload:
        return None

    if free_tier is None:
        try:
            free_tier = check_nous_free_tier()
        except Exception:
            # On any detection error, assume paid — paid users see both fields
            # anyway so this is a safe default that maximises model quality.
            free_tier = False

    if vision:
        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
    else:
        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"

    # Preference order:
    #   free tier → free only
    #   paid tier → paid, then free (if paid field is null)
    candidates = [free_key] if free_tier else [paid_key, free_key]
    for key in candidates:
        name = _extract_model_name(payload.get(key))
        if name:
            return name
    return None


# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
# this list: hermes model, /model, list_authenticated_providers.
#
# Fields:
#   slug      — internal provider ID (used in config.yaml, --provider flag)
#   label     — short display name
#   tui_desc  — longer description for the `hermes model` interactive picker
# ---------------------------------------------------------------------------


class ProviderEntry(NamedTuple):
    """One provider row: identity plus the two display strings used by pickers."""

    slug: str
    label: str
    tui_desc: str  # detailed description for `hermes model` TUI


# Each row is (slug, label, tui_desc); list order is the order declared here.
CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry._make(row)
    for row in (
        ("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
        ("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
        ("novita", "NovitaAI", "NovitaAI (AI-native cloud: Model API, Agent Sandbox, GPU Cloud)"),
        ("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
        ("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
        ("openai-codex", "OpenAI Codex", "OpenAI Codex"),
        ("openai-api", "OpenAI API", "OpenAI API (api.openai.com, API key)"),
        ("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
        ("xai-oauth", "xAI Grok OAuth (SuperGrok / Premium+)", "xAI Grok OAuth (SuperGrok / Premium+)"),
        ("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
        ("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
        ("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
        ("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
        ("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
        ("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
        ("gemini", "Google AI Studio", "Google AI Studio (Gemini models — native Gemini API)"),
        ("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
        ("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
        ("xai", "xAI", "xAI (Grok models — direct API)"),
        ("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
        ("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
        ("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
        ("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"),
        ("minimax", "MiniMax", "MiniMax (global direct API)"),
        ("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
        ("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
        ("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
        ("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
        ("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
        ("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
        ("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
        ("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
        ("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
        ("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
        ("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
    )
]

# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
# that is not already in the list above.
# Adding plugins/model-providers/<name>/
# is sufficient to expose a new provider in the model picker, /model, and all
# downstream consumers — no edits to this file needed.

# Slugs already present; updated as entries are appended so a provider the
# registry yields twice is only added once.
_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
try:
    from providers import list_providers as _list_providers_for_canonical
    for _pp in _list_providers_for_canonical():
        if _pp.name in _canonical_slugs:
            continue
        if _pp.auth_type in {"oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"}:
            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
        # Fall back to the slug when no display name is set, and to a generic
        # "<label> (direct API)" description when none is provided.
        _label = _pp.display_name or _pp.name
        _desc = _pp.description or f"{_label} (direct API)"
        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
        _canonical_slugs.add(_pp.name)
except Exception:
    # Best-effort: any failure (import error, unexpected registry entry)
    # is swallowed. Entries appended before the failure stay in the list.
    pass

# Derived dicts — used throughout the codebase
# Built after the auto-extend step above, so it covers appended providers too.
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

# ---------------------------------------------------------------------------
# Provider groups — DISPLAY ONLY
#
# Some vendors expose several Hermes provider slugs (one per endpoint /
# auth method: global API, China API, OAuth coding plan, ...). Listing every
# slug as a top-level row in the interactive `hermes model` / setup wizard /
# Telegram `/model` pickers makes that list long and noisy.
#
# These groups fold related slugs under one top-level row in INTERACTIVE
# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity,
# the ``--provider`` flag, the ``/model`` command, or any typed path —
# every member slug remains individually addressable. Grouping is a pure
# display affordance; ``group_providers()`` is the single fold used by all
# three picker surfaces so they stay consistent.
#
# group_id -> (display_label, [member_slug, ...])
#
# Member order is the order shown inside the group submenu.
# --------------------------------------------------------------------------- PROVIDER_GROUPS: dict[str, tuple[str, list[str]]] = { "kimi": ("Kimi / Moonshot", ["kimi-coding", "kimi-coding-cn"]), "minimax": ("MiniMax", ["minimax", "minimax-oauth", "minimax-cn"]), "xai": ("xAI Grok", ["xai", "xai-oauth"]), "google": ("Google Gemini", ["gemini", "google-gemini-cli"]), "openai": ("OpenAI", ["openai-codex", "openai-api"]), "opencode": ("OpenCode", ["opencode-zen", "opencode-go"]), "copilot": ("GitHub Copilot", ["copilot", "copilot-acp"]), } # Reverse index: member slug -> group_id. Built once at import. _SLUG_TO_GROUP: dict[str, str] = { slug: gid for gid, (_label, members) in PROVIDER_GROUPS.items() for slug in members } def provider_group_for_slug(slug: str) -> str: """Return the group_id a provider slug belongs to, or "" if ungrouped.""" return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "") def group_providers(slugs): """Fold a flat ordered slug iterable into picker rows by provider group. DISPLAY ONLY. Used by every interactive picker (``hermes model``, the setup wizard, the Telegram ``/model`` keyboard) so grouping is identical across surfaces. Each returned row is a dict:: {"kind": "single", "slug": } # ungrouped, or # 1-member group {"kind": "group", "group_id": , "label":