fix(auxiliary): auto-detect Anthropic Messages transport for all aux clients (#17027)

Auxiliary tasks (title_generation, vision, compression, web_extract,
session_search) now pick the correct wire protocol based on the
endpoint, not just on which resolve_provider_client branch built the
client.  Fixes 404s on Kimi Coding Plan and any other named provider
whose endpoint speaks Anthropic Messages.

Root cause: the 'api_key' branch of resolve_provider_client (and the
Step 2 fallback chain inside _resolve_auto) always built a plain
OpenAI client regardless of what the endpoint actually spoke.  For
provider=kimi-coding + model=kimi-for-coding, that meant:

    POST https://api.kimi.com/coding/v1/chat/completions
    { "model": "kimi-for-coding", ... }
    → 404 resource_not_found_error

The /coding route only accepts the Anthropic Messages shape (the main
agent already uses api_mode=anthropic_messages for it).  Earlier fixes
(#16819, #22ddac4b1) patched the anonymous-custom, named-custom, and
external-process branches — but the named api_key branch (kimi-coding,
minimax, zai, future /anthropic providers) was the fourth sibling and
never got the same treatment.

Fix: one module-level helper _maybe_wrap_anthropic() that rewraps a
plain OpenAI client in AnthropicAuxiliaryClient when:

  - api_mode is explicitly 'anthropic_messages', OR
  - the URL ends in '/anthropic', OR
  - the host is api.kimi.com + path contains '/coding', OR
  - the host is api.anthropic.com.

Wired into _wrap_if_needed (covers all resolve_provider_client
branches that already go through it) and into the Step 2 api_key
fallback chain inside _resolve_auto.  Explicit api_mode still wins:
passing api_mode='chat_completions' forces OpenAI wire, and already-
wrapped specialized adapters (Codex, Gemini native, CopilotACP) pass
through unchanged.

E2E verified:
- resolve_provider_client('kimi-coding', 'kimi-for-coding')
  → AnthropicAuxiliaryClient (was plain OpenAI, which 404'd)
- _resolve_auto Step 1 for kimi-coding runtime → AnthropicAuxiliaryClient
- resolve_provider_client('openrouter', ...) → plain OpenAI (no regression)
- api_mode='chat_completions' override → plain OpenAI (explicit wins)

Tests:
- tests/agent/test_auxiliary_transport_autodetect.py (new): 21 tests
  covering URL detection, wrap decisions, and integration.
- 204/205 existing auxiliary tests pass (1 pre-existing failure on
  main, unrelated to this change).

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
This commit is contained in:
Teknium
2026-04-28 06:50:14 -07:00
committed by GitHub
parent e123f4ecf0
commit 1d8b9e6458
2 changed files with 388 additions and 11 deletions

View File

@ -744,6 +744,116 @@ class AsyncAnthropicAuxiliaryClient:
self.base_url = sync_wrapper.base_url
def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
"""True if the endpoint at ``base_url`` speaks the Anthropic Messages
protocol instead of OpenAI chat.completions.
Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
auxiliary client and the main agent stay in sync on transport selection.
Covers:
- Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
Anthropic-compatible gateways).
- ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
speaks Claude-Code's native Anthropic shape; ``chat.completions``
returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
- ``api.anthropic.com`` (native Anthropic).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if not normalized:
return False
if normalized.endswith("/anthropic"):
return True
hostname = base_url_hostname(normalized)
if hostname == "api.anthropic.com":
return True
if hostname == "api.kimi.com" and "/coding" in normalized:
return True
return False
def _maybe_wrap_anthropic(
    client_obj: Any,
    model: str,
    api_key: str,
    base_url: str,
    api_mode: Optional[str] = None,
) -> Any:
    """Swap a plain OpenAI client for ``AnthropicAuxiliaryClient`` if needed.

    Central place where an auxiliary client's transport is corrected: a
    client built as OpenAI-wire is replaced by an Anthropic Messages adapter
    when the endpoint requires it.

    Args:
        client_obj: The client produced by a resolver branch.
        model: Model name handed to the Anthropic wrapper.
        api_key: Credential used to build the Anthropic SDK client.
        base_url: Endpoint URL; also drives URL-based detection.
        api_mode: Optional explicit transport.  ``"anthropic_messages"``
            forces wrapping; any other non-empty value disables it.

    Returns:
        ``client_obj`` itself when no rewrap applies: it is already an
        Anthropic / Codex / Gemini-native / Copilot-ACP client, ``api_mode``
        names a different transport, the URL is not detected as
        Anthropic-wire, ``agent.anthropic_adapter`` cannot be imported, or
        building the Anthropic client raises.  Otherwise a new
        ``AnthropicAuxiliaryClient`` created with ``is_oauth=False``.
    """
    # Adapters that already own their transport are handed back untouched
    # (this also prevents double-wrapping an Anthropic client).
    if isinstance(client_obj, (AnthropicAuxiliaryClient, CodexAuxiliaryClient)):
        return client_obj

    # The Gemini and Copilot adapters are optional modules; when one is
    # missing the client cannot be an instance of it, so the check is skipped.
    try:
        from agent.gemini_native_adapter import GeminiNativeClient
    except ImportError:
        pass
    else:
        if isinstance(client_obj, GeminiNativeClient):
            return client_obj

    try:
        from agent.copilot_acp_client import CopilotACPClient
    except ImportError:
        pass
    else:
        if isinstance(client_obj, CopilotACPClient):
            return client_obj

    forced_anthropic = api_mode == "anthropic_messages"

    # An explicit non-Anthropic api_mode beats any URL heuristic.
    if api_mode and not forced_anthropic:
        return client_obj

    # Without an explicit request, fall back to URL-based detection.
    if not forced_anthropic and not _endpoint_speaks_anthropic_messages(base_url):
        return client_obj

    try:
        from agent.anthropic_adapter import build_anthropic_client
    except ImportError:
        logger.warning(
            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
            "not installed — falling back to OpenAI-wire (will likely 404).",
            base_url,
        )
        return client_obj

    # Best effort: a failure to build the SDK client degrades to the
    # original OpenAI-wire client instead of propagating.
    try:
        anthropic_sdk_client = build_anthropic_client(api_key, base_url)
    except Exception as exc:
        logger.warning(
            "Failed to build Anthropic client for %s (%s) — falling back to "
            "OpenAI-wire client.", base_url, exc,
        )
        return client_obj

    url_for_log = base_url[:60] if base_url else ""
    mode_for_log = api_mode or "auto-detected"
    logger.debug(
        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
        "(model=%s, base_url=%s, api_mode=%s)",
        model, url_for_log, mode_for_log,
    )
    return AnthropicAuxiliaryClient(
        anthropic_sdk_client, model, api_key, base_url, is_oauth=False,
    )
def _read_nous_auth() -> Optional[dict]:
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
@ -914,7 +1024,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
return _client, model
creds = resolve_api_key_provider_credentials(provider_id)
api_key = str(creds.get("api_key", "")).strip()
@ -940,7 +1052,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
return _client, model
return None, None
@ -1224,7 +1338,13 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
# URL-based anthropic detection for custom endpoints that didn't set
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
_fallback_client = _maybe_wrap_anthropic(
_fallback_client, model, custom_key, custom_base, custom_mode,
)
return _fallback_client, model
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@ -1775,8 +1895,20 @@ def resolve_provider_client(
return True
return False
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
"""Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
api_key_str: str = ""):
"""Wrap a plain OpenAI client in the correct transport adapter.
Handles two cases:
- ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
(explicit ``api_mode=codex_responses`` or api.openai.com + codex
model name).
- ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
Clients that are already specialized wrappers pass through unchanged.
"""
if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
logger.debug(
"resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@ -1784,7 +1916,11 @@ def resolve_provider_client(
api_mode or "auto-detected", final_model_str,
base_url_str[:60] if base_url_str else "")
return CodexAuxiliaryClient(client_obj, final_model_str)
return client_obj
# Anthropic-wire endpoints: rewrap plain OpenAI clients so
# chat.completions.create() is translated to /v1/messages.
return _maybe_wrap_anthropic(
client_obj, final_model_str, api_key_str, base_url_str, api_mode,
)
# ── Auto: try all providers in priority order ────────────────────
if provider == "auto":
@ -1892,7 +2028,7 @@ def resolve_provider_client(
is_agent_turn=True, is_vision=is_vision
)
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base)
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# Try custom first, then codex, then API-key providers
@ -1902,7 +2038,8 @@ def resolve_provider_client(
if client is not None:
final_model = _normalize_resolved_model(model or default, provider)
_cbase = str(getattr(client, "base_url", "") or "")
client = _wrap_if_needed(client, final_model, _cbase)
_ckey = str(getattr(client, "api_key", "") or "")
client = _wrap_if_needed(client, final_model, _cbase, _ckey)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: custom/main requested "
@ -1983,7 +2120,7 @@ def resolve_provider_client(
):
client = CodexAuxiliaryClient(client, final_model)
else:
client = _wrap_if_needed(client, final_model, openai_base)
client = _wrap_if_needed(client, final_model, openai_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
logger.warning(
@ -2076,8 +2213,11 @@ def resolve_provider_client(
# Honor api_mode for any API-key provider (e.g. direct OpenAI with
# codex-family models). The copilot-specific wrapping above handles
# copilot; this covers the general case (#6800).
client = _wrap_if_needed(client, final_model, base_url)
# copilot; this covers the general case (#6800). Also rewraps
# Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
# /anthropic-suffixed gateways) so named providers like kimi-coding
# land on the right transport without needing per-provider branches.
client = _wrap_if_needed(client, final_model, base_url, api_key)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode

View File

@ -0,0 +1,237 @@
"""Tests for transport auto-detection in agent.auxiliary_client.
Auxiliary clients must pick the correct wire protocol (OpenAI
chat.completions vs native Anthropic Messages) based on the endpoint,
regardless of which resolve_provider_client branch built them.
Regression target (April 2026): Kimi Coding Plan's ``api.kimi.com/coding``
endpoint only speaks Anthropic Messages — sending ``kimi-for-coding`` over
chat.completions returns 404 "resource_not_found_error". The named
``kimi-coding`` provider branch in resolve_provider_client used to build a
plain OpenAI client, so title generation / vision / compression /
web_extract all failed on Kimi Coding Plan users.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider credential and base-URL variables before each test.

    Autouse, so every test in this module starts without these variables
    set in the process environment.
    """
    provider_env_vars = (
        "OPENAI_API_KEY",
        "OPENAI_BASE_URL",
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_TOKEN",
        "KIMI_API_KEY",
        "KIMI_CODING_API_KEY",
        "KIMI_BASE_URL",
    )
    for var_name in provider_env_vars:
        # raising=False: a variable that is already absent is not an error.
        monkeypatch.delenv(var_name, raising=False)
# ---------------------------------------------------------------------------
# URL detection helper
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("url,expected,label", [
    ("https://api.kimi.com/coding/v1", True, "Kimi Coding Plan /v1"),
    ("https://api.kimi.com/coding", True, "Kimi Coding Plan no /v1"),
    ("https://api.moonshot.ai/v1", False, "Moonshot legacy"),
    ("https://api.minimax.io/anthropic", True, "MiniMax /anthropic"),
    ("https://litellm.example.com/v1/anthropic", True, "/anthropic suffix"),
    ("https://api.anthropic.com", True, "native Anthropic"),
    ("https://api.anthropic.com/v1", True, "native Anthropic /v1"),
    ("https://openrouter.ai/api/v1", False, "OpenRouter"),
    ("https://api.openai.com/v1", False, "OpenAI"),
    ("https://inference-api.nousresearch.com/v1", False, "Nous"),
    ("", False, "empty"),
    (None, False, "None"),
])
def test_endpoint_speaks_anthropic_messages(url, expected, label):
    """Each URL in the table is classified as Anthropic-wire or not.

    ``is`` is used rather than ``==`` so the helper must return a real
    ``bool``, not merely a truthy or falsy value.
    """
    from agent.auxiliary_client import _endpoint_speaks_anthropic_messages

    verdict = _endpoint_speaks_anthropic_messages(url)
    failure_note = f"{label}: {url!r} should be {expected}"
    assert verdict is expected, failure_note
# ---------------------------------------------------------------------------
# _maybe_wrap_anthropic decision table
# ---------------------------------------------------------------------------
def test_maybe_wrap_anthropic_rewraps_kimi_coding_url():
    """A plain OpenAI client aimed at api.kimi.com/coding must be rewrapped."""
    from agent.auxiliary_client import AnthropicAuxiliaryClient, _maybe_wrap_anthropic

    openai_stub = MagicMock(name="plain_openai")
    sdk_stub = MagicMock(name="anthropic_sdk_client")

    # Stub the SDK builder so no real Anthropic client is constructed.
    builder_patch = patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=sdk_stub,
    )
    with builder_patch:
        outcome = _maybe_wrap_anthropic(
            openai_stub,
            "kimi-for-coding",
            "sk-kimi-test",
            "https://api.kimi.com/coding",
            api_mode=None,
        )

    assert isinstance(outcome, AnthropicAuxiliaryClient)
def test_maybe_wrap_anthropic_rewraps_slash_anthropic_url():
    """A plain OpenAI client aimed at an ``/anthropic`` URL must be rewrapped."""
    from agent.auxiliary_client import AnthropicAuxiliaryClient, _maybe_wrap_anthropic

    openai_stub = MagicMock(name="plain_openai")
    sdk_stub = MagicMock(name="anthropic_sdk_client")

    # Stub the SDK builder so no real Anthropic client is constructed.
    builder_patch = patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=sdk_stub,
    )
    with builder_patch:
        outcome = _maybe_wrap_anthropic(
            openai_stub,
            "MiniMax-M2.7",
            "mm-key",
            "https://api.minimax.io/anthropic",
            api_mode=None,
        )

    assert isinstance(outcome, AnthropicAuxiliaryClient)
def test_maybe_wrap_anthropic_skips_openai_wire_urls():
    """OpenRouter / OpenAI / Moonshot-legacy stay as plain OpenAI clients."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    openai_wire_urls = (
        "https://openrouter.ai/api/v1",
        "https://api.openai.com/v1",
        "https://api.moonshot.ai/v1",
    )

    # _maybe_wrap_anthropic catches builder failures and returns the original
    # client, so an identity check alone could pass even if a wrap had been
    # attempted.  Patch the builder and assert it is never reached.
    with patch(
        "agent.anthropic_adapter.build_anthropic_client",
    ) as build_mock:
        for url in openai_wire_urls:
            plain_client = MagicMock(name="plain_openai")
            result = _maybe_wrap_anthropic(
                plain_client, "claude-sonnet-4.6", "sk-or-test",
                url, api_mode=None,
            )
            assert result is plain_client, f"{url} must not be rewrapped"
            assert not isinstance(result, AnthropicAuxiliaryClient)

    build_mock.assert_not_called()
def test_maybe_wrap_anthropic_respects_explicit_chat_completions():
    """api_mode=chat_completions overrides URL heuristics."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")

    # _maybe_wrap_anthropic returns the original client when the Anthropic
    # builder is unavailable or raises, so checking the return value alone
    # cannot prove the override short-circuited.  Patch the builder and
    # assert it was never invoked.
    with patch(
        "agent.anthropic_adapter.build_anthropic_client",
    ) as build_mock:
        result = _maybe_wrap_anthropic(
            plain_client, "kimi-for-coding", "sk-kimi-test",
            "https://api.kimi.com/coding",
            api_mode="chat_completions",  # explicit override
        )

    assert result is plain_client, "Explicit chat_completions must bypass wrap"
    assert not isinstance(result, AnthropicAuxiliaryClient)
    build_mock.assert_not_called()
def test_maybe_wrap_anthropic_honors_explicit_anthropic_messages():
    """An explicit ``anthropic_messages`` mode wraps regardless of the URL."""
    from agent.auxiliary_client import AnthropicAuxiliaryClient, _maybe_wrap_anthropic

    openai_stub = MagicMock(name="plain_openai")
    sdk_stub = MagicMock(name="anthropic_sdk_client")

    # This URL matches none of the detection rules; only api_mode can
    # trigger the wrap here.
    undetectable_url = "https://opaque.internal/v1"

    builder_patch = patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=sdk_stub,
    )
    with builder_patch:
        outcome = _maybe_wrap_anthropic(
            openai_stub,
            "model-name",
            "some-key",
            undetectable_url,
            api_mode="anthropic_messages",
        )

    assert isinstance(outcome, AnthropicAuxiliaryClient)
def test_maybe_wrap_anthropic_double_wrap_safe():
    """An existing AnthropicAuxiliaryClient is returned as the same object."""
    from agent.auxiliary_client import AnthropicAuxiliaryClient, _maybe_wrap_anthropic

    # spec= makes the mock pass isinstance(..., AnthropicAuxiliaryClient).
    prewrapped = MagicMock(spec=AnthropicAuxiliaryClient)

    outcome = _maybe_wrap_anthropic(
        prewrapped,
        "model",
        "key",
        "https://api.kimi.com/coding",
        api_mode=None,
    )

    assert outcome is prewrapped
def test_maybe_wrap_anthropic_codex_client_passes_through():
    """A CodexAuxiliaryClient is returned as-is, even for an Anthropic URL."""
    from agent.auxiliary_client import (
        AnthropicAuxiliaryClient,
        CodexAuxiliaryClient,
        _maybe_wrap_anthropic,
    )

    # spec= makes the mock pass isinstance(..., CodexAuxiliaryClient).
    codex_stub = MagicMock(spec=CodexAuxiliaryClient)

    outcome = _maybe_wrap_anthropic(
        codex_stub,
        "model",
        "key",
        "https://api.kimi.com/coding",
        api_mode=None,
    )

    assert outcome is codex_stub
    assert not isinstance(outcome, AnthropicAuxiliaryClient)
def test_maybe_wrap_anthropic_sdk_missing_falls_back():
    """ImportError on the Anthropic adapter returns the plain client."""
    import sys

    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")

    # _maybe_wrap_anthropic imports the adapter lazily with
    # ``from agent.anthropic_adapter import ...``.  A ``None`` entry in
    # sys.modules makes that import raise ImportError, which is the path
    # under test.  patch.dict restores sys.modules on exit, whether or not
    # the module had been imported beforehand.
    with patch.dict(sys.modules, {"agent.anthropic_adapter": None}):
        result = _maybe_wrap_anthropic(
            plain_client, "kimi-for-coding", "sk-kimi-test",
            "https://api.kimi.com/coding", api_mode=None,
        )

    assert result is plain_client
    assert not isinstance(result, AnthropicAuxiliaryClient)
# ---------------------------------------------------------------------------
# Integration: resolve_provider_client for named kimi-coding provider
# ---------------------------------------------------------------------------
def test_resolve_provider_client_kimi_coding_wraps_anthropic(monkeypatch, tmp_path):
    """End-to-end guard for the named ``kimi-coding`` provider.

    ``resolve_provider_client('kimi-coding', 'kimi-for-coding')`` has to
    hand back an ``AnthropicAuxiliaryClient`` because the /coding route
    speaks Anthropic Messages.  This is the primary regression guard for
    the title-generation 404s seen by Kimi Coding Plan users.
    """
    from agent.auxiliary_client import (
        AnthropicAuxiliaryClient,
        resolve_provider_client,
    )

    # Point HERMES_HOME at an empty temp dir for the duration of the test.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # sk-kimi- prefix triggers /coding endpoint auto-detection
    monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-faketesttoken123")

    resolved = resolve_provider_client("kimi-coding", "kimi-for-coding")
    client, _resolved_model = resolved

    assert client is not None, "Should resolve a client"
    wrong_type_note = (
        "Kimi Coding Plan endpoint (api.kimi.com/coding) speaks Anthropic "
        "Messages — aux client MUST be AnthropicAuxiliaryClient, got "
        f"{type(client).__name__}"
    )
    assert isinstance(client, AnthropicAuxiliaryClient), wrong_type_note
    assert "kimi.com/coding" in str(client.base_url)