fix(anthropic): complete third-party Anthropic-compatible provider support (#12846)

Third-party gateways that speak the native Anthropic protocol (MiniMax,
Zhipu GLM, Alibaba DashScope, Kimi, LiteLLM proxies) now work end-to-end
with the same feature set as direct api.anthropic.com callers.  Synthesizes
eight stale community PRs into one consolidated change.

Six fixes:

- URL detection: consolidate three inline `endswith("/anthropic")`
  checks in runtime_provider.py into the shared _detect_api_mode_for_url
  helper.  Third-party /anthropic endpoints now auto-resolve to
  api_mode=anthropic_messages via one code path instead of three.

- OAuth leak-guard: all five sites that assign `_is_anthropic_oauth`
  (__init__, switch_model, _try_refresh_anthropic_client_credentials,
  _swap_credential, _try_activate_fallback) now gate on
  `provider == "anthropic"` so a stale ANTHROPIC_TOKEN never trips
  Claude-Code identity injection on third-party endpoints.  Previously
  only 2 of 5 sites were guarded.

- Prompt caching: new method `_anthropic_prompt_cache_policy()` returns
  `(should_cache, use_native_layout)` per endpoint.  Replaces three
  inline conditions and the `native_anthropic=(api_mode=='anthropic_messages')`
  call-site flag.  Native Anthropic and third-party Anthropic gateways
  both get the native cache_control layout; OpenRouter gets envelope
  layout.  Layout is persisted in `_primary_runtime` so fallback
  restoration preserves the per-endpoint choice.

- Auxiliary client: `_try_custom_endpoint` honors
  `api_mode=anthropic_messages` and builds `AnthropicAuxiliaryClient`
  instead of silently downgrading to an OpenAI-wire client.  Degrades
  gracefully to OpenAI-wire when the anthropic SDK isn't installed.

- Config hygiene: `_update_config_for_provider` (hermes_cli/auth.py)
  clears stale `api_key`/`api_mode` when switching to a built-in
  provider, so a previous MiniMax custom endpoint's credentials can't
  leak into a later OpenRouter session.

- Truncation continuation: length-continuation and tool-call-truncation
  retry now cover `anthropic_messages` in addition to `chat_completions`
  and `bedrock_converse`.  Reuses the existing `_build_assistant_message`
  path via `normalize_anthropic_response()` so the interim message
  shape is byte-identical to the non-truncated path.

Tests: 6 new files, 42 test cases.  Targeted run + tests/run_agent,
tests/agent, tests/hermes_cli all pass (4554 passed).

Synthesized from (credits preserved via Co-authored-by trailers):
  #7410  @nocoo           — URL detection helper
  #7393  @keyuyuan        — OAuth 5-site guard
  #7367  @n-WN            — OAuth guard (narrower cousin, kept comment)
  #8636  @sgaofen         — caching helper + native-vs-proxy layout split
  #10954 @Only-Code-A     — caching on anthropic_messages+Claude
  #7648  @zhongyueming1121 — aux client anthropic_messages branch
  #6096  @hansnow         — /model switch clears stale api_mode
  #9691  @TroyMitchell911 — anthropic_messages truncation continuation

Closes: #7366, #8294 (third-party Anthropic identity + caching).
Supersedes: #7410, #7367, #7393, #8636, #10954, #7648, #6096, #9691.
Rejects:    #9621 (OpenAI-wire caching with incomplete blocklist — risky),
            #7242 (superseded by #9691, stale branch),
            #8321 (targets smart_model_routing which was removed in #12732).

Co-authored-by: nocoo <nocoo@users.noreply.github.com>
Co-authored-by: Keyu Yuan <leoyuan0099@gmail.com>
Co-authored-by: Zoee <30841158+n-WN@users.noreply.github.com>
Co-authored-by: sgaofen <135070653+sgaofen@users.noreply.github.com>
Co-authored-by: Only-Code-A <bxzt2006@163.com>
Co-authored-by: zhongyueming <mygamez@163.com>
Co-authored-by: Xiaohan Li <hansnow@users.noreply.github.com>
Co-authored-by: Troy Mitchell <i@troy-y.org>
This commit is contained in:
Teknium
2026-04-19 22:43:09 -07:00
committed by GitHub
parent 491cf25eef
commit 65a31ee0d5
11 changed files with 911 additions and 58 deletions

View File

@ -1098,7 +1098,7 @@ def _validate_base_url(base_url: str) -> None:
) from exc
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
runtime = _resolve_custom_runtime()
if len(runtime) == 2:
custom_base, custom_key = runtime
@ -1114,6 +1114,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
if custom_mode == "codex_responses":
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
# LiteLLM proxies, etc.). Must NEVER be treated as OAuth —
# Anthropic OAuth claims only apply to api.anthropic.com.
try:
from agent.anthropic_adapter import build_anthropic_client
real_client = build_anthropic_client(custom_key, custom_base)
except ImportError:
logger.warning(
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return OpenAI(api_key=custom_key, base_url=custom_base), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
return OpenAI(api_key=custom_key, base_url=custom_base), model

View File

@ -2732,6 +2732,17 @@ def _update_config_for_provider(
# Clear stale base_url to prevent contamination when switching providers
model_cfg.pop("base_url", None)
# Clear stale api_key/api_mode left over from a previous custom provider.
# When the user switches from e.g. a MiniMax custom endpoint
# (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider
# (e.g. OpenRouter), the stale api_key/api_mode would override the new
# provider's credentials and transport choice. Built-in providers that
# need a specific api_mode (copilot, xai) set it at request-resolution
# time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so
# removing the persisted value here is safe.
model_cfg.pop("api_key", None)
model_cfg.pop("api_mode", None)
# When switching to a non-OpenRouter provider, ensure model.default is
# valid for the new provider. An OpenRouter-formatted name like
# "anthropic/claude-opus-4.6" will fail on direct-API providers.

View File

@ -38,14 +38,21 @@ def _normalize_custom_provider_name(value: str) -> str:
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
"""Auto-detect api_mode from the resolved base URL.
Direct api.openai.com endpoints need the Responses API for GPT-5.x
tool calls with reasoning (chat/completions returns 400).
- Direct api.openai.com endpoints need the Responses API for GPT-5.x
tool calls with reasoning (chat/completions returns 400).
- Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM,
LiteLLM proxies, etc.) conventionally expose the native Anthropic
protocol under a ``/anthropic`` suffix — treat those as
``anthropic_messages`` transport instead of the default
``chat_completions``.
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
if normalized.endswith("/anthropic"):
return "anthropic_messages"
return None
@ -194,8 +201,12 @@ def _resolve_runtime_from_pool_entry(
elif provider in ("opencode-zen", "opencode-go"):
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
elif base_url.rstrip("/").endswith("/anthropic"):
api_mode = "anthropic_messages"
else:
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
# api.openai.com → codex_responses, api.x.ai → codex_responses).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
@ -642,8 +653,11 @@ def _resolve_explicit_runtime(
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
api_mode = configured_mode
elif base_url.rstrip("/").endswith("/anthropic"):
api_mode = "anthropic_messages"
else:
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
return {
"provider": provider,
@ -965,10 +979,13 @@ def resolve_runtime_provider(
elif provider in ("opencode-zen", "opencode-go"):
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
# Auto-detect Anthropic-compatible endpoints by URL convention
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
elif base_url.rstrip("/").endswith("/anthropic"):
api_mode = "anthropic_messages"
else:
# Auto-detect Anthropic-compatible endpoints by URL convention
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
# plus api.openai.com → codex_responses and api.x.ai → codex_responses.
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
# Strip trailing /v1 for OpenCode Anthropic models (see comment above).
if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
base_url = re.sub(r"/v1/?$", "", base_url)

View File

@ -892,13 +892,15 @@ class AIAgent:
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
self._force_ascii_payload = False
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
# Reduces input costs by ~75% on multi-turn conversations by caching the
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
is_openrouter = self._is_openrouter_url()
is_claude = "claude" in self.model.lower()
is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic"
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
# Anthropic prompt caching: auto-enabled for Claude models on native
# Anthropic, OpenRouter, and third-party gateways that speak the
# Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces
# input costs by ~75% on multi-turn conversations. Uses system_and_3
# strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy``
# for the layout-vs-transport decision.
self._use_prompt_caching, self._use_native_cache_layout = (
self._anthropic_prompt_cache_policy()
)
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
# Iteration budget: the LLM is only notified when it actually exhausts
@ -1013,8 +1015,15 @@ class AIAgent:
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = base_url
# Only mark the session as OAuth-authenticated when the token
# genuinely belongs to native Anthropic. Third-party providers
# (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the
# Anthropic protocol must never trip OAuth code paths — doing
# so injects Claude-Code identity headers and system prompts
# that cause 401/403 on their endpoints. Guards #1739 and
# the third-party identity-injection bug.
from agent.anthropic_adapter import _is_oauth_token as _is_oat
self._is_anthropic_oauth = _is_oat(effective_key)
self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
# No OpenAI client needed for Anthropic mode
self.client = None
@ -1227,7 +1236,12 @@ class AIAgent:
# Show prompt caching status
if self._use_prompt_caching and not self.quiet_mode:
source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
if self._use_native_cache_layout and self.provider == "anthropic":
source = "native Anthropic"
elif self._use_native_cache_layout:
source = "Anthropic-compatible endpoint"
else:
source = "Claude via OpenRouter"
print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
# Session logging setup - auto-save conversation trajectories for debugging
@ -1701,6 +1715,7 @@ class AIAgent:
"api_key": getattr(self, "api_key", ""),
"client_kwargs": dict(self._client_kwargs),
"use_prompt_caching": self._use_prompt_caching,
"use_native_cache_layout": self._use_native_cache_layout,
# Context engine state that _try_activate_fallback() overwrites.
# Use getattr for model/base_url/api_key/provider since plugin
# engines may not have these (they're ContextCompressor-specific).
@ -1822,7 +1837,7 @@ class AIAgent:
effective_key, self._anthropic_base_url,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
self.client = None
self._client_kwargs = {}
else:
@ -1842,10 +1857,13 @@ class AIAgent:
)
# ── Re-evaluate prompt caching ──
is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic"
self._use_prompt_caching = (
("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower())
or is_native_anthropic
self._use_prompt_caching, self._use_native_cache_layout = (
self._anthropic_prompt_cache_policy(
provider=new_provider,
base_url=self.base_url,
api_mode=api_mode,
model=new_model,
)
)
# ── Update context compressor ──
@ -1880,6 +1898,7 @@ class AIAgent:
"api_key": getattr(self, "api_key", ""),
"client_kwargs": dict(self._client_kwargs),
"use_prompt_caching": self._use_prompt_caching,
"use_native_cache_layout": self._use_native_cache_layout,
"compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
"compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
"compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
@ -2143,6 +2162,55 @@ class AIAgent:
"""Return True when the base URL targets OpenRouter."""
return "openrouter" in self._base_url_lower
def _anthropic_prompt_cache_policy(
self,
*,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_mode: Optional[str] = None,
model: Optional[str] = None,
) -> tuple[bool, bool]:
"""Decide whether to apply Anthropic prompt caching and which layout to use.
Returns ``(should_cache, use_native_layout)``:
* ``should_cache`` — inject ``cache_control`` breakpoints for this
request (applies to OpenRouter Claude, native Anthropic, and
third-party gateways that speak the native Anthropic protocol).
* ``use_native_layout`` — place markers on the *inner* content
blocks (native Anthropic accepts and requires this layout);
when False markers go on the message envelope (OpenRouter and
OpenAI-wire proxies expect the looser layout).
Third-party providers using the native Anthropic transport
(``api_mode == 'anthropic_messages'`` + Claude-named model) get
caching with the native layout so they benefit from the same
cost reduction as direct Anthropic callers, provided their
gateway implements the Anthropic cache_control contract
(MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do).
"""
eff_provider = (provider if provider is not None else self.provider) or ""
eff_base_url = base_url if base_url is not None else (self.base_url or "")
eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
eff_model = (model if model is not None else self.model) or ""
base_lower = eff_base_url.lower()
is_claude = "claude" in eff_model.lower()
is_openrouter = "openrouter" in base_lower
is_anthropic_wire = eff_api_mode == "anthropic_messages"
is_native_anthropic = (
is_anthropic_wire
and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower)
)
if is_native_anthropic:
return True, True
if is_openrouter and is_claude:
return True, False
if is_anthropic_wire and is_claude:
# Third-party Anthropic-compatible gateway.
return True, True
return False, False
@staticmethod
def _model_requires_responses_api(model: str) -> bool:
"""Return True for models that require the Responses API path.
@ -5322,9 +5390,12 @@ class AIAgent:
return False
self._anthropic_api_key = new_token
# Update OAuth flag — token type may have changed (API key ↔ OAuth)
# Update OAuth flag — token type may have changed (API key ↔ OAuth).
# Only treat as OAuth on native Anthropic; third-party endpoints using
# the Anthropic protocol must not trip OAuth paths (#1739 & third-party
# identity-injection guard).
from agent.anthropic_adapter import _is_oauth_token
self._is_anthropic_oauth = _is_oauth_token(new_token)
self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False
return True
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
@ -5367,7 +5438,7 @@ class AIAgent:
runtime_key, runtime_base,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(runtime_key)
self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False
self.api_key = runtime_key
self.base_url = runtime_base
return
@ -6523,7 +6594,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
effective_key, self._anthropic_base_url, timeout=_fb_timeout,
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False
self.client = None
self._client_kwargs = {}
else:
@ -6554,10 +6625,13 @@ class AIAgent:
self._replace_primary_openai_client(reason="fallback_timeout_apply")
# Re-evaluate prompt caching for the new provider/model
is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
self._use_prompt_caching = (
("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower())
or is_native_anthropic
self._use_prompt_caching, self._use_native_cache_layout = (
self._anthropic_prompt_cache_policy(
provider=fb_provider,
base_url=fb_base_url,
api_mode=fb_api_mode,
model=fb_model,
)
)
# Update context compressor limits for the fallback model.
@ -6617,6 +6691,12 @@ class AIAgent:
self.api_key = rt["api_key"]
self._client_kwargs = dict(rt["client_kwargs"])
self._use_prompt_caching = rt["use_prompt_caching"]
# When the restored snapshot has no "use_native_cache_layout" entry,
# fall back to the native layout only for native Anthropic
# (api_mode == "anthropic_messages" and provider == "anthropic").
self._use_native_cache_layout = rt.get(
"use_native_cache_layout",
self.api_mode == "anthropic_messages" and self.provider == "anthropic",
)
# ── Rebuild client for the primary provider ──
if self.api_mode == "anthropic_messages":
@ -9317,12 +9397,19 @@ class AIAgent:
for idx, pfm in enumerate(self.prefill_messages):
api_messages.insert(sys_offset + idx, pfm.copy())
# Apply Anthropic prompt caching for Claude models via OpenRouter.
# Auto-detected: if model name contains "claude" and base_url is OpenRouter,
# inject cache_control breakpoints (system + last 3 messages) to reduce
# input token costs by ~75% on multi-turn conversations.
# Apply Anthropic prompt caching for Claude models on native
# Anthropic, OpenRouter, and third-party Anthropic-compatible
# gateways. Auto-detected: if ``_use_prompt_caching`` is set,
# inject cache_control breakpoints (system + last 3 messages)
# to reduce input token costs by ~75% on multi-turn
# conversations. Layout is chosen per endpoint by
# ``_anthropic_prompt_cache_policy``.
if self._use_prompt_caching:
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
api_messages = apply_anthropic_cache_control(
api_messages,
cache_ttl=self._cache_ttl,
native_anthropic=self._use_native_cache_layout,
)
# Safety net: strip orphaned tool results / add stubs for missing
# results before sending to the API. Runs unconditionally — not
@ -9779,25 +9866,30 @@ class AIAgent:
if finish_reason == "length":
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
# Normalize the truncated response to a single OpenAI-style
# message shape so text-continuation and tool-call retry
# work uniformly across chat_completions, bedrock_converse,
# and anthropic_messages. For Anthropic we use the same
# adapter the agent loop already relies on so the rebuilt
# interim assistant message is byte-identical to what
# would have been appended in the non-truncated path.
_trunc_msg = None
if self.api_mode in ("chat_completions", "bedrock_converse"):
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
elif self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import normalize_anthropic_response
_trunc_msg, _ = normalize_anthropic_response(
response, strip_tool_prefix=self._is_anthropic_oauth
)
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
# ── Detect thinking-budget exhaustion ──────────────
# When the model spends ALL output tokens on reasoning
# and has none left for the response, continuation
# retries are pointless. Detect this early and give a
# targeted error instead of wasting 3 API calls.
_trunc_content = None
_trunc_has_tool_calls = False
if self.api_mode in ("chat_completions", "bedrock_converse"):
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
elif self.api_mode == "anthropic_messages":
# Anthropic response.content is a list of blocks
_text_parts = []
for _blk in getattr(response, "content", []):
if getattr(_blk, "type", None) == "text":
_text_parts.append(getattr(_blk, "text", ""))
_trunc_content = "\n".join(_text_parts) if _text_parts else None
# A response is "thinking exhausted" only when the model
# actually produced reasoning blocks but no visible text after
# them. Models that do not use <think> tags (e.g. GLM-4.7 on
@ -9854,9 +9946,9 @@ class AIAgent:
"error": _exhaust_error,
}
if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message
if not assistant_message.tool_calls:
if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
assistant_message = _trunc_msg
if assistant_message is not None and not _trunc_has_tool_calls:
length_continue_retries += 1
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
messages.append(interim_msg)
@ -9894,9 +9986,9 @@ class AIAgent:
"error": "Response remained truncated after 3 continuation attempts",
}
if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
assistant_message = _trunc_msg
if assistant_message is not None and _trunc_has_tool_calls:
if truncated_tool_call_retries < 1:
truncated_tool_call_retries += 1
self._vprint(

View File

@ -80,6 +80,13 @@ AUTHOR_MAP = {
"nish3451@users.noreply.github.com": "nish3451",
"Mibayy@users.noreply.github.com": "Mibayy",
"135070653+sgaofen@users.noreply.github.com": "sgaofen",
"nocoo@users.noreply.github.com": "nocoo",
"30841158+n-WN@users.noreply.github.com": "n-WN",
"leoyuan0099@gmail.com": "keyuyuan",
"bxzt2006@163.com": "Only-Code-A",
"i@troy-y.org": "TroyMitchell911",
"mygamez@163.com": "zhongyueming1121",
"hansnow@users.noreply.github.com": "hansnow",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",

View File

@ -0,0 +1,107 @@
"""Tests for agent.auxiliary_client._try_custom_endpoint's anthropic_messages branch.
When a user configures a custom endpoint with ``api_mode: anthropic_messages``
(e.g. MiniMax, Zhipu GLM, LiteLLM in Anthropic-proxy mode), auxiliary tasks
(compression, web_extract, session_search, title generation) must use the
native Anthropic transport rather than being silently downgraded to an
OpenAI-wire client that speaks the wrong protocol.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider credential variables from the environment for every test.

    Uses ``raising=False`` so variables that are already unset are ignored.
    """
    scrubbed_vars = (
        "OPENAI_API_KEY",
        "OPENAI_BASE_URL",
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_TOKEN",
    )
    for var_name in scrubbed_vars:
        monkeypatch.delenv(var_name, raising=False)
def _install_anthropic_adapter_mocks():
"""Patch build_anthropic_client so the test doesn't need the SDK."""
fake_client = MagicMock(name="anthropic_client")
return patch(
"agent.anthropic_adapter.build_anthropic_client",
return_value=fake_client,
), fake_client
def test_custom_endpoint_anthropic_messages_builds_anthropic_wrapper():
"""api_mode=anthropic_messages → returns AnthropicAuxiliaryClient, not OpenAI.

The custom runtime is stubbed to a three-tuple
``(base_url, api_key, api_mode)`` and the main model to
``claude-sonnet-4-6``; ``build_anthropic_client`` is replaced with a mock
so the anthropic SDK is not required.
"""
from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient
# Stub the runtime resolution and the main-model lookup.
with patch(
"agent.auxiliary_client._resolve_custom_runtime",
return_value=(
"https://api.minimax.io/anthropic",
"minimax-key",
"anthropic_messages",
),
), patch(
"agent.auxiliary_client._read_main_model",
return_value="claude-sonnet-4-6",
):
# Replace the real SDK client factory with a MagicMock-returning patch.
adapter_patch, fake_client = _install_anthropic_adapter_mocks()
with adapter_patch:
client, model = _try_custom_endpoint()
assert isinstance(client, AnthropicAuxiliaryClient), (
"Custom endpoint with api_mode=anthropic_messages must return the "
f"native Anthropic wrapper, got {type(client).__name__}"
)
assert model == "claude-sonnet-4-6"
# Wrapper should NOT be marked as OAuth — third-party endpoints are
# always API-key authenticated.
# NOTE(review): the assertions below only check that the key and base URL
# were passed through; nothing here inspects the wrapper's OAuth flag.
assert client.api_key == "minimax-key"
assert client.base_url == "https://api.minimax.io/anthropic"
def test_custom_endpoint_anthropic_messages_falls_back_when_sdk_missing():
"""Graceful degradation when anthropic SDK is unavailable.

The missing SDK is simulated by patching ``build_anthropic_client`` so
that calling it raises ``ImportError``.
"""
from agent.auxiliary_client import _try_custom_endpoint
import_error = ImportError("anthropic package not installed")
# Stub runtime resolution and main-model lookup, and make the client
# factory raise ImportError when invoked.
with patch(
"agent.auxiliary_client._resolve_custom_runtime",
return_value=("https://api.minimax.io/anthropic", "k", "anthropic_messages"),
), patch(
"agent.auxiliary_client._read_main_model",
return_value="claude-sonnet-4-6",
), patch(
"agent.anthropic_adapter.build_anthropic_client",
side_effect=import_error,
):
client, model = _try_custom_endpoint()
# Should fall back to an OpenAI-wire client rather than returning
# (None, None) — the tool still needs to do *something*.
assert client is not None
assert model == "claude-sonnet-4-6"
# OpenAI client, not AnthropicAuxiliaryClient.
# (Only the negative is asserted: the client is not the Anthropic wrapper.)
from agent.auxiliary_client import AnthropicAuxiliaryClient
assert not isinstance(client, AnthropicAuxiliaryClient)
def test_custom_endpoint_chat_completions_still_uses_openai_wire():
"""Regression: default path (no api_mode) must remain OpenAI client.

The stubbed runtime tuple carries ``None`` as its third element, i.e. no
api_mode is declared for the custom endpoint.
"""
from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient
# Stub runtime resolution (api_mode=None) and the main-model lookup.
with patch(
"agent.auxiliary_client._resolve_custom_runtime",
return_value=("https://api.example.com/v1", "key", None),
), patch(
"agent.auxiliary_client._read_main_model",
return_value="my-model",
):
client, model = _try_custom_endpoint()
# A client must be returned, with the stubbed model name, and it must not
# be the Anthropic wrapper.
assert client is not None
assert model == "my-model"
assert not isinstance(client, AnthropicAuxiliaryClient)

View File

@ -0,0 +1,70 @@
"""Tests for hermes_cli.runtime_provider._detect_api_mode_for_url.
The helper maps base URLs to api_modes for three cases:
* api.openai.com → codex_responses
* api.x.ai → codex_responses
* */anthropic → anthropic_messages (third-party gateways like MiniMax,
Zhipu GLM, LiteLLM proxies)
Consolidating the /anthropic detection in this helper (instead of three
inline ``endswith`` checks spread across _resolve_runtime_from_pool_entry,
the explicit-provider path, and the api-key-provider path) means every
future update to the detection logic lives in one place.
"""
from __future__ import annotations
from hermes_cli.runtime_provider import _detect_api_mode_for_url
class TestCodexResponsesDetection:
    """Hosts that must (or must not) resolve to ``codex_responses``."""

    def test_openai_api_returns_codex_responses(self):
        detected = _detect_api_mode_for_url("https://api.openai.com/v1")
        assert detected == "codex_responses"

    def test_xai_api_returns_codex_responses(self):
        detected = _detect_api_mode_for_url("https://api.x.ai/v1")
        assert detected == "codex_responses"

    def test_openrouter_is_not_codex_responses(self):
        # OpenRouter routes to OpenAI-hosted models but is not the direct
        # api.openai.com endpoint, so no api_mode should be detected.
        detected = _detect_api_mode_for_url("https://openrouter.ai/api/v1")
        assert detected is None
class TestAnthropicMessagesDetection:
    """Gateways exposing the Anthropic protocol under a ``/anthropic`` suffix."""

    def test_minimax_anthropic_endpoint(self):
        detected = _detect_api_mode_for_url("https://api.minimax.io/anthropic")
        assert detected == "anthropic_messages"

    def test_minimax_cn_anthropic_endpoint(self):
        detected = _detect_api_mode_for_url("https://api.minimaxi.com/anthropic")
        assert detected == "anthropic_messages"

    def test_dashscope_anthropic_endpoint(self):
        endpoint = "https://dashscope.aliyuncs.com/api/v2/apps/anthropic"
        assert _detect_api_mode_for_url(endpoint) == "anthropic_messages"

    def test_trailing_slash_tolerated(self):
        detected = _detect_api_mode_for_url("https://api.minimax.io/anthropic/")
        assert detected == "anthropic_messages"

    def test_uppercase_path_tolerated(self):
        detected = _detect_api_mode_for_url("https://API.MINIMAX.IO/Anthropic")
        assert detected == "anthropic_messages"

    def test_anthropic_in_middle_of_path_does_not_match(self):
        # ``/anthropic`` must be the final path segment; a URL that continues
        # past it (e.g. /anthropic/v1) must not be detected.
        detected = _detect_api_mode_for_url("https://api.example.com/anthropic/v1")
        assert detected is None
class TestDefaultCase:
    """Inputs for which no api_mode is detected and ``None`` is returned."""

    def test_generic_url_returns_none(self):
        detected = _detect_api_mode_for_url("https://api.together.xyz/v1")
        assert detected is None

    def test_empty_string_returns_none(self):
        detected = _detect_api_mode_for_url("")
        assert detected is None

    def test_none_returns_none(self):
        detected = _detect_api_mode_for_url(None)
        assert detected is None

    def test_localhost_returns_none(self):
        detected = _detect_api_mode_for_url("http://localhost:11434/v1")
        assert detected is None

View File

@ -0,0 +1,84 @@
"""Tests for hermes_cli.auth._update_config_for_provider clearing stale fields.
When the user switches from a custom provider (e.g. MiniMax with
``api_mode: anthropic_messages``, ``api_key: mxp-...``) to a built-in
provider (e.g. OpenRouter), the stale ``api_key`` and ``api_mode`` would
otherwise override the new provider's credentials and transport choice.
Built-in providers that legitimately need a specific ``api_mode`` (copilot,
xai) compute it at request-resolution time in
``_copilot_runtime_api_mode`` / ``_detect_api_mode_for_url``, so removing
the persisted value here is safe.
"""
from __future__ import annotations
import yaml
from hermes_cli.auth import _update_config_for_provider
from hermes_cli.config import get_config_path
def _read_model_cfg() -> dict:
    """Return the ``model`` mapping from the config file.

    Yields an empty dict when the file does not exist, is empty, or its
    ``model`` entry is not a mapping.
    """
    cfg_file = get_config_path()
    if cfg_file.exists():
        parsed = yaml.safe_load(cfg_file.read_text()) or {}
        section = parsed.get("model", {})
        if isinstance(section, dict):
            return section
    return {}
def _seed_custom_provider_config(api_mode: str = "anthropic_messages") -> None:
    """Write a config file describing a MiniMax-style custom provider.

    Args:
        api_mode: Value stored under ``model.api_mode`` in the seeded file.
    """
    seeded_model = {
        "provider": "custom",
        "base_url": "https://api.minimax.io/anthropic",
        "api_key": "mxp-stale-key",
        "api_mode": api_mode,
        "default": "claude-sonnet-4-6",
    }
    cfg_file = get_config_path()
    # Make sure the parent directory exists before writing.
    cfg_file.parent.mkdir(parents=True, exist_ok=True)
    # sort_keys=False keeps the keys in the order listed above.
    rendered = yaml.safe_dump({"model": seeded_model}, sort_keys=False)
    cfg_file.write_text(rendered)
class TestUpdateConfigForProviderClearsStaleCustomFields:
    """Switching to a built-in provider must drop custom-endpoint leftovers."""

    def test_switching_to_openrouter_clears_api_key_and_api_mode(self):
        _seed_custom_provider_config()

        _update_config_for_provider(
            "openrouter",
            "https://openrouter.ai/api/v1",
            default_model="anthropic/claude-sonnet-4.6",
        )

        persisted = _read_model_cfg()
        # The new provider and URL are written...
        assert persisted.get("provider") == "openrouter"
        assert persisted.get("base_url") == "https://openrouter.ai/api/v1"
        # ...and the custom endpoint's credentials/transport are gone.
        assert "api_key" not in persisted, (
            "Stale custom api_key would leak into OpenRouter requests — must be cleared"
        )
        assert "api_mode" not in persisted, (
            "Stale api_mode=anthropic_messages from MiniMax would mis-route "
            "OpenRouter requests to the Anthropic SDK — must be cleared"
        )

    def test_switching_to_nous_clears_stale_api_mode(self):
        _seed_custom_provider_config()

        _update_config_for_provider("nous", "https://inference-api.nousresearch.com/v1")

        persisted = _read_model_cfg()
        assert persisted.get("provider") == "nous"
        assert "api_mode" not in persisted
        assert "api_key" not in persisted

    def test_switching_clears_codex_responses_api_mode(self):
        """Also covers codex_responses, not just anthropic_messages."""
        _seed_custom_provider_config(api_mode="codex_responses")

        _update_config_for_provider("openrouter", "https://openrouter.ai/api/v1")

        persisted = _read_model_cfg()
        assert "api_mode" not in persisted

View File

@ -0,0 +1,152 @@
"""Tests for AIAgent._anthropic_prompt_cache_policy().
The policy returns ``(should_cache, use_native_layout)`` for five endpoint
classes. The test matrix pins the decision for each so a regression (e.g.
silently dropping caching on third-party Anthropic gateways, or applying
the native layout on OpenRouter) surfaces loudly.
"""
from __future__ import annotations
from unittest.mock import MagicMock
from run_agent import AIAgent
def _make_agent(
    *,
    provider: str = "openrouter",
    base_url: str = "https://openrouter.ai/api/v1",
    api_mode: str = "chat_completions",
    model: str = "anthropic/claude-sonnet-4.6",
) -> AIAgent:
    """Build a bare ``AIAgent`` for cache-policy tests without running ``__init__``.

    The instance is allocated with ``__new__`` and then given the endpoint
    attributes passed in, a lowercase copy of the base URL, a mock client
    and ``quiet_mode=True``.
    """
    stub = AIAgent.__new__(AIAgent)
    # Applied in this exact order; dicts preserve insertion order.
    attributes = {
        "provider": provider,
        "base_url": base_url,
        "api_mode": api_mode,
        "model": model,
        "_base_url_lower": (base_url or "").lower(),
        "client": MagicMock(),
        "quiet_mode": True,
    }
    for attr_name, attr_value in attributes.items():
        setattr(stub, attr_name, attr_value)
    return stub
class TestNativeAnthropic:
    """Endpoints on api.anthropic.com must cache using the native layout."""

    def test_claude_on_native_anthropic_caches_with_native_layout(self):
        native_agent = _make_agent(
            model="claude-sonnet-4-6",
            api_mode="anthropic_messages",
            base_url="https://api.anthropic.com",
            provider="anthropic",
        )
        policy = native_agent._anthropic_prompt_cache_policy()
        assert policy == (True, True)

    def test_api_anthropic_host_detected_even_when_provider_label_differs(self):
        # Pool configurations may label native Anthropic differently
        # (e.g. "anthropic-direct"); the hostname alone must keep caching on.
        relabelled_agent = _make_agent(
            model="claude-opus-4.6",
            api_mode="anthropic_messages",
            base_url="https://api.anthropic.com",
            provider="anthropic-direct",
        )
        policy = relabelled_agent._anthropic_prompt_cache_policy()
        assert policy == (True, True)
class TestOpenRouter:
    """OpenRouter caches Claude models with the envelope layout, and nothing else."""

    _OPENROUTER_ENDPOINT = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }

    def test_claude_on_openrouter_caches_with_envelope_layout(self):
        claude_agent = _make_agent(
            model="anthropic/claude-sonnet-4.6",
            **self._OPENROUTER_ENDPOINT,
        )
        should_cache, native_layout = claude_agent._anthropic_prompt_cache_policy()
        assert should_cache is True
        # Envelope layout, not the native cache_control layout.
        assert native_layout is False

    def test_non_claude_on_openrouter_does_not_cache(self):
        gpt_agent = _make_agent(
            model="openai/gpt-5.4",
            **self._OPENROUTER_ENDPOINT,
        )
        policy = gpt_agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestThirdPartyAnthropicGateway:
    """Non-Anthropic hosts that speak the Anthropic protocol (MiniMax, Zhipu GLM, LiteLLM)."""

    def test_minimax_claude_via_anthropic_messages(self):
        gateway_agent = _make_agent(
            model="claude-sonnet-4-6",
            api_mode="anthropic_messages",
            base_url="https://api.minimax.io/anthropic",
            provider="custom",
        )
        should_cache, native_layout = gateway_agent._anthropic_prompt_cache_policy()
        assert should_cache is True, "Third-party Anthropic gateway with Claude must cache"
        assert native_layout is True, "Third-party Anthropic gateway uses native cache_control layout"

    def test_third_party_without_claude_name_does_not_cache(self):
        # A non-Claude model served over the anthropic_messages transport:
        # cache_control support is unknown, so the policy stays conservative.
        non_claude_agent = _make_agent(
            model="minimax-m2.7",
            api_mode="anthropic_messages",
            base_url="https://api.minimax.io/anthropic",
            provider="custom",
        )
        policy = non_claude_agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestOpenAIWireFormatOnCustomProvider:
    """Custom providers on the OpenAI wire (chat_completions) must not get caching."""

    def test_custom_openai_wire_does_not_cache_even_with_claude_name(self):
        # The blocklist risk that #9621 did not avoid: cache_control fields
        # inside OpenAI-wire JSON can be rejected by strict providers that
        # refuse unknown keys. Caching stays off unless the transport is
        # explicitly anthropic_messages or the aggregator is OpenRouter.
        openai_wire_agent = _make_agent(
            model="claude-sonnet-4",
            api_mode="chat_completions",
            base_url="https://api.fireworks.ai/inference/v1",
            provider="custom",
        )
        policy = openai_wire_agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestExplicitOverrides:
    """Keyword overrides let switch_model / fallback activation probe another target."""

    def test_overrides_take_precedence_over_self(self):
        current = _make_agent(
            model="openai/gpt-5.4",
            api_mode="chat_completions",
            base_url="https://openrouter.ai/api/v1",
            provider="openrouter",
        )
        # Mirrors switch_model asking about a Claude target while
        # self.model still holds the old (non-Claude) value.
        should_cache, native_layout = current._anthropic_prompt_cache_policy(
            model="anthropic/claude-sonnet-4.6",
        )
        assert (should_cache, native_layout) == (True, False)

    def test_fallback_target_evaluated_independently(self):
        # The agent starts on native Anthropic; the fallback is OpenRouter.
        current = _make_agent(
            model="claude-opus-4.6",
            api_mode="anthropic_messages",
            base_url="https://api.anthropic.com",
            provider="anthropic",
        )
        fallback_target = {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
            "model": "anthropic/claude-sonnet-4.6",
        }
        should_cache, native_layout = current._anthropic_prompt_cache_policy(**fallback_target)
        assert (should_cache, native_layout) == (True, False)

View File

@ -0,0 +1,182 @@
"""Tests for ``_is_anthropic_oauth`` guard against third-party Anthropic-compatible providers.
The invariant: ``self._is_anthropic_oauth`` must only ever be True when
``self.provider == 'anthropic'`` (native Anthropic). Third-party providers
that speak the Anthropic protocol (MiniMax, Zhipu GLM, Alibaba DashScope,
Kimi, LiteLLM proxies, etc.) must never trip OAuth code paths — doing so
injects Claude-Code identity headers and system prompts that cause
401/403 from those endpoints.
This module targets the FIVE sites that assign ``_is_anthropic_oauth`` (sites 1, 3, 4 and 5 have tests below; ``switch_model`` has no dedicated test in this file):
1. ``AIAgent.__init__`` (line ~1022)
2. ``AIAgent.switch_model`` (line ~1832)
3. ``AIAgent._try_refresh_anthropic_client_credentials`` (line ~5335)
4. ``AIAgent._swap_credential`` (line ~5378)
5. ``AIAgent._try_activate_fallback`` (line ~6536)
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from run_agent import AIAgent
# A plausible-looking OAuth token (``sk-ant-`` without the ``-api`` suffix).
# The tests below feed it to third-party code paths and assert that
# ``_is_anthropic_oauth`` stays False.
_OAUTH_LIKE_TOKEN = "sk-ant-oauth-example-1234567890abcdef"
# An API-key-shaped token (``sk-ant-api`` prefix); ``_is_oauth_token`` is
# expected to return False for it (see TestApiKeyTokensAlwaysSafe).
_API_KEY_TOKEN = "sk-ant-api-abcdef1234567890"
@pytest.fixture
def agent():
    """Provide a minimal ``AIAgent`` built with tool discovery and OpenAI patched out.

    The agent is constructed against the OpenRouter base URL and its
    ``client`` is replaced with a ``MagicMock`` before it is returned.
    """
    with patch("run_agent.get_tool_definitions", return_value=[]):
        with patch("run_agent.check_toolset_requirements", return_value={}):
            with patch("run_agent.OpenAI"):
                stub_agent = AIAgent(
                    api_key="test-key-1234567890",
                    base_url="https://openrouter.ai/api/v1",
                    quiet_mode=True,
                    skip_context_files=True,
                    skip_memory=True,
                )
                stub_agent.client = MagicMock()
                return stub_agent
class TestOAuthFlagOnRefresh:
    """Site 3 — _try_refresh_anthropic_client_credentials."""

    @staticmethod
    def _refresh_with_oauth_like_token(agent, provider):
        """Put *agent* on anthropic_messages for *provider*, then run the refresh.

        Token resolution is patched to return an OAuth-looking token and the
        client builder is patched to return a mock. Returns the refresh result.
        """
        agent.api_mode = "anthropic_messages"
        agent.provider = provider
        agent._anthropic_api_key = "***"
        agent._anthropic_client = MagicMock()
        agent._is_anthropic_oauth = False
        with patch("agent.anthropic_adapter.resolve_anthropic_token",
                   return_value=_OAUTH_LIKE_TOKEN):
            with patch("agent.anthropic_adapter.build_anthropic_client",
                       return_value=MagicMock()):
                return agent._try_refresh_anthropic_client_credentials()

    def test_third_party_provider_refresh_is_noop(self, agent):
        """Refresh must return False and leave the OAuth flag off for a third-party provider.

        The refresh path is expected to bail out when provider != anthropic;
        the per-assignment guard is a second line of defense so a future
        refactor cannot reintroduce the bug.
        """
        refreshed = self._refresh_with_oauth_like_token(agent, "minimax")  # third-party
        # Short-circuit on a non-anthropic provider.
        assert refreshed is False
        # The flag must be untouched either way.
        assert agent._is_anthropic_oauth is False

    def test_native_anthropic_preserves_existing_oauth_behaviour(self, agent):
        """Regression: native anthropic with an OAuth token still flips the flag to True."""
        refreshed = self._refresh_with_oauth_like_token(agent, "anthropic")
        assert refreshed is True
        assert agent._is_anthropic_oauth is True
class TestOAuthFlagOnCredentialSwap:
    """Site 4 — _swap_credential (credential pool rotation)."""

    def test_pool_swap_on_third_party_never_flips_oauth(self, agent):
        glm_anthropic_url = "https://open.bigmodel.cn/api/anthropic"

        agent.api_mode = "anthropic_messages"
        agent.provider = "glm"  # Zhipu GLM served via /anthropic
        agent._anthropic_api_key = "old-key"
        agent._anthropic_base_url = glm_anthropic_url
        agent._anthropic_client = MagicMock()
        agent._is_anthropic_oauth = False

        # The rotated-in pool entry carries an OAuth-looking key.
        rotated_entry = MagicMock(
            runtime_api_key=_OAUTH_LIKE_TOKEN,
            runtime_base_url=glm_anthropic_url,
        )

        client_builder = patch(
            "agent.anthropic_adapter.build_anthropic_client",
            return_value=MagicMock(),
        )
        with client_builder:
            agent._swap_credential(rotated_entry)

        assert agent._is_anthropic_oauth is False
class TestOAuthFlagOnConstruction:
    """Site 1 — AIAgent.__init__ on a third-party anthropic_messages provider."""

    def test_minimax_init_does_not_flip_oauth(self):
        explicit_key = "minimax-key-1234"
        with patch("run_agent.get_tool_definitions", return_value=[]):
            with patch("run_agent.check_toolset_requirements", return_value={}):
                with patch("agent.anthropic_adapter.build_anthropic_client",
                           return_value=MagicMock()):
                    # Stands in for a stale ANTHROPIC_TOKEN in the
                    # environment: init MUST NOT fall back to it when
                    # provider != anthropic.
                    with patch("agent.anthropic_adapter.resolve_anthropic_token",
                               return_value=_OAUTH_LIKE_TOKEN):
                        constructed = AIAgent(
                            api_key=explicit_key,
                            base_url="https://api.minimax.io/anthropic",
                            provider="minimax",
                            api_mode="anthropic_messages",
                            model="claude-sonnet-4-6",
                            quiet_mode=True,
                            skip_context_files=True,
                            skip_memory=True,
                        )
        # The explicit MiniMax key must win over the stale OAuth token,
        # and the OAuth flag must stay off.
        assert constructed._anthropic_api_key == "minimax-key-1234"
        assert constructed._is_anthropic_oauth is False
class TestOAuthFlagOnFallbackActivation:
    """Site 5 — _try_activate_fallback targeting a third-party Anthropic endpoint."""

    def test_fallback_to_third_party_does_not_flip_oauth(self, agent):
        """Replay the post-fallback flag assignment for a third-party target.

        This mirrors the guarded assignment instead of running
        ``_try_activate_fallback`` itself, which would pull in the whole
        recovery stack (streaming, sessions, etc.).
        """
        from agent.anthropic_adapter import _is_oauth_token

        fb_provider = "minimax"
        effective_key = _OAUTH_LIKE_TOKEN
        # Only a native-anthropic fallback may consult the token shape.
        if fb_provider == "anthropic":
            agent._is_anthropic_oauth = _is_oauth_token(effective_key)
        else:
            agent._is_anthropic_oauth = False
        assert agent._is_anthropic_oauth is False
class TestApiKeyTokensAlwaysSafe:
    """Regression: plain API-key shapes must never be classified as OAuth tokens."""

    def test_native_anthropic_with_api_key_token(self):
        from agent.anthropic_adapter import _is_oauth_token

        verdict = _is_oauth_token(_API_KEY_TOKEN)
        assert verdict is False

    def test_third_party_key_shape(self):
        from agent.anthropic_adapter import _is_oauth_token

        # Third-party key shapes (MiniMax 'mxp-...', GLM 'glm.sess.', etc.)
        # are already rejected by _is_oauth_token; the provider guard is a
        # second defense in case a future key format happens to look OAuth-y.
        verdict = _is_oauth_token("mxp-abcdef123")
        assert verdict is False

View File

@ -0,0 +1,114 @@
"""Regression test for anthropic_messages truncation continuation.
When an Anthropic response hits ``stop_reason: max_tokens`` (mapped to
``finish_reason == 'length'`` in run_agent), the agent must retry with
a continuation prompt — the same behavior it has always had for
chat_completions and bedrock_converse. Before this PR, the
``if self.api_mode in ('chat_completions', 'bedrock_converse'):`` guard
silently dropped Anthropic-wire truncations on the floor, returning a
half-finished response with no retry.
We don't exercise the full agent loop here (it's 3000 lines of inference,
streaming, plugin hooks, etc.) — instead we verify the normalization
adapter produces exactly the shape the continuation block now consumes.
"""
from __future__ import annotations
from types import SimpleNamespace
import pytest
def _make_anthropic_text_block(text: str) -> SimpleNamespace:
return SimpleNamespace(type="text", text=text)
def _make_anthropic_tool_use_block(name: str = "my_tool") -> SimpleNamespace:
return SimpleNamespace(
type="tool_use",
id="toolu_01",
name=name,
input={"foo": "bar"},
)
def _make_anthropic_response(blocks, stop_reason: str = "max_tokens"):
return SimpleNamespace(
id="msg_01",
type="message",
role="assistant",
model="claude-sonnet-4-6",
content=blocks,
stop_reason=stop_reason,
stop_sequence=None,
usage=SimpleNamespace(input_tokens=100, output_tokens=200),
)
class TestTruncatedAnthropicResponseNormalization:
    """normalize_anthropic_response() must yield the shape the continuation block reads."""

    def test_text_only_truncation_produces_text_content_no_tool_calls(self):
        """A text-only truncated response must select the text-continuation path."""
        from agent.anthropic_adapter import normalize_anthropic_response

        truncated = _make_anthropic_response(
            [_make_anthropic_text_block("partial response that was cut off")]
        )
        message, finish_reason = normalize_anthropic_response(truncated)

        # The continuation block reads two attributes:
        #   message.content    → appended to truncated_response_prefix
        #   message.tool_calls → guards the text-retry branch
        assert message.content is not None
        assert "partial response" in message.content
        assert not message.tool_calls, (
            "Pure-text truncation must have no tool_calls so the text-continuation "
            "branch (not the tool-retry branch) fires"
        )
        assert finish_reason == "length", "max_tokens stop_reason must map to OpenAI-style 'length'"

    def test_truncated_tool_call_produces_tool_calls(self):
        """A response truncated with a tool_use block must select the tool-call retry path."""
        from agent.anthropic_adapter import normalize_anthropic_response

        content_blocks = [
            _make_anthropic_text_block("thinking..."),
            _make_anthropic_tool_use_block(),
        ]
        truncated = _make_anthropic_response(content_blocks)
        message, finish_reason = normalize_anthropic_response(truncated)

        assert bool(message.tool_calls), (
            "Truncation mid-tool_use must expose tool_calls so the "
            "tool-call retry branch fires instead of text continuation"
        )
        assert finish_reason == "length"

    def test_empty_content_does_not_crash(self):
        """Defensive: an empty ``content`` list must normalize without raising."""
        from agent.anthropic_adapter import normalize_anthropic_response

        truncated = _make_anthropic_response([])
        message, _finish_reason = normalize_anthropic_response(truncated)

        # Content may come back as "" or None depending on the adapter;
        # either is fine — the point is that nothing raised.
        assert message is not None
        assert not message.tool_calls
class TestContinuationLogicBranching:
    """Symbolic record of the api_mode gate that now includes anthropic_messages.

    NOTE(review): these checks compare literals written in this file; they
    do not read the guard from run_agent.py, so they cannot catch a change
    made there.
    """

    # Copy of the guard documented for run_agent.py:
    #   if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
    _SHARED_CONTINUATION_MODES = ("chat_completions", "bedrock_converse", "anthropic_messages")

    @pytest.mark.parametrize("api_mode", ["chat_completions", "bedrock_converse", "anthropic_messages"])
    def test_all_three_api_modes_hit_continuation_branch(self, api_mode):
        assert api_mode in self._SHARED_CONTINUATION_MODES

    def test_codex_responses_still_excluded(self):
        # codex_responses has its own (non-continuation) truncation path
        # and should NOT go through the shared block.
        assert "codex_responses" not in self._SHARED_CONTINUATION_MODES