From 023f5c74b1bb9251e242c192fefab2cf91cb4427 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 28 Apr 2026 03:51:17 -0700
Subject: [PATCH] fix(anthropic): remove Claude Code fingerprinting from OAuth
 Messages API path (#16957)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(anthropic): remove Claude Code fingerprinting from OAuth Messages API path

OAuth requests now identify as Hermes on the wire. Removed:

  - "You are Claude Code, Anthropic's official CLI for Claude." system
    prompt prepend
  - Hermes Agent → Claude Code / Nous Research → Anthropic
    system-prompt substitutions
  - mcp_ tool-name prefix on outgoing tool schemas + message history
  - Matching mcp_ strip on inbound tool_use blocks (strip_tool_prefix path
    removed from AnthropicTransport.normalize_response, + all 5 call
    sites in run_agent.py and auxiliary_client.py)
  - user-agent: claude-cli/<v> (external, cli) and x-app: cli headers on
    the Messages API client

Added:

  - OAuth path strips context-1m-2025-08-07 — Anthropic rejects OAuth
    requests carrying it with HTTP 400 'This authentication style is
    incompatible with the long context beta header.'

Kept (auth plumbing, not identity spoofing):

  - _is_oauth_token classifier and is_oauth flag threading
  - Bearer vs x-api-key auth routing
  - _OAUTH_ONLY_BETAS (claude-code-20250219, oauth-2025-04-20) — backend
    requires these on the OAuth-gated Messages endpoint
  - _OAUTH_CLIENT_ID (Claude Code's) — Anthropic doesn't issue OAuth
    creds to third parties; this is the only way the login flow works
  - claude-cli/<v> User-Agent on the OAuth token exchange + refresh
    endpoints at platform.claude.com/v1/oauth/token — bare requests get
    Cloudflare 1010 blocked

Verified live against api.anthropic.com with a fresh sk-ant-oat01-*
token:

  - claude-haiku-4-5 simple message: HTTP 200, 'OK' response
  - claude-haiku-4-5 tool call: HTTP 200, stop_reason=tool_use, tool
    named 'terminal' (no mcp_ prefix) round-tripped correctly
  - Outgoing wire: no claude-cli user-agent, no x-app header, real Hermes
    identity in system prompt, real tool name in schema

Closes/supersedes #16820 (mcp_ PascalCase normalization patch — no longer
needed since the mcp_ round-trip is gone).

* fix(anthropic): resolve_anthropic_token() reads credential pool first

Close the gap where ~/.hermes/auth.json → credential_pool.anthropic
(where hermes login + dashboard PKCE flow write OAuth tokens) was not
in resolve_anthropic_token()'s source list.

Before: users who authed via hermes login got the token written into
the pool, but legacy fallback code paths (auxiliary_client, models
catalog fetch, explicit-runtime path) that call resolve_anthropic_token()
saw None and raised 'No Anthropic credentials found' — even though the
token was sitting in auth.json.

New priority 1: pool.select() with env-sourced entries skipped. Skipping
env:* entries preserves the existing env-var priority logic further
down the chain (static env OAuth → refreshable Claude Code upgrade via
_prefer_refreshable_claude_code_token).

Surfaced while writing the hermes-agent-dev skill playbook for
'finding a live OAuth token for an E2E test'.

---------

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
---
 agent/anthropic_adapter.py            | 141 ++++++++++++++------------
 agent/auxiliary_client.py             |   4 +-
 agent/transports/anthropic.py         |   8 +-
 run_agent.py                          |  16 +--
 tests/agent/test_anthropic_adapter.py |  27 +++++
 5 files changed, 107 insertions(+), 89 deletions(-)
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index d7d955b6a..af25b62b0 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -242,10 +242,11 @@ _OAUTH_ONLY_BETAS = [
     "oauth-2025-04-20",
 ]
 
-# Claude Code identity — required for OAuth requests to be routed correctly.
-# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
-# The version must stay reasonably current — Anthropic rejects OAuth requests
-# when the spoofed user-agent version is too far behind the actual release.
+# Claude Code version — sent on OAuth token-exchange / refresh requests
+# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
+# OAuth flow validates the UA and may reject requests with a version that's
+# too old, so the installed version is detected dynamically; users who keep
+# Claude Code updated then avoid stale-version errors during login/refresh.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None
 
@@ -253,9 +254,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
     """Detect the installed Claude Code version, fall back to a static constant.
 
-    Anthropic's OAuth infrastructure validates the user-agent version and may
-    reject requests with a version that's too old.  Detecting dynamically means
-    users who keep Claude Code updated never hit stale-version 400s.
+    Used only by the OAuth token-exchange / refresh flow
+    (``platform.claude.com/v1/oauth/token``). The Messages API client no
+    longer sends a claude-cli user-agent.
     """
     import subprocess as _sp
 
@@ -275,12 +276,13 @@ def _detect_claude_code_version() -> str:
     return _CLAUDE_CODE_VERSION_FALLBACK
 
 
-_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
-_MCP_TOOL_PREFIX = "mcp_"
-
-
 def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version when OAuth headers need it."""
+    """Lazily detect the installed Claude Code version for OAuth flow headers.
+
+    Used only on the OAuth token-exchange and refresh endpoints
+    (``platform.claude.com/v1/oauth/token``). The Messages API client does
+    not send a claude-cli user-agent.
+    """
     global _claude_code_version_cache
     if _claude_code_version_cache is None:
         _claude_code_version_cache = _detect_claude_code_version()
@@ -449,15 +451,21 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
         if common_betas:
             kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
     elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
-        # Anthropic routes OAuth requests based on user-agent and headers;
-        # without Claude Code's fingerprint, requests get intermittent 500s.
-        all_betas = common_betas + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
+        # The OAuth-specific beta headers are still required by Anthropic's
+        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
+        # spoofing is deliberately NOT sent — Hermes identifies as itself.
+        #
+        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
+        # OAuth requests that carry it with
+        #   "This authentication style is incompatible with the long
+        #    context beta header."
+        # Subscription-gated OAuth traffic gets the 200K default window.
+        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
+        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
         kwargs["auth_token"] = api_key
         kwargs["default_headers"] = {
             "anthropic-beta": ",".join(all_betas),
-            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-            "x-app": "cli",
         }
     else:
         # Regular API key → x-api-key header + common betas
@@ -803,17 +811,45 @@ def resolve_anthropic_token() -> Optional[str]:
     """Resolve an Anthropic token from all available sources.
 
     Priority:
-      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      2. CLAUDE_CODE_OAUTH_TOKEN env var
-      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. Hermes credential pool (``~/.hermes/auth.json`` →
+         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
+         own PKCE login flow. Entries are auto-refreshed when near
+         expiry. Env-sourced pool entries (``source="env:..."``) are
+         skipped here so the env-var priority logic below still runs.
+      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      3. CLAUDE_CODE_OAUTH_TOKEN env var
+      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
          — with automatic refresh if expired and a refresh token is available
-      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
 
     Returns the token string or None.
     """
+    # 1. Hermes credential pool — the live source of truth for tokens
+    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
+    #    ``select()`` picks the best available entry and refreshes it if
+    #    it's near expiry, so callers always get a fresh token.
+    #
+    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
+    #    those are passthroughs of the env var, and the env-var branches
+    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
+    #    that can upgrade a static env OAuth token to a refreshed
+    #    Claude Code token. Letting the pool win here would short-circuit
+    #    that upgrade.
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool("anthropic")
+        entry = pool.select()
+        if entry and entry.access_token and not entry.source.startswith("env:"):
+            return entry.access_token
+    except Exception as exc:
+        # Pool lookup is best-effort — fall through to env/file sources
+        # if anything goes wrong (e.g. auth.json corruption during a
+        # concurrent write).
+        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
+
     creds = read_claude_code_credentials()
 
-    # 1. Hermes-managed OAuth/setup token env var
+    # 2. Hermes-managed OAuth/setup token env var
     token = os.getenv("ANTHROPIC_TOKEN", "").strip()
     if token:
         preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -821,7 +857,7 @@ def resolve_anthropic_token() -> Optional[str]:
             return preferred
         return token
 
-    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
     cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
     if cc_token:
         preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -829,12 +865,12 @@ def resolve_anthropic_token() -> Optional[str]:
             return preferred
         return cc_token
 
-    # 3. Claude Code credential file
+    # 4. Claude Code credential file
     resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
     if resolved_claude_token:
         return resolved_claude_token
 
-    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
     # This remains as a compatibility fallback for pre-migration Hermes configs.
     api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
     if api_key:
@@ -1570,8 +1606,10 @@ def build_anthropic_kwargs(
     "max_tokens too large given prompt" errors and retry with a smaller cap
     (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
 
-    When *is_oauth* is True, applies Claude Code compatibility transforms:
-    system prompt prefix, tool name prefixing, and prompt sanitization.
+    When *is_oauth* is True, enables the OAuth-only beta headers required by
+    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
+    the default headers are set by build_anthropic_client). No system-prompt
+    or tool-name rewriting is performed — Hermes identifies as itself.
 
     When *preserve_dots* is True, model name dots are not converted to hyphens
     (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1604,45 +1642,11 @@ def build_anthropic_kwargs(
     if context_length and effective_max_tokens > context_length:
         effective_max_tokens = max(context_length - 1, 1)
 
-    # ── OAuth: Claude Code identity ──────────────────────────────────
-    if is_oauth:
-        # 1. Prepend Claude Code system prompt identity
-        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
-        if isinstance(system, list):
-            system = [cc_block] + system
-        elif isinstance(system, str) and system:
-            system = [cc_block, {"type": "text", "text": system}]
-        else:
-            system = [cc_block]
-
-        # 2. Sanitize system prompt — replace product name references
-        #    to avoid Anthropic's server-side content filters.
-        for block in system:
-            if isinstance(block, dict) and block.get("type") == "text":
-                text = block.get("text", "")
-                text = text.replace("Hermes Agent", "Claude Code")
-                text = text.replace("Hermes agent", "Claude Code")
-                text = text.replace("hermes-agent", "claude-code")
-                text = text.replace("Nous Research", "Anthropic")
-                block["text"] = text
-
-        # 3. Prefix tool names with mcp_ (Claude Code convention)
-        if anthropic_tools:
-            for tool in anthropic_tools:
-                if "name" in tool:
-                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
-
-        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
-        for msg in anthropic_messages:
-            content = msg.get("content")
-            if isinstance(content, list):
-                for block in content:
-                    if isinstance(block, dict):
-                        if block.get("type") == "tool_use" and "name" in block:
-                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
-                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
-                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
-                            pass  # tool_result uses ID, not name
+    # OAuth requests go through Anthropic's subscription-gated Messages
+    # endpoint but otherwise send the real Hermes system prompt and real
+    # Hermes tool names — the only OAuth-specific wire differences are
+    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
+    # build_anthropic_client and the fast-mode branch below).
 
     kwargs: Dict[str, Any] = {
         "model": model,
@@ -1733,6 +1737,9 @@ def build_anthropic_kwargs(
         # extra_headers override the client-level anthropic-beta header).
         betas = list(_common_betas_for_base_url(base_url))
         if is_oauth:
+            # Strip context-1m — incompatible with OAuth auth. See matching
+            # comment in build_anthropic_client().
+            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
             betas.extend(_OAUTH_ONLY_BETAS)
         betas.append(_FAST_MODE_BETA)
         kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 74fede3e6..73f716f32 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -639,9 +639,7 @@ class _AnthropicCompletionsAdapter:
 
         response = self._client.messages.create(**anthropic_kwargs)
         _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(
-            response, strip_tool_prefix=self._is_oauth
-        )
+        _nr = _transport.normalize_response(response)
 
         # ToolCall already duck-types as OpenAI shape (.type, .function.name,
         # .function.arguments) via properties, so no wrapping needed.
diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py
index 66c485b52..5ecc8a29d 100644
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -85,9 +85,6 @@ class AnthropicTransport(ProviderTransport):
         from agent.anthropic_adapter import _to_plain_data
         from agent.transports.types import ToolCall
 
-        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp_"
-
         text_parts = []
         reasoning_parts = []
         reasoning_details = []
@@ -102,13 +99,10 @@ class AnthropicTransport(ProviderTransport):
                 if isinstance(block_dict, dict):
                     reasoning_details.append(block_dict)
             elif block.type == "tool_use":
-                name = block.name
-                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
                 tool_calls.append(
                     ToolCall(
                         id=block.id,
-                        name=name,
+                        name=block.name,
                         arguments=json.dumps(block.input),
                     )
                 )
diff --git a/run_agent.py b/run_agent.py
index 387a6d00e..b4cf70625 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9706,7 +9706,7 @@ class AIAgent:
                                    is_oauth=self._is_anthropic_oauth,
                                    preserve_dots=self._anthropic_preserve_dots())
                     summary_response = self._anthropic_messages_create(_ant_kw)
-                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    _summary_result = _tsum.normalize_response(summary_response)
                     final_response = (_summary_result.content or "").strip()
                 else:
                     summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
@@ -9736,7 +9736,7 @@ class AIAgent:
                                     max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                     preserve_dots=self._anthropic_preserve_dots())
                     retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    _retry_result = _tretry.normalize_response(retry_response)
                     final_response = (_retry_result.content or "").strip()
                 else:
                     summary_kwargs = {
@@ -10852,12 +10852,7 @@ class AIAgent:
                         # would have been appended in the non-truncated path.
                         _trunc_msg = None
                         _trunc_transport = self._get_transport()
-                        if self.api_mode == "anthropic_messages":
-                            _trunc_result = _trunc_transport.normalize_response(
-                                response, strip_tool_prefix=self._is_anthropic_oauth
-                            )
-                        else:
-                            _trunc_result = _trunc_transport.normalize_response(response)
+                        _trunc_result = _trunc_transport.normalize_response(response)
                         _trunc_msg = _trunc_result
 
                         _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
@@ -12195,10 +12190,7 @@ class AIAgent:
 
             try:
                 _transport = self._get_transport()
-                _normalize_kwargs = {}
-                if self.api_mode == "anthropic_messages":
-                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
-                normalized = _transport.normalize_response(response, **_normalize_kwargs)
+                normalized = _transport.normalize_response(response)
                 assistant_message = normalized
                 finish_reason = normalized.finish_reason
                 
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index e2c1cd1d2..32d24666b 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -68,6 +68,33 @@ class TestBuildAnthropicClient:
             assert "fine-grained-tool-streaming-2025-05-14" in betas
             assert "api_key" not in kwargs
 
+    def test_oauth_does_not_send_claude_code_spoof_headers(self):
+        """OAuth requests identify as Hermes — no claude-cli UA, no x-app: cli.
+
+        Anthropic's OAuth-gated Messages API accepts requests from non-Claude-Code
+        clients as long as auth is correct and the OAuth beta headers are present.
+        See #16957 (fingerprinting removal) for the live-test write-up.
+        """
+        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
+            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
+            headers = mock_sdk.Anthropic.call_args[1]["default_headers"]
+            assert "user-agent" not in {k.lower() for k in headers}
+            assert "x-app" not in {k.lower() for k in headers}
+
+    def test_oauth_strips_context_1m_beta(self):
+        """context-1m-2025-08-07 is incompatible with OAuth auth — must be stripped.
+
+        Anthropic returns HTTP 400 "This authentication style is incompatible
+        with the long context beta header." when OAuth traffic carries it.
+        """
+        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
+            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
+            betas = mock_sdk.Anthropic.call_args[1]["default_headers"]["anthropic-beta"]
+            assert "context-1m-2025-08-07" not in betas
+            # But other common betas still flow through
+            assert "interleaved-thinking-2025-05-14" in betas
+            assert "oauth-2025-04-20" in betas
+
     def test_api_key_uses_api_key(self):
         with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
             build_anthropic_client("sk-ant-api03-something")