fix: strip extra_content from tool_calls for strict APIs (Fireworks, Mistral)

Fireworks and Mistral reject requests with HTTP 400 'Extra inputs are not permitted, field: messages[N].tool_calls[M].extra_content' in any session whose history contains prior Gemini tool calls. Gemini 3 thinking models attach extra_content (thought_signature) to tool_calls; it survived to the wire because the sanitize paths only stripped call_id/response_item_id. Strip extra_content from the outgoing wire copy in both sanitize paths (ChatCompletionsTransport.convert_messages and _sanitize_tool_calls_for_strict_api), but gate the stripping on the target model: keep extra_content for Gemini-family targets (model names containing "gemini" or "gemma"; the thought_signature MUST be replayed or Gemini returns 400), and strip it for everyone else — including non-Gemini models that inherited a stale Gemini signature from earlier in a mixed-provider session. Native Gemini is unaffected (GeminiNativeClient bypasses these paths). The stored history is never mutated; only the per-call outgoing copy is sanitized. Fixes #17986.
2026-06-03 16:23:48 -07:00
parent ec69c767ff
commit e8c3ac2f5c
6 changed files with 173 additions and 9 deletions
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -1296,7 +1296,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
            for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
                api_msg.pop(internal_key, None)
            if _needs_sanitize:
-                agent._sanitize_tool_calls_for_strict_api(api_msg)
+                agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
            api_messages.append(api_msg)

        effective_system = agent._cached_system_prompt or ""
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -982,7 +982,7 @@ def run_conversation(
            # Uses new dicts so the internal messages list retains the fields
            # for Codex Responses compatibility.
            if agent._should_sanitize_tool_calls():
-                agent._sanitize_tool_calls_for_strict_api(api_msg)
+                agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
            # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
            # The signature field helps maintain reasoning continuity
            api_messages.append(api_msg)
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@ -99,6 +99,22 @@ def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
    return normalized.endswith("/openai")


+def _model_consumes_thought_signature(model: Any) -> bool:
+    """True when the outgoing model is a Gemini family model that requires
+    ``extra_content`` (thought_signature) to be replayed on tool calls.
+
+    Gemini 3 thinking models attach ``extra_content`` to each tool call and
+    reject subsequent requests with HTTP 400 if it is missing. Every other
+    strict OpenAI-compatible provider (Fireworks, Mistral, ...) rejects the
+    request with 400 if ``extra_content`` *is* present. So the field must be
+    kept only when the target model is itself Gemini-family, and stripped
+    otherwise — including when a non-Gemini model inherits stale Gemini
+    ``extra_content`` from earlier in a mixed-provider session.
+    """
+    m = str(model or "").lower()
+    return "gemini" in m or "gemma" in m
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@ -119,6 +135,14 @@ class ChatCompletionsTransport(ProviderTransport):
        - Codex Responses API fields: ``codex_reasoning_items`` /
          ``codex_message_items`` on the message, ``call_id`` /
          ``response_item_id`` on ``tool_calls`` entries.
+        - ``extra_content`` on ``tool_calls`` (Gemini thought_signature) —
+          stripped unless the outgoing ``model`` is itself Gemini-family.
+          Gemini 3 thinking models attach it for replay, but strict providers
+          (Fireworks, Mistral) reject any payload containing it with
+          ``Extra inputs are not permitted, field: 'messages[N].tool_calls[M].extra_content'``.
+          It must be kept for Gemini targets (replay required) and dropped for
+          everyone else, including non-Gemini models that inherited stale
+          Gemini ``extra_content`` earlier in a mixed-provider session.
        - ``tool_name`` on tool-result messages — written by
          ``make_tool_result_message()`` for the SQLite FTS index, but not
          part of the Chat Completions schema. Strict providers (Fireworks,
@ -137,6 +161,9 @@ class ChatCompletionsTransport(ProviderTransport):
          ``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
          which then poisons every subsequent request in the session.
        """
+        strip_extra_content = not _model_consumes_thought_signature(
+            kwargs.get("model")
+        )
        needs_sanitize = False
        for msg in messages:
            if not isinstance(msg, dict):
@ -155,7 +182,9 @@ class ChatCompletionsTransport(ProviderTransport):
            if isinstance(tool_calls, list):
                for tc in tool_calls:
                    if isinstance(tc, dict) and (
-                        "call_id" in tc or "response_item_id" in tc
+                        "call_id" in tc
+                        or "response_item_id" in tc
+                        or (strip_extra_content and "extra_content" in tc)
                    ):
                        needs_sanitize = True
                        break
@ -183,6 +212,8 @@ class ChatCompletionsTransport(ProviderTransport):
                    if isinstance(tc, dict):
                        tc.pop("call_id", None)
                        tc.pop("response_item_id", None)
+                        if strip_extra_content:
+                            tc.pop("extra_content", None)
        return sanitized

    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
@ -240,8 +271,10 @@ class ChatCompletionsTransport(ProviderTransport):
            anthropic_max_output: int | None
            extra_body_additions: dict | None
        """
-        # Codex sanitization: drop reasoning_items / call_id / response_item_id
-        sanitized = self.convert_messages(messages)
+        # Codex sanitization: drop reasoning_items / call_id / response_item_id.
+        # Pass model so the Gemini thought_signature (extra_content) is kept for
+        # Gemini targets and stripped for strict non-Gemini providers.
+        sanitized = self.convert_messages(messages, model=model)

        # ── Provider profile: single-path when present ──────────────────
        _profile = params.get("provider_profile")
--- a/run_agent.py
+++ b/run_agent.py
@ -4627,7 +4627,7 @@ class AIAgent:
        return reapply_reasoning_echo_for_provider(self, api_messages)

    @staticmethod
-    def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
+    def _sanitize_tool_calls_for_strict_api(api_msg: dict, model: "str | None" = None) -> dict:
        """Strip Codex Responses API fields from tool_calls for strict providers.

        Providers like Mistral, Fireworks, and other strict OpenAI-compatible APIs
@ -4636,17 +4636,26 @@ class AIAgent:
        the internal message history — this method only modifies the outgoing
        API copy.

+        ``extra_content`` (Gemini thought_signature) is also stripped — strict
+        providers reject it with "Extra inputs are not permitted" — UNLESS the
+        outgoing ``model`` is itself Gemini-family, in which case it must be
+        replayed (Gemini 3 thinking models 400 without it). Defaults to
+        stripping when no model is supplied.
+
        Creates new tool_call dicts rather than mutating in-place, so the
        original messages list retains call_id/response_item_id for Codex
        Responses API compatibility (e.g. if the session falls back to a
        Codex provider later).

-        Fields stripped: call_id, response_item_id
+        Fields stripped: call_id, response_item_id, extra_content (model-gated)
        """
        tool_calls = api_msg.get("tool_calls")
        if not isinstance(tool_calls, list):
            return api_msg
+        from agent.transports.chat_completions import _model_consumes_thought_signature
        _STRIP_KEYS = {"call_id", "response_item_id"}
+        if not _model_consumes_thought_signature(model):
+            _STRIP_KEYS = _STRIP_KEYS | {"extra_content"}
        api_msg["tool_calls"] = [
            {k: v for k, v in tc.items() if k not in _STRIP_KEYS}
            if isinstance(tc, dict) else tc
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@ -46,6 +46,44 @@ class TestChatCompletionsBasic:
        assert "codex_reasoning_items" in msgs[0]
        assert "codex_message_items" in msgs[0]

+    def _msg_with_extra_content(self):
+        return [
+            {"role": "assistant", "content": "ok",
+             "tool_calls": [{"id": "call_1", "type": "function",
+                             "extra_content": {"google": {"thought_signature": "SIG_123"}},
+                             "function": {"name": "t", "arguments": "{}"}}]},
+        ]
+
+    def test_convert_messages_strips_extra_content_for_strict_provider(self, transport):
+        """Strict providers (Fireworks, Mistral) reject extra_content on
+        tool_calls with HTTP 400. When the outgoing model is NOT Gemini-family,
+        the Gemini thought_signature must be stripped — including stale
+        signatures inherited from earlier in a mixed-provider session.
+        """
+        msgs = self._msg_with_extra_content()
+        result = transport.convert_messages(msgs, model="accounts/fireworks/models/llama-v3p1-70b")
+        assert "extra_content" not in result[0]["tool_calls"][0]
+        # Original list untouched (deepcopy-on-demand)
+        assert "extra_content" in msgs[0]["tool_calls"][0]
+
+    def test_convert_messages_strips_extra_content_when_model_unknown(self, transport):
+        """Default (no model supplied) is to strip — safe for strict providers."""
+        msgs = self._msg_with_extra_content()
+        result = transport.convert_messages(msgs)
+        assert "extra_content" not in result[0]["tool_calls"][0]
+
+    def test_convert_messages_keeps_extra_content_for_gemini(self, transport):
+        """Gemini 3 thinking models require the thought_signature replayed on
+        every turn — stripping it would 400. Keep extra_content for Gemini
+        targets (including aggregator slugs like google/gemini-3-pro).
+        """
+        for model in ("gemini-3-pro", "google/gemini-3-pro-preview", "gemma-3-27b"):
+            msgs = self._msg_with_extra_content()
+            result = transport.convert_messages(msgs, model=model)
+            assert result[0]["tool_calls"][0]["extra_content"] == {
+                "google": {"thought_signature": "SIG_123"}
+            }, model
+
    def test_convert_messages_strips_tool_name(self, transport):
        """Internal `tool_name` (used for FTS indexing in the SQLite store) is
        not part of the OpenAI Chat Completions schema. Strict providers like
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@ -148,14 +148,57 @@ class TestBuildApiKwargsOpenRouter:
        assert "codex_reasoning_items" not in assistant_msg
        assert tool_call["id"] == "call_123"
        assert tool_call["function"]["name"] == "terminal"
-        assert tool_call["extra_content"] == {"thought_signature": "opaque"}
+        # extra_content (Gemini thought_signature) is stripped for non-Gemini
+        # targets — strict providers like Fireworks 400 on it. The agent here
+        # is not a Gemini model, so it must be dropped.
+        assert "extra_content" not in tool_call
+        assert "call_id" not in tool_call
+        assert "response_item_id" not in tool_call
+
+        # Original stored history must remain unchanged (only the outgoing copy
+        # is sanitized) — Codex/Responses replay relies on these fields.
+        assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
+        assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
+        assert "codex_reasoning_items" in messages[1]
+        assert messages[1]["tool_calls"][0]["extra_content"] == {"thought_signature": "opaque"}
+
+    def test_keeps_extra_content_for_gemini_target(self, monkeypatch):
+        """Gemini-family targets must keep extra_content (thought_signature) —
+        Gemini 3 thinking models 400 without it replayed on the next turn.
+        """
+        agent = _make_agent(monkeypatch, "openrouter", model="google/gemini-3-pro-preview")
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "content": "Checking now.",
+                "tool_calls": [
+                    {
+                        "id": "call_123",
+                        "call_id": "call_123",
+                        "response_item_id": "fc_123",
+                        "type": "function",
+                        "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"},
+                        "extra_content": {"google": {"thought_signature": "opaque"}},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"},
+        ]
+
+        kwargs = agent._build_api_kwargs(messages)
+        tool_call = kwargs["messages"][1]["tool_calls"][0]
+        assert tool_call["extra_content"] == {"google": {"thought_signature": "opaque"}}
+        # call_id/response_item_id still stripped regardless of model
        assert "call_id" not in tool_call
        assert "response_item_id" not in tool_call

        # Original stored history must remain unchanged for Responses replay mode.
        assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
        assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
-        assert "codex_reasoning_items" in messages[1]
+        assert messages[1]["tool_calls"][0]["extra_content"] == {
+            "google": {"thought_signature": "opaque"}
+        }

    def test_gemini_native_passes_base_url_for_top_level_thinking_config(self, monkeypatch):
        agent = _make_agent(
@ -204,6 +247,47 @@ class TestBuildApiKwargsOpenRouter:
        anthropic_agent.api_mode = "anthropic_messages"
        assert anthropic_agent._should_sanitize_tool_calls() is True

+    def _api_msg_with_extra_content(self):
+        return {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [
+                {"id": "call_1", "call_id": "call_1", "type": "function",
+                 "extra_content": {"google": {"thought_signature": "SIG_123"}},
+                 "function": {"name": "t", "arguments": "{}"}},
+            ],
+        }
+
+    def test_sanitize_tool_calls_strips_extra_content_for_strict_model(self, monkeypatch):
+        """Strict providers reject extra_content; strip it for non-Gemini models."""
+        agent = _make_agent(monkeypatch, "openrouter")
+        api_msg = self._api_msg_with_extra_content()
+        result = agent._sanitize_tool_calls_for_strict_api(
+            api_msg, model="accounts/fireworks/models/llama-v3p1-70b"
+        )
+        assert "extra_content" not in result["tool_calls"][0]
+        assert "call_id" not in result["tool_calls"][0]
+
+    def test_sanitize_tool_calls_strips_extra_content_when_model_none(self, monkeypatch):
+        """Default (no model) strips extra_content — safe for strict providers."""
+        agent = _make_agent(monkeypatch, "openrouter")
+        api_msg = self._api_msg_with_extra_content()
+        result = agent._sanitize_tool_calls_for_strict_api(api_msg)
+        assert "extra_content" not in result["tool_calls"][0]
+
+    def test_sanitize_tool_calls_keeps_extra_content_for_gemini(self, monkeypatch):
+        """Gemini thinking models 400 without the replayed thought_signature."""
+        agent = _make_agent(monkeypatch, "openrouter")
+        api_msg = self._api_msg_with_extra_content()
+        result = agent._sanitize_tool_calls_for_strict_api(
+            api_msg, model="google/gemini-3-pro-preview"
+        )
+        assert result["tool_calls"][0]["extra_content"] == {
+            "google": {"thought_signature": "SIG_123"}
+        }
+        # call_id/response_item_id still stripped regardless of model
+        assert "call_id" not in result["tool_calls"][0]
+

 class TestDeveloperRoleSwap:
    """GPT-5 and Codex models should get 'developer' instead of 'system' role."""