diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index cc7427950..496e74e39 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -1296,7 +1296,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str: for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]: api_msg.pop(internal_key, None) if _needs_sanitize: - agent._sanitize_tool_calls_for_strict_api(api_msg) + agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model) api_messages.append(api_msg) effective_system = agent._cached_system_prompt or "" diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 0cc842285..8be763513 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -982,7 +982,7 @@ def run_conversation( # Uses new dicts so the internal messages list retains the fields # for Codex Responses compatibility. if agent._should_sanitize_tool_calls(): - agent._sanitize_tool_calls_for_strict_api(api_msg) + agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model) # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context # The signature field helps maintain reasoning continuity api_messages.append(api_msg) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 259b1b0ca..7b1935528 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -99,6 +99,22 @@ def _is_gemini_openai_compat_base_url(base_url: Any) -> bool: return normalized.endswith("/openai") +def _model_consumes_thought_signature(model: Any) -> bool: + """True when the outgoing model is a Gemini family model that requires + ``extra_content`` (thought_signature) to be replayed on tool calls. + + Gemini 3 thinking models attach ``extra_content`` to each tool call and + reject subsequent requests with HTTP 400 if it is missing. Every other + strict OpenAI-compatible provider (Fireworks, Mistral, ...) rejects the + request with 400 if ``extra_content`` *is* present. So the field must be + kept only when the target model is itself Gemini-family, and stripped + otherwise — including when a non-Gemini model inherits stale Gemini + ``extra_content`` from earlier in a mixed-provider session. + """ + m = str(model or "").lower() + return "gemini" in m or "gemma" in m + + class ChatCompletionsTransport(ProviderTransport): """Transport for api_mode='chat_completions'. @@ -119,6 +135,14 @@ class ChatCompletionsTransport(ProviderTransport): - Codex Responses API fields: ``codex_reasoning_items`` / ``codex_message_items`` on the message, ``call_id`` / ``response_item_id`` on ``tool_calls`` entries. + - ``extra_content`` on ``tool_calls`` (Gemini thought_signature) — + stripped unless the outgoing ``model`` is itself Gemini-family. + Gemini 3 thinking models attach it for replay, but strict providers + (Fireworks, Mistral) reject any payload containing it with + ``Extra inputs are not permitted, field: 'messages[N].tool_calls[M].extra_content'``. + It must be kept for Gemini targets (replay required) and dropped for + everyone else, including non-Gemini models that inherited stale + Gemini ``extra_content`` earlier in a mixed-provider session. - ``tool_name`` on tool-result messages — written by ``make_tool_result_message()`` for the SQLite FTS index, but not part of the Chat Completions schema. Strict providers (Fireworks, @@ -137,6 +161,9 @@ class ChatCompletionsTransport(ProviderTransport): ``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``, which then poisons every subsequent request in the session. """ + strip_extra_content = not _model_consumes_thought_signature( + kwargs.get("model") + ) needs_sanitize = False for msg in messages: if not isinstance(msg, dict): @@ -155,7 +182,9 @@ class ChatCompletionsTransport(ProviderTransport): if isinstance(tool_calls, list): for tc in tool_calls: if isinstance(tc, dict) and ( - "call_id" in tc or "response_item_id" in tc + "call_id" in tc + or "response_item_id" in tc + or (strip_extra_content and "extra_content" in tc) ): needs_sanitize = True break @@ -183,6 +212,8 @@ class ChatCompletionsTransport(ProviderTransport): if isinstance(tc, dict): tc.pop("call_id", None) tc.pop("response_item_id", None) + if strip_extra_content: + tc.pop("extra_content", None) return sanitized def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]: @@ -240,8 +271,10 @@ class ChatCompletionsTransport(ProviderTransport): anthropic_max_output: int | None extra_body_additions: dict | None """ - # Codex sanitization: drop reasoning_items / call_id / response_item_id - sanitized = self.convert_messages(messages) + # Codex sanitization: drop reasoning_items / call_id / response_item_id. + # Pass model so the Gemini thought_signature (extra_content) is kept for + # Gemini targets and stripped for strict non-Gemini providers. + sanitized = self.convert_messages(messages, model=model) # ── Provider profile: single-path when present ────────────────── _profile = params.get("provider_profile") diff --git a/run_agent.py b/run_agent.py index d0d029343..6d0b370e6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4627,7 +4627,7 @@ class AIAgent: return reapply_reasoning_echo_for_provider(self, api_messages) @staticmethod - def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: + def _sanitize_tool_calls_for_strict_api(api_msg: dict, model: "str | None" = None) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. Providers like Mistral, Fireworks, and other strict OpenAI-compatible APIs @@ -4636,17 +4636,26 @@ class AIAgent: the internal message history — this method only modifies the outgoing API copy. + ``extra_content`` (Gemini thought_signature) is also stripped — strict + providers reject it with "Extra inputs are not permitted" — UNLESS the + outgoing ``model`` is itself Gemini-family, in which case it must be + replayed (Gemini 3 thinking models 400 without it). Defaults to + stripping when no model is supplied. + Creates new tool_call dicts rather than mutating in-place, so the original messages list retains call_id/response_item_id for Codex Responses API compatibility (e.g. if the session falls back to a Codex provider later). - Fields stripped: call_id, response_item_id + Fields stripped: call_id, response_item_id, extra_content (model-gated) """ tool_calls = api_msg.get("tool_calls") if not isinstance(tool_calls, list): return api_msg + from agent.transports.chat_completions import _model_consumes_thought_signature _STRIP_KEYS = {"call_id", "response_item_id"} + if not _model_consumes_thought_signature(model): + _STRIP_KEYS = _STRIP_KEYS | {"extra_content"} api_msg["tool_calls"] = [ {k: v for k, v in tc.items() if k not in _STRIP_KEYS} if isinstance(tc, dict) else tc diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index 9f3a205f8..255d46f43 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -46,6 +46,44 @@ class TestChatCompletionsBasic: assert "codex_reasoning_items" in msgs[0] assert "codex_message_items" in msgs[0] + def _msg_with_extra_content(self): + return [ + {"role": "assistant", "content": "ok", + "tool_calls": [{"id": "call_1", "type": "function", + "extra_content": {"google": {"thought_signature": "SIG_123"}}, + "function": {"name": "t", "arguments": "{}"}}]}, + ] + + def test_convert_messages_strips_extra_content_for_strict_provider(self, transport): + """Strict providers (Fireworks, Mistral) reject extra_content on + tool_calls with HTTP 400. When the outgoing model is NOT Gemini-family, + the Gemini thought_signature must be stripped — including stale + signatures inherited from earlier in a mixed-provider session. + """ + msgs = self._msg_with_extra_content() + result = transport.convert_messages(msgs, model="accounts/fireworks/models/llama-v3p1-70b") + assert "extra_content" not in result[0]["tool_calls"][0] + # Original list untouched (deepcopy-on-demand) + assert "extra_content" in msgs[0]["tool_calls"][0] + + def test_convert_messages_strips_extra_content_when_model_unknown(self, transport): + """Default (no model supplied) is to strip — safe for strict providers.""" + msgs = self._msg_with_extra_content() + result = transport.convert_messages(msgs) + assert "extra_content" not in result[0]["tool_calls"][0] + + def test_convert_messages_keeps_extra_content_for_gemini(self, transport): + """Gemini 3 thinking models require the thought_signature replayed on + every turn — stripping it would 400. Keep extra_content for Gemini + targets (including aggregator slugs like google/gemini-3-pro). + """ + for model in ("gemini-3-pro", "google/gemini-3-pro-preview", "gemma-3-27b"): + msgs = self._msg_with_extra_content() + result = transport.convert_messages(msgs, model=model) + assert result[0]["tool_calls"][0]["extra_content"] == { + "google": {"thought_signature": "SIG_123"} + }, model + def test_convert_messages_strips_tool_name(self, transport): """Internal `tool_name` (used for FTS indexing in the SQLite store) is not part of the OpenAI Chat Completions schema. Strict providers like diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index 523c5b09d..c99ab433d 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -148,14 +148,57 @@ class TestBuildApiKwargsOpenRouter: assert "codex_reasoning_items" not in assistant_msg assert tool_call["id"] == "call_123" assert tool_call["function"]["name"] == "terminal" - assert tool_call["extra_content"] == {"thought_signature": "opaque"} + # extra_content (Gemini thought_signature) is stripped for non-Gemini + # targets — strict providers like Fireworks 400 on it. The agent here + # is not a Gemini model, so it must be dropped. + assert "extra_content" not in tool_call + assert "call_id" not in tool_call + assert "response_item_id" not in tool_call + + # Original stored history must remain unchanged (only the outgoing copy + # is sanitized) — Codex/Responses replay relies on these fields. + assert messages[1]["tool_calls"][0]["call_id"] == "call_123" + assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123" + assert "codex_reasoning_items" in messages[1] + assert messages[1]["tool_calls"][0]["extra_content"] == {"thought_signature": "opaque"} + + def test_keeps_extra_content_for_gemini_target(self, monkeypatch): + """Gemini-family targets must keep extra_content (thought_signature) — + Gemini 3 thinking models 400 without it replayed on the next turn. + """ + agent = _make_agent(monkeypatch, "openrouter", model="google/gemini-3-pro-preview") + messages = [ + {"role": "user", "content": "hi"}, + { + "role": "assistant", + "content": "Checking now.", + "tool_calls": [ + { + "id": "call_123", + "call_id": "call_123", + "response_item_id": "fc_123", + "type": "function", + "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"}, + "extra_content": {"google": {"thought_signature": "opaque"}}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"}, + ] + + kwargs = agent._build_api_kwargs(messages) + tool_call = kwargs["messages"][1]["tool_calls"][0] + assert tool_call["extra_content"] == {"google": {"thought_signature": "opaque"}} + # call_id/response_item_id still stripped regardless of model assert "call_id" not in tool_call assert "response_item_id" not in tool_call # Original stored history must remain unchanged for Responses replay mode. assert messages[1]["tool_calls"][0]["call_id"] == "call_123" assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123" - assert "codex_reasoning_items" in messages[1] + assert messages[1]["tool_calls"][0]["extra_content"] == { + "google": {"thought_signature": "opaque"} + } def test_gemini_native_passes_base_url_for_top_level_thinking_config(self, monkeypatch): agent = _make_agent( @@ -204,6 +247,47 @@ class TestBuildApiKwargsOpenRouter: anthropic_agent.api_mode = "anthropic_messages" assert anthropic_agent._should_sanitize_tool_calls() is True + def _api_msg_with_extra_content(self): + return { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "call_id": "call_1", "type": "function", + "extra_content": {"google": {"thought_signature": "SIG_123"}}, + "function": {"name": "t", "arguments": "{}"}}, + ], + } + + def test_sanitize_tool_calls_strips_extra_content_for_strict_model(self, monkeypatch): + """Strict providers reject extra_content; strip it for non-Gemini models.""" + agent = _make_agent(monkeypatch, "openrouter") + api_msg = self._api_msg_with_extra_content() + result = agent._sanitize_tool_calls_for_strict_api( + api_msg, model="accounts/fireworks/models/llama-v3p1-70b" + ) + assert "extra_content" not in result["tool_calls"][0] + assert "call_id" not in result["tool_calls"][0] + + def test_sanitize_tool_calls_strips_extra_content_when_model_none(self, monkeypatch): + """Default (no model) strips extra_content — safe for strict providers.""" + agent = _make_agent(monkeypatch, "openrouter") + api_msg = self._api_msg_with_extra_content() + result = agent._sanitize_tool_calls_for_strict_api(api_msg) + assert "extra_content" not in result["tool_calls"][0] + + def test_sanitize_tool_calls_keeps_extra_content_for_gemini(self, monkeypatch): + """Gemini thinking models 400 without the replayed thought_signature.""" + agent = _make_agent(monkeypatch, "openrouter") + api_msg = self._api_msg_with_extra_content() + result = agent._sanitize_tool_calls_for_strict_api( + api_msg, model="google/gemini-3-pro-preview" + ) + assert result["tool_calls"][0]["extra_content"] == { + "google": {"thought_signature": "SIG_123"} + } + # call_id/response_item_id still stripped regardless of model + assert "call_id" not in result["tool_calls"][0] + class TestDeveloperRoleSwap: """GPT-5 and Codex models should get 'developer' instead of 'system' role."""