fix: strip extra_content from tool_calls for strict APIs (Fireworks, Mistral)

Fireworks/Mistral reject requests with HTTP 400 'Extra inputs are not
permitted, field: messages[N].tool_calls[M].extra_content' on any session
whose history contains prior Gemini tool calls. Gemini 3 thinking models
attach extra_content (thought_signature) to tool_calls; it survived to the
wire because the sanitize paths only stripped call_id/response_item_id.

Strip extra_content from the outgoing wire copy in both sanitize paths
(ChatCompletionsTransport.convert_messages + _sanitize_tool_calls_for_strict_api),
but gate it on the target model: keep extra_content for Gemini-family
targets (the thought_signature MUST be replayed or Gemini 400s), strip it
for everyone else — including non-Gemini models that inherit a stale Gemini
signature from earlier in a mixed-provider session. Native Gemini is unaffected
(GeminiNativeClient bypasses these paths).

Original stored history is never mutated (only the per-call copy).

Fixes #17986.
This commit is contained in:
Nate George
2026-06-03 16:23:48 -07:00
committed by Teknium
parent ec69c767ff
commit e8c3ac2f5c
6 changed files with 173 additions and 9 deletions

View File

@ -1296,7 +1296,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
api_msg.pop(internal_key, None)
if _needs_sanitize:
agent._sanitize_tool_calls_for_strict_api(api_msg)
agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
api_messages.append(api_msg)
effective_system = agent._cached_system_prompt or ""

View File

@ -982,7 +982,7 @@ def run_conversation(
# Uses new dicts so the internal messages list retains the fields
# for Codex Responses compatibility.
if agent._should_sanitize_tool_calls():
agent._sanitize_tool_calls_for_strict_api(api_msg)
agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
# Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
# The signature field helps maintain reasoning continuity
api_messages.append(api_msg)

View File

@ -99,6 +99,22 @@ def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
return normalized.endswith("/openai")
def _model_consumes_thought_signature(model: Any) -> bool:
"""True when the outgoing model is a Gemini family model that requires
``extra_content`` (thought_signature) to be replayed on tool calls.
Gemini 3 thinking models attach ``extra_content`` to each tool call and
reject subsequent requests with HTTP 400 if it is missing. Every other
strict OpenAI-compatible provider (Fireworks, Mistral, ...) rejects the
request with 400 if ``extra_content`` *is* present. So the field must be
kept only when the target model is itself Gemini-family, and stripped
otherwise — including when a non-Gemini model inherits stale Gemini
``extra_content`` from earlier in a mixed-provider session.
"""
m = str(model or "").lower()
return "gemini" in m or "gemma" in m
class ChatCompletionsTransport(ProviderTransport):
"""Transport for api_mode='chat_completions'.
@ -119,6 +135,14 @@ class ChatCompletionsTransport(ProviderTransport):
- Codex Responses API fields: ``codex_reasoning_items`` /
``codex_message_items`` on the message, ``call_id`` /
``response_item_id`` on ``tool_calls`` entries.
- ``extra_content`` on ``tool_calls`` (Gemini thought_signature) —
stripped unless the outgoing ``model`` is itself Gemini-family.
Gemini 3 thinking models attach it for replay, but strict providers
(Fireworks, Mistral) reject any payload containing it with
``Extra inputs are not permitted, field: 'messages[N].tool_calls[M].extra_content'``.
It must be kept for Gemini targets (replay required) and dropped for
everyone else, including non-Gemini models that inherited stale
Gemini ``extra_content`` earlier in a mixed-provider session.
- ``tool_name`` on tool-result messages — written by
``make_tool_result_message()`` for the SQLite FTS index, but not
part of the Chat Completions schema. Strict providers (Fireworks,
@ -137,6 +161,9 @@ class ChatCompletionsTransport(ProviderTransport):
``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
which then poisons every subsequent request in the session.
"""
strip_extra_content = not _model_consumes_thought_signature(
kwargs.get("model")
)
needs_sanitize = False
for msg in messages:
if not isinstance(msg, dict):
@ -155,7 +182,9 @@ class ChatCompletionsTransport(ProviderTransport):
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict) and (
"call_id" in tc or "response_item_id" in tc
"call_id" in tc
or "response_item_id" in tc
or (strip_extra_content and "extra_content" in tc)
):
needs_sanitize = True
break
@ -183,6 +212,8 @@ class ChatCompletionsTransport(ProviderTransport):
if isinstance(tc, dict):
tc.pop("call_id", None)
tc.pop("response_item_id", None)
if strip_extra_content:
tc.pop("extra_content", None)
return sanitized
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
@ -240,8 +271,10 @@ class ChatCompletionsTransport(ProviderTransport):
anthropic_max_output: int | None
extra_body_additions: dict | None
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
# Codex sanitization: drop reasoning_items / call_id / response_item_id.
# Pass model so the Gemini thought_signature (extra_content) is kept for
# Gemini targets and stripped for strict non-Gemini providers.
sanitized = self.convert_messages(messages, model=model)
# ── Provider profile: single-path when present ──────────────────
_profile = params.get("provider_profile")

View File

@ -4627,7 +4627,7 @@ class AIAgent:
return reapply_reasoning_echo_for_provider(self, api_messages)
@staticmethod
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
def _sanitize_tool_calls_for_strict_api(api_msg: dict, model: "str | None" = None) -> dict:
"""Strip Codex Responses API fields from tool_calls for strict providers.
Providers like Mistral, Fireworks, and other strict OpenAI-compatible APIs
@ -4636,17 +4636,26 @@ class AIAgent:
the internal message history — this method only modifies the outgoing
API copy.
``extra_content`` (Gemini thought_signature) is also stripped — strict
providers reject it with "Extra inputs are not permitted" — UNLESS the
outgoing ``model`` is itself Gemini-family, in which case it must be
replayed (Gemini 3 thinking models 400 without it). Defaults to
stripping when no model is supplied.
Creates new tool_call dicts rather than mutating in-place, so the
original messages list retains call_id/response_item_id for Codex
Responses API compatibility (e.g. if the session falls back to a
Codex provider later).
Fields stripped: call_id, response_item_id
Fields stripped: call_id, response_item_id, extra_content (model-gated)
"""
tool_calls = api_msg.get("tool_calls")
if not isinstance(tool_calls, list):
return api_msg
from agent.transports.chat_completions import _model_consumes_thought_signature
_STRIP_KEYS = {"call_id", "response_item_id"}
if not _model_consumes_thought_signature(model):
_STRIP_KEYS = _STRIP_KEYS | {"extra_content"}
api_msg["tool_calls"] = [
{k: v for k, v in tc.items() if k not in _STRIP_KEYS}
if isinstance(tc, dict) else tc

View File

@ -46,6 +46,44 @@ class TestChatCompletionsBasic:
assert "codex_reasoning_items" in msgs[0]
assert "codex_message_items" in msgs[0]
def _msg_with_extra_content(self):
return [
{"role": "assistant", "content": "ok",
"tool_calls": [{"id": "call_1", "type": "function",
"extra_content": {"google": {"thought_signature": "SIG_123"}},
"function": {"name": "t", "arguments": "{}"}}]},
]
def test_convert_messages_strips_extra_content_for_strict_provider(self, transport):
    """A non-Gemini target must not receive ``extra_content`` on tool calls.

    Strict providers (Fireworks, Mistral) answer HTTP 400 when the field is
    present, so the Gemini thought_signature has to be dropped from the
    outgoing copy, including stale signatures inherited from earlier in a
    mixed-provider session.
    """
    history = self._msg_with_extra_content()
    converted = transport.convert_messages(
        history, model="accounts/fireworks/models/llama-v3p1-70b"
    )
    wire_call = converted[0]["tool_calls"][0]
    assert "extra_content" not in wire_call
    # The caller's list keeps the field (deepcopy-on-demand).
    stored_call = history[0]["tool_calls"][0]
    assert "extra_content" in stored_call
def test_convert_messages_strips_extra_content_when_model_unknown(self, transport):
    """With no model supplied the default is to strip, which is safe for strict providers."""
    converted = transport.convert_messages(self._msg_with_extra_content())
    wire_call = converted[0]["tool_calls"][0]
    assert "extra_content" not in wire_call
def test_convert_messages_keeps_extra_content_for_gemini(self, transport):
    """Gemini-family targets must still see the thought_signature.

    Gemini 3 thinking models need it replayed on every turn and would 400
    without it, so ``extra_content`` is kept for Gemini targets, including
    aggregator slugs such as google/gemini-3-pro.
    """
    gemini_family = ("gemini-3-pro", "google/gemini-3-pro-preview", "gemma-3-27b")
    for model in gemini_family:
        history = self._msg_with_extra_content()
        converted = transport.convert_messages(history, model=model)
        kept = converted[0]["tool_calls"][0]["extra_content"]
        # The model name is the assertion message so a failure names the slug.
        assert kept == {"google": {"thought_signature": "SIG_123"}}, model
def test_convert_messages_strips_tool_name(self, transport):
"""Internal `tool_name` (used for FTS indexing in the SQLite store) is
not part of the OpenAI Chat Completions schema. Strict providers like

View File

@ -148,14 +148,57 @@ class TestBuildApiKwargsOpenRouter:
assert "codex_reasoning_items" not in assistant_msg
assert tool_call["id"] == "call_123"
assert tool_call["function"]["name"] == "terminal"
assert tool_call["extra_content"] == {"thought_signature": "opaque"}
# extra_content (Gemini thought_signature) is stripped for non-Gemini
# targets — strict providers like Fireworks 400 on it. The agent here
# is not a Gemini model, so it must be dropped.
assert "extra_content" not in tool_call
assert "call_id" not in tool_call
assert "response_item_id" not in tool_call
# Original stored history must remain unchanged (only the outgoing copy
# is sanitized) — Codex/Responses replay relies on these fields.
assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
assert "codex_reasoning_items" in messages[1]
assert messages[1]["tool_calls"][0]["extra_content"] == {"thought_signature": "opaque"}
def test_keeps_extra_content_for_gemini_target(self, monkeypatch):
    """A Gemini-family agent keeps ``extra_content`` in the built API kwargs.

    Gemini 3 thinking models 400 unless the thought_signature is replayed
    on the next turn. The Codex-only ids are still removed from the outgoing
    copy, and the stored history is left exactly as it was.
    """
    agent = _make_agent(monkeypatch, "openrouter", model="google/gemini-3-pro-preview")
    user_turn = {"role": "user", "content": "hi"}
    assistant_turn = {
        "role": "assistant",
        "content": "Checking now.",
        "tool_calls": [
            {
                "id": "call_123",
                "call_id": "call_123",
                "response_item_id": "fc_123",
                "type": "function",
                "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"},
                "extra_content": {"google": {"thought_signature": "opaque"}},
            }
        ],
    }
    tool_turn = {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"}
    messages = [user_turn, assistant_turn, tool_turn]

    kwargs = agent._build_api_kwargs(messages)

    wire_call = kwargs["messages"][1]["tool_calls"][0]
    assert wire_call["extra_content"] == {"google": {"thought_signature": "opaque"}}
    # call_id/response_item_id are removed whatever the model is.
    for codex_key in ("call_id", "response_item_id"):
        assert codex_key not in wire_call

    # Stored history must stay unchanged for Responses replay mode.
    stored_call = messages[1]["tool_calls"][0]
    assert stored_call["call_id"] == "call_123"
    assert stored_call["response_item_id"] == "fc_123"
    assert "codex_reasoning_items" in messages[1]
    assert stored_call["extra_content"] == {
        "google": {"thought_signature": "opaque"}
    }
def test_gemini_native_passes_base_url_for_top_level_thinking_config(self, monkeypatch):
agent = _make_agent(
@ -204,6 +247,47 @@ class TestBuildApiKwargsOpenRouter:
anthropic_agent.api_mode = "anthropic_messages"
assert anthropic_agent._should_sanitize_tool_calls() is True
def _api_msg_with_extra_content(self):
return {
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "call_id": "call_1", "type": "function",
"extra_content": {"google": {"thought_signature": "SIG_123"}},
"function": {"name": "t", "arguments": "{}"}},
],
}
def test_sanitize_tool_calls_strips_extra_content_for_strict_model(self, monkeypatch):
    """For a non-Gemini model the sanitizer drops ``extra_content``, which strict providers reject."""
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content(),
        model="accounts/fireworks/models/llama-v3p1-70b",
    )
    wire_call = sanitized["tool_calls"][0]
    assert "extra_content" not in wire_call
    assert "call_id" not in wire_call
def test_sanitize_tool_calls_strips_extra_content_when_model_none(self, monkeypatch):
    """With no model argument the sanitizer strips ``extra_content``, which is safe for strict providers."""
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content()
    )
    wire_call = sanitized["tool_calls"][0]
    assert "extra_content" not in wire_call
def test_sanitize_tool_calls_keeps_extra_content_for_gemini(self, monkeypatch):
    """For a Gemini model the sanitizer keeps the thought_signature.

    Gemini thinking models 400 when it is not replayed.
    """
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content(),
        model="google/gemini-3-pro-preview",
    )
    wire_call = sanitized["tool_calls"][0]
    assert wire_call["extra_content"] == {
        "google": {"thought_signature": "SIG_123"}
    }
    # call_id/response_item_id are removed whatever the model is.
    assert "call_id" not in wire_call
class TestDeveloperRoleSwap:
"""GPT-5 and Codex models should get 'developer' instead of 'system' role."""