fix: strip extra_content from tool_calls for strict APIs (Fireworks, Mistral)
Fireworks/Mistral reject requests with HTTP 400 'Extra inputs are not permitted, field: messages[N].tool_calls[M].extra_content' on any session whose history contains prior Gemini tool calls. Gemini 3 thinking models attach extra_content (thought_signature) to tool_calls; it survived to the wire because the sanitize paths only stripped call_id/response_item_id. Strip extra_content from the outgoing wire copy in both sanitize paths (ChatCompletionsTransport.convert_messages + _sanitize_tool_calls_for_strict_api), but gate it on the target model: keep extra_content for Gemini-family targets (the thought_signature MUST be replayed, otherwise Gemini responds with HTTP 400) and strip it for everyone else — including non-Gemini models that inherit a stale Gemini signature from earlier in a mixed-provider session. Native Gemini is unaffected (GeminiNativeClient bypasses these paths). The original stored history is never mutated (only the per-call copy is). Fixes #17986.
This commit is contained in:
@ -1296,7 +1296,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
|
||||
for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
|
||||
api_msg.pop(internal_key, None)
|
||||
if _needs_sanitize:
|
||||
agent._sanitize_tool_calls_for_strict_api(api_msg)
|
||||
agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
|
||||
api_messages.append(api_msg)
|
||||
|
||||
effective_system = agent._cached_system_prompt or ""
|
||||
|
||||
@ -982,7 +982,7 @@ def run_conversation(
|
||||
# Uses new dicts so the internal messages list retains the fields
|
||||
# for Codex Responses compatibility.
|
||||
if agent._should_sanitize_tool_calls():
|
||||
agent._sanitize_tool_calls_for_strict_api(api_msg)
|
||||
agent._sanitize_tool_calls_for_strict_api(api_msg, model=agent.model)
|
||||
# Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
|
||||
# The signature field helps maintain reasoning continuity
|
||||
api_messages.append(api_msg)
|
||||
|
||||
@ -99,6 +99,22 @@ def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
|
||||
return normalized.endswith("/openai")
|
||||
|
||||
|
||||
def _model_consumes_thought_signature(model: Any) -> bool:
|
||||
"""True when the outgoing model is a Gemini family model that requires
|
||||
``extra_content`` (thought_signature) to be replayed on tool calls.
|
||||
|
||||
Gemini 3 thinking models attach ``extra_content`` to each tool call and
|
||||
reject subsequent requests with HTTP 400 if it is missing. Every other
|
||||
strict OpenAI-compatible provider (Fireworks, Mistral, ...) rejects the
|
||||
request with 400 if ``extra_content`` *is* present. So the field must be
|
||||
kept only when the target model is itself Gemini-family, and stripped
|
||||
otherwise — including when a non-Gemini model inherits stale Gemini
|
||||
``extra_content`` from earlier in a mixed-provider session.
|
||||
"""
|
||||
m = str(model or "").lower()
|
||||
return "gemini" in m or "gemma" in m
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
@ -119,6 +135,14 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
- Codex Responses API fields: ``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id`` /
|
||||
``response_item_id`` on ``tool_calls`` entries.
|
||||
- ``extra_content`` on ``tool_calls`` (Gemini thought_signature) —
|
||||
stripped unless the outgoing ``model`` is itself Gemini-family.
|
||||
Gemini 3 thinking models attach it for replay, but strict providers
|
||||
(Fireworks, Mistral) reject any payload containing it with
|
||||
``Extra inputs are not permitted, field: 'messages[N].tool_calls[M].extra_content'``.
|
||||
It must be kept for Gemini targets (replay required) and dropped for
|
||||
everyone else, including non-Gemini models that inherited stale
|
||||
Gemini ``extra_content`` earlier in a mixed-provider session.
|
||||
- ``tool_name`` on tool-result messages — written by
|
||||
``make_tool_result_message()`` for the SQLite FTS index, but not
|
||||
part of the Chat Completions schema. Strict providers (Fireworks,
|
||||
@ -137,6 +161,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
|
||||
which then poisons every subsequent request in the session.
|
||||
"""
|
||||
strip_extra_content = not _model_consumes_thought_signature(
|
||||
kwargs.get("model")
|
||||
)
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
@ -155,7 +182,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and (
|
||||
"call_id" in tc or "response_item_id" in tc
|
||||
"call_id" in tc
|
||||
or "response_item_id" in tc
|
||||
or (strip_extra_content and "extra_content" in tc)
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
@ -183,6 +212,8 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if isinstance(tc, dict):
|
||||
tc.pop("call_id", None)
|
||||
tc.pop("response_item_id", None)
|
||||
if strip_extra_content:
|
||||
tc.pop("extra_content", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
@ -240,8 +271,10 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
anthropic_max_output: int | None
|
||||
extra_body_additions: dict | None
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id.
|
||||
# Pass model so the Gemini thought_signature (extra_content) is kept for
|
||||
# Gemini targets and stripped for strict non-Gemini providers.
|
||||
sanitized = self.convert_messages(messages, model=model)
|
||||
|
||||
# ── Provider profile: single-path when present ──────────────────
|
||||
_profile = params.get("provider_profile")
|
||||
|
||||
13
run_agent.py
13
run_agent.py
@ -4627,7 +4627,7 @@ class AIAgent:
|
||||
return reapply_reasoning_echo_for_provider(self, api_messages)
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
|
||||
def _sanitize_tool_calls_for_strict_api(api_msg: dict, model: "str | None" = None) -> dict:
|
||||
"""Strip Codex Responses API fields from tool_calls for strict providers.
|
||||
|
||||
Providers like Mistral, Fireworks, and other strict OpenAI-compatible APIs
|
||||
@ -4636,17 +4636,26 @@ class AIAgent:
|
||||
the internal message history — this method only modifies the outgoing
|
||||
API copy.
|
||||
|
||||
``extra_content`` (Gemini thought_signature) is also stripped — strict
|
||||
providers reject it with "Extra inputs are not permitted" — UNLESS the
|
||||
outgoing ``model`` is itself Gemini-family, in which case it must be
|
||||
replayed (Gemini 3 thinking models 400 without it). Defaults to
|
||||
stripping when no model is supplied.
|
||||
|
||||
Creates new tool_call dicts rather than mutating in-place, so the
|
||||
original messages list retains call_id/response_item_id for Codex
|
||||
Responses API compatibility (e.g. if the session falls back to a
|
||||
Codex provider later).
|
||||
|
||||
Fields stripped: call_id, response_item_id
|
||||
Fields stripped: call_id, response_item_id, extra_content (model-gated)
|
||||
"""
|
||||
tool_calls = api_msg.get("tool_calls")
|
||||
if not isinstance(tool_calls, list):
|
||||
return api_msg
|
||||
from agent.transports.chat_completions import _model_consumes_thought_signature
|
||||
_STRIP_KEYS = {"call_id", "response_item_id"}
|
||||
if not _model_consumes_thought_signature(model):
|
||||
_STRIP_KEYS = _STRIP_KEYS | {"extra_content"}
|
||||
api_msg["tool_calls"] = [
|
||||
{k: v for k, v in tc.items() if k not in _STRIP_KEYS}
|
||||
if isinstance(tc, dict) else tc
|
||||
|
||||
@ -46,6 +46,44 @@ class TestChatCompletionsBasic:
|
||||
assert "codex_reasoning_items" in msgs[0]
|
||||
assert "codex_message_items" in msgs[0]
|
||||
|
||||
def _msg_with_extra_content(self):
|
||||
return [
|
||||
{"role": "assistant", "content": "ok",
|
||||
"tool_calls": [{"id": "call_1", "type": "function",
|
||||
"extra_content": {"google": {"thought_signature": "SIG_123"}},
|
||||
"function": {"name": "t", "arguments": "{}"}}]},
|
||||
]
|
||||
|
||||
def test_convert_messages_strips_extra_content_for_strict_provider(self, transport):
    """A non-Gemini target must not receive ``extra_content`` on tool calls.

    Strict providers (Fireworks, Mistral) reject ``extra_content`` with
    HTTP 400, so the Gemini thought_signature has to be removed from the
    converted messages — including stale signatures inherited from
    earlier in a mixed-provider session.
    """
    history = self._msg_with_extra_content()
    converted = transport.convert_messages(
        history, model="accounts/fireworks/models/llama-v3p1-70b"
    )
    outgoing_call = converted[0]["tool_calls"][0]
    assert "extra_content" not in outgoing_call
    # The caller's list must still carry the field: only the returned
    # copy is sanitized.
    stored_call = history[0]["tool_calls"][0]
    assert "extra_content" in stored_call
|
||||
|
||||
def test_convert_messages_strips_extra_content_when_model_unknown(self, transport):
    """With no ``model`` argument, ``extra_content`` is stripped (the safe default)."""
    history = self._msg_with_extra_content()
    outgoing_call = transport.convert_messages(history)[0]["tool_calls"][0]
    assert "extra_content" not in outgoing_call
|
||||
|
||||
def test_convert_messages_keeps_extra_content_for_gemini(self, transport):
    """Gemini-family targets must get the thought_signature back unchanged.

    Gemini 3 thinking models need the signature replayed on every turn,
    so removing it would cause an HTTP 400. Covers a bare model name, an
    aggregator slug (``google/...``) and a Gemma model.
    """
    expected_signature = {"google": {"thought_signature": "SIG_123"}}
    gemini_family = ("gemini-3-pro", "google/gemini-3-pro-preview", "gemma-3-27b")
    for model in gemini_family:
        history = self._msg_with_extra_content()
        converted = transport.convert_messages(history, model=model)
        outgoing_call = converted[0]["tool_calls"][0]
        # The model name is the assertion message, so a failure says
        # which slug regressed.
        assert outgoing_call["extra_content"] == expected_signature, model
|
||||
|
||||
def test_convert_messages_strips_tool_name(self, transport):
|
||||
"""Internal `tool_name` (used for FTS indexing in the SQLite store) is
|
||||
not part of the OpenAI Chat Completions schema. Strict providers like
|
||||
|
||||
@ -148,14 +148,57 @@ class TestBuildApiKwargsOpenRouter:
|
||||
assert "codex_reasoning_items" not in assistant_msg
|
||||
assert tool_call["id"] == "call_123"
|
||||
assert tool_call["function"]["name"] == "terminal"
|
||||
assert tool_call["extra_content"] == {"thought_signature": "opaque"}
|
||||
# extra_content (Gemini thought_signature) is stripped for non-Gemini
|
||||
# targets — strict providers like Fireworks 400 on it. The agent here
|
||||
# is not a Gemini model, so it must be dropped.
|
||||
assert "extra_content" not in tool_call
|
||||
assert "call_id" not in tool_call
|
||||
assert "response_item_id" not in tool_call
|
||||
|
||||
# Original stored history must remain unchanged (only the outgoing copy
|
||||
# is sanitized) — Codex/Responses replay relies on these fields.
|
||||
assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
|
||||
assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
|
||||
assert "codex_reasoning_items" in messages[1]
|
||||
assert messages[1]["tool_calls"][0]["extra_content"] == {"thought_signature": "opaque"}
|
||||
|
||||
def test_keeps_extra_content_for_gemini_target(self, monkeypatch):
    """``_build_api_kwargs`` keeps ``extra_content`` when the agent model is Gemini.

    Gemini 3 thinking models return HTTP 400 unless the thought_signature
    is replayed on the next turn. ``call_id`` / ``response_item_id`` must
    still be removed from the outgoing copy, and the stored history must
    not be modified.
    """
    agent = _make_agent(monkeypatch, "openrouter", model="google/gemini-3-pro-preview")
    stored_call = {
        "id": "call_123",
        "call_id": "call_123",
        "response_item_id": "fc_123",
        "type": "function",
        "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"},
        "extra_content": {"google": {"thought_signature": "opaque"}},
    }
    messages = [
        {"role": "user", "content": "hi"},
        {
            "role": "assistant",
            "content": "Checking now.",
            "tool_calls": [stored_call],
        },
        {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"},
    ]
    # A separate literal, so the comparison cannot pass just because both
    # sides reference the same object.
    expected_signature = {"google": {"thought_signature": "opaque"}}

    kwargs = agent._build_api_kwargs(messages)
    outgoing_call = kwargs["messages"][1]["tool_calls"][0]
    assert outgoing_call["extra_content"] == expected_signature
    # call_id/response_item_id are still stripped regardless of model
    assert "call_id" not in outgoing_call
    assert "response_item_id" not in outgoing_call

    # Original stored history must remain unchanged for Responses replay mode.
    history_call = messages[1]["tool_calls"][0]
    assert history_call["call_id"] == "call_123"
    assert history_call["response_item_id"] == "fc_123"
    assert "codex_reasoning_items" in messages[1]
    assert history_call["extra_content"] == expected_signature
|
||||
|
||||
def test_gemini_native_passes_base_url_for_top_level_thinking_config(self, monkeypatch):
|
||||
agent = _make_agent(
|
||||
@ -204,6 +247,47 @@ class TestBuildApiKwargsOpenRouter:
|
||||
anthropic_agent.api_mode = "anthropic_messages"
|
||||
assert anthropic_agent._should_sanitize_tool_calls() is True
|
||||
|
||||
def _api_msg_with_extra_content(self):
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{"id": "call_1", "call_id": "call_1", "type": "function",
|
||||
"extra_content": {"google": {"thought_signature": "SIG_123"}},
|
||||
"function": {"name": "t", "arguments": "{}"}},
|
||||
],
|
||||
}
|
||||
|
||||
def test_sanitize_tool_calls_strips_extra_content_for_strict_model(self, monkeypatch):
    """For a non-Gemini model, both ``extra_content`` and ``call_id`` are removed."""
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content(),
        model="accounts/fireworks/models/llama-v3p1-70b",
    )
    outgoing_call = sanitized["tool_calls"][0]
    assert "extra_content" not in outgoing_call
    assert "call_id" not in outgoing_call
|
||||
|
||||
def test_sanitize_tool_calls_strips_extra_content_when_model_none(self, monkeypatch):
    """With no ``model`` argument, ``extra_content`` is removed (the safe default)."""
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content()
    )
    outgoing_call = sanitized["tool_calls"][0]
    assert "extra_content" not in outgoing_call
|
||||
|
||||
def test_sanitize_tool_calls_keeps_extra_content_for_gemini(self, monkeypatch):
    """For a Gemini model, ``extra_content`` is kept and ``call_id`` is still removed.

    Gemini thinking models return HTTP 400 without the replayed
    thought_signature.
    """
    agent = _make_agent(monkeypatch, "openrouter")
    sanitized = agent._sanitize_tool_calls_for_strict_api(
        self._api_msg_with_extra_content(),
        model="google/gemini-3-pro-preview",
    )
    outgoing_call = sanitized["tool_calls"][0]
    expected_signature = {"google": {"thought_signature": "SIG_123"}}
    assert outgoing_call["extra_content"] == expected_signature
    # call_id/response_item_id are still stripped regardless of model
    assert "call_id" not in outgoing_call
|
||||
|
||||
|
||||
class TestDeveloperRoleSwap:
|
||||
"""GPT-5 and Codex models should get 'developer' instead of 'system' role."""
|
||||
|
||||
Reference in New Issue
Block a user