From 0b46c4163aa7975f1a446012754a0bd467da16fb Mon Sep 17 00:00:00 2001 From: kyssta-exe Date: Tue, 2 Jun 2026 11:32:51 +0000 Subject: [PATCH] fix(vision): convert video_url blocks to Anthropic input_video format for MiniMax providers The video_analyze tool sends OpenAI-style 'video_url' content blocks, which breaks Anthropic-protocol providers (minimax, minimax-cn). These providers expect 'input_video' blocks with base64 data instead of data: URLs. Extends _convert_openai_images_to_anthropic() to also handle video_url blocks, converting them to Anthropic's input_video format when targeting Anthropic-compatible endpoints. Fixes #37219 --- agent/auxiliary_client.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 961e30313..b6b77d122 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -4756,10 +4756,14 @@ def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool: def _convert_openai_images_to_anthropic(messages: list) -> list: - """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks. + """Convert OpenAI ``image_url`` and ``video_url`` content blocks to Anthropic format. - Only touches messages that have list-type content with ``image_url`` blocks; - plain text messages pass through unchanged. + Converts: + - ``image_url`` blocks → Anthropic ``image`` blocks + - ``video_url`` blocks → Anthropic ``input_video`` blocks + + Only touches messages that have list-type content with ``image_url`` or + ``video_url`` blocks; plain text messages pass through unchanged. 
""" converted = [] for msg in messages: @@ -4796,6 +4800,28 @@ def _convert_openai_images_to_anthropic(messages: list) -> list: }, }) changed = True + elif block.get("type") == "video_url": + video_url_val = (block.get("video_url") or {}).get("url", "") + if video_url_val.startswith("data:"): + # Parse data URI: data:<media_type>;base64,<payload> + header, _, b64data = video_url_val.partition(",") + media_type = "video/mp4" + if ":" in header and ";" in header: + media_type = header.split(":", 1)[1].split(";", 1)[0] + new_content.append({ + "type": "input_video", + "source": { + "type": "base64", + "media_type": media_type, + "data": b64data, + }, + }) + else: + # URL-based video — Anthropic protocol doesn't have a + # native URL-based video block; pass through as-is and + # let the provider handle (or reject) it. + new_content.append(block) + changed = True else: new_content.append(block) converted.append({**msg, "content": new_content} if changed else msg)