diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index b6b77d122..360b487b6 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -4756,11 +4756,11 @@ def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool: def _convert_openai_images_to_anthropic(messages: list) -> list: - """Convert OpenAI ``image_url`` and ``video_url`` content blocks to Anthropic format. + """Convert OpenAI ``image_url``/``video_url`` blocks to Anthropic format. Converts: - - ``image_url`` blocks → Anthropic ``image`` blocks - - ``video_url`` blocks → Anthropic ``input_video`` blocks + - ``image_url`` blocks to Anthropic ``image`` blocks + - ``video_url`` blocks to Anthropic ``video`` blocks (MiniMax M3 compat) Only touches messages that have list-type content with ``image_url`` or ``video_url`` blocks; plain text messages pass through unchanged. @@ -4801,6 +4801,13 @@ def _convert_openai_images_to_anthropic(messages: list) -> list: }) changed = True elif block.get("type") == "video_url": + # MiniMax's Anthropic-compatible endpoint expects a "video" + # block (not OpenAI's "video_url", and not "input_video"). + # See https://platform.minimax.io/docs/api-reference/text-anthropic-api + # — the Messages-field table lists type="video" (M3 only, + # URL/base64/mm_file://). The source shape mirrors the "image" + # block: base64 → {type:"base64", media_type, data}, URL → + # {type:"url", url}. 
video_url_val = (block.get("video_url") or {}).get("url", "") if video_url_val.startswith("data:"): # Parse data URI: data:<media_type>;base64,<data> @@ -4809,7 +4816,7 @@ def _convert_openai_images_to_anthropic(messages: list) -> list: if ":" in header and ";" in header: media_type = header.split(":", 1)[1].split(";", 1)[0] new_content.append({ - "type": "input_video", + "type": "video", "source": { "type": "base64", "media_type": media_type, @@ -4817,10 +4824,14 @@ def _convert_openai_images_to_anthropic(messages: list) -> list: }, }) else: - # URL-based video — Anthropic protocol doesn't have a - # native URL-based video block; pass through as-is and - # let the provider handle (or reject) it. - new_content.append(block) + # URL-based video + new_content.append({ + "type": "video", + "source": { + "type": "url", + "url": video_url_val, + }, + }) changed = True else: new_content.append(block) diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index e0c348378..074372d1c 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -2178,6 +2178,61 @@ class TestAnthropicCompatImageConversion: result = _convert_openai_images_to_anthropic(messages) assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg" + def test_base64_video_converted_to_video_block(self): + # MiniMax M3's Anthropic-compatible endpoint expects type="video" + # (not OpenAI's "video_url", not "input_video"). 
+ from agent.auxiliary_client import _convert_openai_images_to_anthropic + messages = [{ + "role": "user", + "content": [ + {"type": "text", "text": "What happens in this clip?"}, + {"type": "video_url", "video_url": {"url": "data:video/mp4;base64,AAAA"}}, + ], + }] + result = _convert_openai_images_to_anthropic(messages) + vid_block = result[0]["content"][1] + assert vid_block["type"] == "video" + assert vid_block["source"]["type"] == "base64" + assert vid_block["source"]["media_type"] == "video/mp4" + assert vid_block["source"]["data"] == "AAAA" + + def test_video_media_type_parsed_from_data_uri(self): + from agent.auxiliary_client import _convert_openai_images_to_anthropic + messages = [{ + "role": "user", + "content": [ + {"type": "video_url", "video_url": {"url": "data:video/quicktime;base64,QQ=="}} + ], + }] + result = _convert_openai_images_to_anthropic(messages) + assert result[0]["content"][0]["source"]["media_type"] == "video/quicktime" + + def test_url_video_converted_to_video_block(self): + from agent.auxiliary_client import _convert_openai_images_to_anthropic + messages = [{ + "role": "user", + "content": [ + {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}} + ], + }] + result = _convert_openai_images_to_anthropic(messages) + vid_block = result[0]["content"][0] + assert vid_block["type"] == "video" + assert vid_block["source"] == {"type": "url", "url": "https://example.com/clip.mp4"} + + def test_mixed_image_and_video_both_converted(self): + from agent.auxiliary_client import _convert_openai_images_to_anthropic + messages = [{ + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR"}}, + {"type": "video_url", "video_url": {"url": "data:video/mp4;base64,AAAA"}}, + ], + }] + result = _convert_openai_images_to_anthropic(messages) + assert result[0]["content"][0]["type"] == "image" + assert result[0]["content"][1]["type"] == "video" + class _AuxAuth401(Exception): status_code = 401