fix(vision): use MiniMax type="video" block (not input_video) + tests

The salvaged conversion emitted type:"input_video", which MiniMax M3 rejects
just like the original video_url block. Per MiniMax's Anthropic-compat docs,
the video content block is type:"video" with an image-style source (base64 or
url). Fixes the block type, converts URL-based videos too, and adds 4 video
conversion tests (none shipped with the original PR).
This commit is contained in:
teknium
2026-06-04 05:24:50 -07:00
committed by Teknium
parent 0b46c4163a
commit 153fe28474
2 changed files with 74 additions and 8 deletions

View File

@ -4756,11 +4756,11 @@ def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
def _convert_openai_images_to_anthropic(messages: list) -> list:
"""Convert OpenAI ``image_url`` and ``video_url`` content blocks to Anthropic format.
"""Convert OpenAI ``image_url``/``video_url`` blocks to Anthropic format.
Converts:
- ``image_url`` blocks Anthropic ``image`` blocks
- ``video_url`` blocks Anthropic ``input_video`` blocks
- ``image_url`` blocks to Anthropic ``image`` blocks
- ``video_url`` blocks to Anthropic ``video`` blocks (MiniMax M3 compat)
Only touches messages that have list-type content with ``image_url`` or
``video_url`` blocks; plain text messages pass through unchanged.
@ -4801,6 +4801,13 @@ def _convert_openai_images_to_anthropic(messages: list) -> list:
})
changed = True
elif block.get("type") == "video_url":
# MiniMax's Anthropic-compatible endpoint expects a "video"
# block (not OpenAI's "video_url", and not "input_video").
# See https://platform.minimax.io/docs/api-reference/text-anthropic-api
# — the Messages-field table lists type="video" (M3 only,
# URL/base64/mm_file://). The source shape mirrors the "image"
# block: base64 → {type:"base64", media_type, data}, URL →
# {type:"url", url}.
video_url_val = (block.get("video_url") or {}).get("url", "")
if video_url_val.startswith("data:"):
# Parse data URI: data:<media_type>;base64,<data>
@ -4809,7 +4816,7 @@ def _convert_openai_images_to_anthropic(messages: list) -> list:
if ":" in header and ";" in header:
media_type = header.split(":", 1)[1].split(";", 1)[0]
new_content.append({
"type": "input_video",
"type": "video",
"source": {
"type": "base64",
"media_type": media_type,
@ -4817,10 +4824,14 @@ def _convert_openai_images_to_anthropic(messages: list) -> list:
},
})
else:
# URL-based video — Anthropic protocol doesn't have a
# native URL-based video block; pass through as-is and
# let the provider handle (or reject) it.
new_content.append(block)
# URL-based video
new_content.append({
"type": "video",
"source": {
"type": "url",
"url": video_url_val,
},
})
changed = True
else:
new_content.append(block)

View File

@ -2178,6 +2178,61 @@ class TestAnthropicCompatImageConversion:
result = _convert_openai_images_to_anthropic(messages)
assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg"
def test_base64_video_converted_to_video_block(self):
    """A base64 ``video_url`` data URI is rewritten as a ``video`` block.

    MiniMax M3's Anthropic-compatible endpoint expects type="video";
    neither OpenAI's "video_url" nor "input_video" is the expected type.
    """
    from agent.auxiliary_client import _convert_openai_images_to_anthropic

    prompt_block = {"type": "text", "text": "What happens in this clip?"}
    clip_block = {
        "type": "video_url",
        "video_url": {"url": "data:video/mp4;base64,AAAA"},
    }
    conversation = [{"role": "user", "content": [prompt_block, clip_block]}]

    converted = _convert_openai_images_to_anthropic(conversation)

    # The video sits after the text block, so it is the second content entry.
    video = converted[0]["content"][1]
    assert video["type"] == "video"
    assert video["source"]["type"] == "base64"
    assert video["source"]["media_type"] == "video/mp4"
    assert video["source"]["data"] == "AAAA"
def test_video_media_type_parsed_from_data_uri(self):
    """The media type in the data URI header is carried into the block source."""
    from agent.auxiliary_client import _convert_openai_images_to_anthropic

    quicktime_clip = {
        "type": "video_url",
        "video_url": {"url": "data:video/quicktime;base64,QQ=="},
    }
    conversation = [{"role": "user", "content": [quicktime_clip]}]

    converted = _convert_openai_images_to_anthropic(conversation)

    first_block = converted[0]["content"][0]
    assert first_block["source"]["media_type"] == "video/quicktime"
def test_url_video_converted_to_video_block(self):
    """A plain (non-data) video URL becomes a ``video`` block with a ``url`` source."""
    from agent.auxiliary_client import _convert_openai_images_to_anthropic

    clip_url = "https://example.com/clip.mp4"
    conversation = [
        {
            "role": "user",
            "content": [{"type": "video_url", "video_url": {"url": clip_url}}],
        }
    ]

    converted = _convert_openai_images_to_anthropic(conversation)

    video = converted[0]["content"][0]
    assert video["type"] == "video"
    # The whole source dict is compared, so no extra keys are tolerated.
    assert video["source"] == {"type": "url", "url": clip_url}
def test_mixed_image_and_video_both_converted(self):
    """An image block and a video block in one message are both converted, in order."""
    from agent.auxiliary_client import _convert_openai_images_to_anthropic

    picture = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,iVBOR"},
    }
    clip = {
        "type": "video_url",
        "video_url": {"url": "data:video/mp4;base64,AAAA"},
    }
    conversation = [{"role": "user", "content": [picture, clip]}]

    converted = _convert_openai_images_to_anthropic(conversation)

    blocks = converted[0]["content"]
    assert blocks[0]["type"] == "image"
    assert blocks[1]["type"] == "video"
class _AuxAuth401(Exception):
status_code = 401