fix(vision): cap embedded image size before it wedges a session (#35732)

Resize vision tool-result images down to a 4 MB embed cap at load time,
not just at the 20 MB hard ceiling. A 5-20 MB image previously sailed
through the native fast path and got baked into conversation history,
where Anthropic's 5 MB per-image base64 limit rejected every subsequent
turn with a 400 — and because history is immutable, retries could never
clear it, permanently wedging the session.

Also harden the reactive shrink-recovery: it now returns False (don't
retry) when any oversized image part can't be brought under the target
size, so the single retry isn't burned re-sending a payload that will
fail identically. Previously it returned True after shrinking *any*
part, even when the actual oversized culprit survived.
This commit is contained in:
Teknium
2026-05-31 00:12:09 -07:00
committed by GitHub
parent d4e7b2fc19
commit 0ffbcbbe7d
4 changed files with 134 additions and 4 deletions

View File

@@ -644,6 +644,12 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
# after a confirmed provider rejection, so the alternative is failure.
target_bytes = 4 * 1024 * 1024
changed_count = 0
# Track parts that are over the target but could NOT be shrunk under it.
# If any survive, retrying is pointless — the same oversized payload will
# be re-sent and rejected again, wasting the single retry budget. We only
# report success (caller retries) when every over-threshold image was
# actually brought under the target.
unshrinkable_oversized = 0
def _shrink_data_url(url: str) -> Optional[str]:
"""Return a smaller data URL, or None if shrink can't help."""
@@ -710,17 +716,34 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
if resized:
image_value["url"] = resized
changed_count += 1
elif isinstance(url, str) and url.startswith("data:") \
and len(url) > target_bytes:
unshrinkable_oversized += 1
elif isinstance(image_value, str):
resized = _shrink_data_url(image_value)
if resized:
part["image_url"] = resized
changed_count += 1
elif image_value.startswith("data:") \
and len(image_value) > target_bytes:
unshrinkable_oversized += 1
if changed_count:
logger.info(
"image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
changed_count, target_bytes / (1024 * 1024),
)
if unshrinkable_oversized:
# At least one oversized image could not be shrunk under the target.
# Retrying would re-send it and fail identically, so signal "no
# progress" even if other parts shrank — the caller will surface the
# original error rather than burning its single retry on a no-op.
logger.warning(
"image-shrink recovery: %d oversized image part(s) could not be "
"shrunk under %.0f MB — not retrying (would re-send rejected payload)",
unshrinkable_oversized, target_bytes / (1024 * 1024),
)
return False
return changed_count > 0