diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index 253856b9b..0def28142 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -541,8 +541,7 @@ def _supports_media_in_tool_results(provider: str, model: str) -> bool:
 
     For unknown / legacy providers we conservatively return False — the
     caller falls back to the legacy aux-LLM text path.  The check is relaxed
-    when the provider's ``ProviderProfile`` declares ``supports_vision=True``
-    or when ``get_model_capabilities`` reports vision support for the model.
+    when the provider's ``ProviderProfile`` declares ``supports_vision=True``.
     """
     if not isinstance(provider, str):
         return False
@@ -590,16 +589,6 @@ def _supports_media_in_tool_results(provider: str, model: str) -> bool:
     except Exception:
         pass
 
-    # Check model capabilities from the models.dev catalog as a final
-    # fallback for custom providers whose models happen to be registered.
-    try:
-        from agent.models_dev import get_model_capabilities
-        caps = get_model_capabilities(provider, model)
-        if caps is not None and bool(getattr(caps, "supports_vision", False)):
-            return True
-    except Exception:
-        pass
-
     # Other vision-capable provider stacks. Conservative default: False.
     # Add explicit entries here as we verify each provider's tool-result
     # multimodal support empirically.