fix(xai): route video models by modality
This commit is contained in:
@ -21,9 +21,12 @@ delivers it.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
import logging
|
import logging
|
||||||
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -42,7 +45,9 @@ logger = logging.getLogger(__name__)
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
|
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
|
||||||
DEFAULT_MODEL = "grok-imagine-video"
|
DEFAULT_TEXT_TO_VIDEO_MODEL = "grok-imagine-video"
|
||||||
|
DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5-preview"
|
||||||
|
DEFAULT_MODEL = DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||||
DEFAULT_DURATION = 8
|
DEFAULT_DURATION = 8
|
||||||
DEFAULT_ASPECT_RATIO = "16:9"
|
DEFAULT_ASPECT_RATIO = "16:9"
|
||||||
DEFAULT_RESOLUTION = "720p"
|
DEFAULT_RESOLUTION = "720p"
|
||||||
@ -58,10 +63,18 @@ _MODELS: Dict[str, Dict[str, Any]] = {
|
|||||||
"grok-imagine-video": {
|
"grok-imagine-video": {
|
||||||
"display": "Grok Imagine Video",
|
"display": "Grok Imagine Video",
|
||||||
"speed": "~60-240s",
|
"speed": "~60-240s",
|
||||||
"strengths": "Text-to-video + image-to-video; up to 7 reference images for style/character.",
|
"strengths": "Text-to-video; legacy image-to-video fallback.",
|
||||||
"price": "see https://docs.x.ai/docs/models",
|
"price": "see https://docs.x.ai/developers/models/grok-imagine-video",
|
||||||
"modalities": ["text", "image"],
|
"modalities": ["text", "image"],
|
||||||
},
|
},
|
||||||
|
"grok-imagine-video-1.5-preview": {
|
||||||
|
"display": "Grok Imagine Video 1.5 Preview",
|
||||||
|
"speed": "~60-240s",
|
||||||
|
"strengths": "Latest xAI image-to-video model.",
|
||||||
|
"price": "see https://docs.x.ai/developers/models/grok-imagine-video-1.5-preview",
|
||||||
|
"modalities": ["image"],
|
||||||
|
"aliases": ["grok-imagine-video-1.5-2026-05-30"],
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -111,10 +124,31 @@ def _xai_headers(api_key: str) -> Dict[str, str]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _image_ref_to_xai_url(value: str) -> str:
    """Return a URL/data URI accepted by xAI for image inputs.

    Remote ``http(s)://`` URLs and ``data:image/`` URIs are passed through
    after whitespace stripping.  A reference that names an existing local
    image file is read and inlined as a base64 ``data:`` URI.  Every other
    reference (missing path, non-image file, string that is not a usable
    path, unreadable file) is returned stripped but otherwise unchanged.

    Args:
        value: URL, data URI, or local filesystem path.  ``None`` and blank
            strings are tolerated.

    Returns:
        The reference to put in the request payload, or ``""`` when *value*
        is empty.
    """
    ref = (value or "").strip()
    if not ref:
        return ""
    if ref.lower().startswith(("http://", "https://", "data:image/")):
        return ref

    try:
        path = Path(ref).expanduser()
        if not path.is_file():
            return ref
        mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
        if not mime.startswith("image/"):
            return ref
        raw = path.read_bytes()
    except (OSError, RuntimeError, ValueError) as exc:
        # Arbitrary strings are not always valid paths: an over-long name
        # makes stat() fail with ENAMETOOLONG, "~unknown" cannot be expanded,
        # and an existing file may be unreadable.  Fall back to the untouched
        # reference instead of aborting the whole generation request.
        logging.getLogger(__name__).warning(
            "Could not inline local image reference (%s); sending it unchanged",
            type(exc).__name__,
        )
        return ref

    encoded = base64.b64encode(raw).decode("ascii")
    return f"data:{mime};base64,{encoded}"
|
||||||
|
|
||||||
|
|
||||||
def _normalize_reference_images(reference_image_urls: Optional[List[str]]):
    """Build the ``reference_images`` payload entries for xAI.

    Each reference is passed through ``_image_ref_to_xai_url``; references
    that come back empty are dropped.  Returns a list of ``{"url": ...}``
    dicts, or ``None`` when nothing usable remains.
    """
    converted = (_image_ref_to_xai_url(item) for item in reference_image_urls or [])
    entries = [{"url": ref} for ref in converted if ref]
    return entries if entries else None
|
||||||
@ -131,6 +165,28 @@ def _clamp_duration(duration: Optional[int], has_reference_images: bool) -> int:
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_model_for_modality(
    model: Optional[str],
    *,
    modality: str,
    explicit_model: bool,
) -> str:
    """Select the xAI video model for a request based on its input modality.

    A configured model is treated as a default, not as a per-request
    override: ``grok-imagine-video-1.5-preview`` currently rejects text-only
    video generation, but it is the desired image-to-video backend.  An
    explicit tool ``model=`` still wins for users who intentionally request
    another model.

    Args:
        model: Requested or configured model id; may be ``None`` or blank.
        modality: ``"image"`` when an input image is supplied; any other
            value is handled as a text-only request.
        explicit_model: True when *model* was explicitly requested for this
            call rather than taken from configuration.

    Returns:
        The model id to send to xAI.
    """
    requested = (model or "").strip()
    if explicit_model and requested:
        return requested
    if modality == "image":
        return DEFAULT_IMAGE_TO_VIDEO_MODEL

    # Text-only request.  Catalog entries that do not list "text" cannot serve
    # it, and neither can their aliases (e.g. the dated alias of the preview
    # model), so all of them fall back to the text-to-video default.
    image_only = {DEFAULT_IMAGE_TO_VIDEO_MODEL}
    for model_id, meta in _MODELS.items():
        if "text" not in (meta.get("modalities") or ["text"]):
            image_only.add(model_id)
            image_only.update(meta.get("aliases") or ())
    if requested in image_only:
        return DEFAULT_TEXT_TO_VIDEO_MODEL
    return requested or DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||||
|
|
||||||
|
|
||||||
async def _submit(
|
async def _submit(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
payload: Dict[str, Any],
|
payload: Dict[str, Any],
|
||||||
@ -192,7 +248,7 @@ async def _poll(
|
|||||||
|
|
||||||
|
|
||||||
class XAIVideoGenProvider(VideoGenProvider):
|
class XAIVideoGenProvider(VideoGenProvider):
|
||||||
"""xAI grok-imagine-video backend (text-to-video + image-to-video)."""
|
"""xAI Grok Imagine video backend (text-to-video + image-to-video)."""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
@ -222,7 +278,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
return {
|
return {
|
||||||
"name": "xAI Grok Imagine",
|
"name": "xAI Grok Imagine",
|
||||||
"badge": "paid",
|
"badge": "paid",
|
||||||
"tag": "grok-imagine-video — text-to-video & image-to-video; uses xAI Grok OAuth or XAI_API_KEY",
|
"tag": "grok-imagine-video for text-to-video; grok-imagine-video-1.5-preview for image-to-video; uses xAI Grok OAuth or XAI_API_KEY",
|
||||||
"env_vars": [],
|
"env_vars": [],
|
||||||
"post_setup": "xai_grok",
|
"post_setup": "xai_grok",
|
||||||
}
|
}
|
||||||
@ -260,6 +316,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
return loop.run_until_complete(self._generate_async(
|
return loop.run_until_complete(self._generate_async(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
model=model,
|
model=model,
|
||||||
|
explicit_model=bool(kwargs.get("_model_override_explicit")),
|
||||||
image_url=image_url,
|
image_url=image_url,
|
||||||
reference_image_urls=reference_image_urls,
|
reference_image_urls=reference_image_urls,
|
||||||
duration=duration,
|
duration=duration,
|
||||||
@ -284,6 +341,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
*,
|
*,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
model: Optional[str],
|
model: Optional[str],
|
||||||
|
explicit_model: bool,
|
||||||
image_url: Optional[str],
|
image_url: Optional[str],
|
||||||
reference_image_urls: Optional[List[str]],
|
reference_image_urls: Optional[List[str]],
|
||||||
duration: Optional[int],
|
duration: Optional[int],
|
||||||
@ -303,10 +361,15 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
)
|
)
|
||||||
|
|
||||||
prompt = (prompt or "").strip()
|
prompt = (prompt or "").strip()
|
||||||
image_url_norm = (image_url or "").strip() or None
|
image_url_norm = _image_ref_to_xai_url(image_url or "") or None
|
||||||
normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
|
normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
|
||||||
normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
|
normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
|
||||||
modality_used = "image" if image_url_norm else "text"
|
modality_used = "image" if image_url_norm else "text"
|
||||||
|
resolved_model = _resolve_model_for_modality(
|
||||||
|
model,
|
||||||
|
modality=modality_used,
|
||||||
|
explicit_model=explicit_model,
|
||||||
|
)
|
||||||
|
|
||||||
if not prompt:
|
if not prompt:
|
||||||
return error_response(
|
return error_response(
|
||||||
@ -340,7 +403,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
normalized_resolution = DEFAULT_RESOLUTION
|
normalized_resolution = DEFAULT_RESOLUTION
|
||||||
|
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"model": model or DEFAULT_MODEL,
|
"model": resolved_model,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"duration": clamped_duration,
|
"duration": clamped_duration,
|
||||||
"aspect_ratio": normalized_aspect_ratio,
|
"aspect_ratio": normalized_aspect_ratio,
|
||||||
@ -366,7 +429,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
|
error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
|
||||||
error_type="api_error",
|
error_type="api_error",
|
||||||
provider="xai",
|
provider="xai",
|
||||||
model=model or DEFAULT_MODEL,
|
model=resolved_model,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -388,7 +451,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
error="xAI video generation completed without a video URL",
|
error="xAI video generation completed without a video URL",
|
||||||
error_type="empty_response",
|
error_type="empty_response",
|
||||||
provider="xai",
|
provider="xai",
|
||||||
model=body.get("model") or model or DEFAULT_MODEL,
|
model=body.get("model") or resolved_model,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
)
|
)
|
||||||
extra: Dict[str, Any] = {
|
extra: Dict[str, Any] = {
|
||||||
@ -399,7 +462,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
extra["usage"] = body["usage"]
|
extra["usage"] = body["usage"]
|
||||||
return success_response(
|
return success_response(
|
||||||
video=url,
|
video=url,
|
||||||
model=body.get("model") or model or DEFAULT_MODEL,
|
model=body.get("model") or resolved_model,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
modality=modality_used,
|
modality=modality_used,
|
||||||
aspect_ratio=normalized_aspect_ratio,
|
aspect_ratio=normalized_aspect_ratio,
|
||||||
@ -413,7 +476,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
error=f"Timed out waiting for video generation after {DEFAULT_TIMEOUT_SECONDS}s",
|
error=f"Timed out waiting for video generation after {DEFAULT_TIMEOUT_SECONDS}s",
|
||||||
error_type="timeout",
|
error_type="timeout",
|
||||||
provider="xai",
|
provider="xai",
|
||||||
model=model or DEFAULT_MODEL,
|
model=resolved_model,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -426,7 +489,7 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||||||
error=message,
|
error=message,
|
||||||
error_type=f"xai_{status}",
|
error_type=f"xai_{status}",
|
||||||
provider="xai",
|
provider="xai",
|
||||||
model=model or DEFAULT_MODEL,
|
model=resolved_model,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
name: xai
|
name: xai
|
||||||
version: 1.0.0
|
version: 1.0.0
|
||||||
description: "xAI Grok-Imagine video generation backend. Supports text-to-video, image-to-video, reference-image-guided generation, video edit, and video extend via the xAI async videos API."
|
description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, and reference-image-guided generation via the xAI async videos API."
|
||||||
author: NousResearch
|
author: NousResearch
|
||||||
kind: backend
|
kind: backend
|
||||||
requires_env:
|
requires_env:
|
||||||
|
|||||||
@ -25,6 +25,43 @@ def test_xai_provider_registers():
|
|||||||
assert provider.default_model() == "grok-imagine-video"
|
assert provider.default_model() == "grok-imagine-video"
|
||||||
|
|
||||||
|
|
||||||
|
def test_xai_provider_lists_text_and_current_image_video_models():
    """The catalog lists the text model first and the image-only preview second."""
    from plugins.video_gen.xai import XAIVideoGenProvider

    catalog = XAIVideoGenProvider().list_models()
    listed_ids = [entry["id"] for entry in catalog]

    assert listed_ids[0] == "grok-imagine-video"
    assert listed_ids[1] == "grok-imagine-video-1.5-preview"

    # The preview entry is image-only and carries its dated alias.
    preview = catalog[1]
    assert preview["modalities"] == ["image"]
    assert preview["aliases"] == ["grok-imagine-video-1.5-2026-05-30"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_xai_routes_default_models_by_modality():
    """Non-explicit models are routed by modality; explicit ones are kept."""
    from plugins.video_gen.xai import _resolve_model_for_modality

    text_model = "grok-imagine-video"
    image_model = "grok-imagine-video-1.5-preview"

    # (configured model, modality, explicit override?, expected result)
    scenarios = [
        (text_model, "text", False, text_model),
        (text_model, "image", False, image_model),
        (image_model, "text", False, text_model),
        (image_model, "text", True, image_model),
    ]
    for configured, modality, explicit, expected in scenarios:
        resolved = _resolve_model_for_modality(
            configured,
            modality=modality,
            explicit_model=explicit,
        )
        assert resolved == expected
|
||||||
|
|
||||||
|
|
||||||
def test_xai_capabilities_text_and_image_only():
|
def test_xai_capabilities_text_and_image_only():
|
||||||
"""xAI was previously advertised with edit/extend operations. The
|
"""xAI was previously advertised with edit/extend operations. The
|
||||||
simplified surface only exposes text-to-video and image-to-video —
|
simplified surface only exposes text-to-video and image-to-video —
|
||||||
|
|||||||
@ -56,7 +56,7 @@ class _FakeAsyncClient:
|
|||||||
return _FakeResponse(200, {
|
return _FakeResponse(200, {
|
||||||
"status": "done",
|
"status": "done",
|
||||||
"video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
|
"video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
|
||||||
"model": "grok-imagine-video",
|
"model": self.posts[-1]["json"]["model"],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
@ -113,6 +113,7 @@ class TestXAIPayload:
|
|||||||
provider, captured = xai_provider
|
provider, captured = xai_provider
|
||||||
provider.generate("a dog at sunset")
|
provider.generate("a dog at sunset")
|
||||||
payload = _last_post(captured)["json"]
|
payload = _last_post(captured)["json"]
|
||||||
|
assert payload["model"] == "grok-imagine-video"
|
||||||
assert payload["prompt"] == "a dog at sunset"
|
assert payload["prompt"] == "a dog at sunset"
|
||||||
assert "image" not in payload
|
assert "image" not in payload
|
||||||
assert "reference_images" not in payload
|
assert "reference_images" not in payload
|
||||||
@ -121,8 +122,31 @@ class TestXAIPayload:
|
|||||||
provider, captured = xai_provider
|
provider, captured = xai_provider
|
||||||
provider.generate("animate this", image_url="https://example.com/cat.png")
|
provider.generate("animate this", image_url="https://example.com/cat.png")
|
||||||
payload = _last_post(captured)["json"]
|
payload = _last_post(captured)["json"]
|
||||||
|
assert payload["model"] == "grok-imagine-video-1.5-preview"
|
||||||
assert payload["image"] == {"url": "https://example.com/cat.png"}
|
assert payload["image"] == {"url": "https://example.com/cat.png"}
|
||||||
|
|
||||||
|
def test_local_image_path_is_sent_as_data_uri(self, xai_provider, tmp_path):
    """A local PNG path is inlined as a base64 data URI in the payload."""
    provider, captured = xai_provider
    frame = tmp_path / "frame.png"
    frame.write_bytes(b"\x89PNG\r\n\x1a\nfake")

    provider.generate("animate this", image_url=str(frame))

    sent = _last_post(captured)["json"]
    assert sent["model"] == "grok-imagine-video-1.5-preview"
    image_ref = sent["image"]["url"]
    assert image_ref.startswith("data:image/png;base64,")
|
||||||
|
|
||||||
|
def test_explicit_model_override_is_honored_for_image(self, xai_provider):
    """A model flagged as an explicit override is sent even with an image."""
    provider, captured = xai_provider
    request = {
        "image_url": "https://example.com/cat.png",
        "model": "grok-imagine-video",
        "_model_override_explicit": True,
    }

    provider.generate("animate this", **request)

    sent = _last_post(captured)["json"]
    assert sent["model"] == "grok-imagine-video"
|
||||||
|
|
||||||
def test_reference_images_payload(self, xai_provider):
|
def test_reference_images_payload(self, xai_provider):
|
||||||
provider, captured = xai_provider
|
provider, captured = xai_provider
|
||||||
provider.generate(
|
provider.generate(
|
||||||
|
|||||||
@ -82,7 +82,7 @@ def matrix_env(tmp_path, monkeypatch):
|
|||||||
return _Resp({
|
return _Resp({
|
||||||
"status": "done",
|
"status": "done",
|
||||||
"video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
|
"video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
|
||||||
"model": "grok-imagine-video",
|
"model": xai_calls[-1]["json"].get("model", "grok-imagine-video"),
|
||||||
})
|
})
|
||||||
import plugins.video_gen.xai as xai_plugin
|
import plugins.video_gen.xai as xai_plugin
|
||||||
monkeypatch.setattr(xai_plugin.httpx, "AsyncClient", lambda: _Client())
|
monkeypatch.setattr(xai_plugin.httpx, "AsyncClient", lambda: _Client())
|
||||||
@ -202,6 +202,7 @@ def test_xai_text_only_via_tool_surface(matrix_env):
|
|||||||
assert len(xai_calls) == 1
|
assert len(xai_calls) == 1
|
||||||
assert xai_calls[0]["url"].endswith("/videos/generations")
|
assert xai_calls[0]["url"].endswith("/videos/generations")
|
||||||
payload = xai_calls[0]["json"] or {}
|
payload = xai_calls[0]["json"] or {}
|
||||||
|
assert payload["model"] == "grok-imagine-video"
|
||||||
assert "image" not in payload
|
assert "image" not in payload
|
||||||
assert "reference_images" not in payload
|
assert "reference_images" not in payload
|
||||||
|
|
||||||
@ -221,6 +222,26 @@ def test_xai_text_plus_image_via_tool_surface(matrix_env):
|
|||||||
assert len(xai_calls) == 1
|
assert len(xai_calls) == 1
|
||||||
assert xai_calls[0]["url"].endswith("/videos/generations")
|
assert xai_calls[0]["url"].endswith("/videos/generations")
|
||||||
payload = xai_calls[0]["json"] or {}
|
payload = xai_calls[0]["json"] or {}
|
||||||
|
assert payload["model"] == "grok-imagine-video-1.5-preview"
|
||||||
|
assert payload["image"] == {"url": "https://example.com/img.png"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_xai_explicit_model_override_via_tool_surface(matrix_env):
    """A ``model`` tool argument reaches the xAI payload for an image request."""
    home, _, xai_calls = matrix_env
    config = {"video_gen": {"provider": "xai"}}
    tool_args = {
        "prompt": "animate this",
        "image_url": "https://example.com/img.png",
        "model": "grok-imagine-video",
    }

    result = _invoke_tool(home, config, tool_args)
    assert result["success"] is True

    sent = xai_calls[0]["json"] or {}
    assert sent["model"] == "grok-imagine-video"
    assert sent["image"] == {"url": "https://example.com/img.png"}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -336,6 +336,7 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
|
|||||||
|
|
||||||
kwargs: Dict[str, Any] = {
|
kwargs: Dict[str, Any] = {
|
||||||
"model": model,
|
"model": model,
|
||||||
|
"_model_override_explicit": bool(model_override),
|
||||||
"image_url": image_url,
|
"image_url": image_url,
|
||||||
"reference_image_urls": reference_image_urls,
|
"reference_image_urls": reference_image_urls,
|
||||||
"duration": duration,
|
"duration": duration,
|
||||||
|
|||||||
@ -182,7 +182,8 @@ The `x_search` toolset auto-enables whenever xAI credentials (a SuperGrok / X Pr
|
|||||||
| Chat | `grok-4.20-multi-agent-0309` | Multi-agent variant |
|
| Chat | `grok-4.20-multi-agent-0309` | Multi-agent variant |
|
||||||
| Image | `grok-imagine-image` | Default; ~5–10 s |
|
| Image | `grok-imagine-image` | Default; ~5–10 s |
|
||||||
| Image | `grok-imagine-image-quality` | Higher fidelity; ~10–20 s |
|
| Image | `grok-imagine-image-quality` | Higher fidelity; ~10–20 s |
|
||||||
| Video | `grok-imagine-video` | Text-to-video and image-to-video; up to 7 reference images |
|
| Video | `grok-imagine-video` | Text-to-video |
|
||||||
|
| Video | `grok-imagine-video-1.5-preview` | Image-to-video; dated alias `grok-imagine-video-1.5-2026-05-30` |
|
||||||
| TTS | (default voice) | xAI `/v1/tts` endpoint |
|
| TTS | (default voice) | xAI `/v1/tts` endpoint |
|
||||||
|
|
||||||
The chat catalog is derived live from the on-disk `models.dev` cache; new xAI releases appear automatically once that cache refreshes. `grok-4.3` is always pinned to the top of the list.
|
The chat catalog is derived live from the on-disk `models.dev` cache; new xAI releases appear automatically once that cache refreshes. `grok-4.3` is always pinned to the top of the list.
|
||||||
|
|||||||
@ -180,7 +180,8 @@ hermes tools
|
|||||||
| 对话 | `grok-4.20-multi-agent-0309` | 多 agent 变体 |
|
| 对话 | `grok-4.20-multi-agent-0309` | 多 agent 变体 |
|
||||||
| 图像 | `grok-imagine-image` | 默认;约 5–10 秒 |
|
| 图像 | `grok-imagine-image` | 默认;约 5–10 秒 |
|
||||||
| 图像 | `grok-imagine-image-quality` | 更高保真度;约 10–20 秒 |
|
| 图像 | `grok-imagine-image-quality` | 更高保真度;约 10–20 秒 |
|
||||||
| 视频 | `grok-imagine-video` | 文本转视频和图像转视频;最多 7 张参考图像 |
|
| 视频 | `grok-imagine-video` | 文本转视频 |
|
||||||
|
| 视频 | `grok-imagine-video-1.5-preview` | 图像转视频;日期别名 `grok-imagine-video-1.5-2026-05-30` |
|
||||||
| TTS | (默认音色) | xAI `/v1/tts` 端点 |
|
| TTS | (默认音色) | xAI `/v1/tts` 端点 |
|
||||||
|
|
||||||
对话模型目录从磁盘上的 `models.dev` 缓存实时获取;缓存刷新后,新的 xAI 模型会自动出现。`grok-4.3` 始终固定在列表顶部。
|
对话模型目录从磁盘上的 `models.dev` 缓存实时获取;缓存刷新后,新的 xAI 模型会自动出现。`grok-4.3` 始终固定在列表顶部。
|
||||||
@ -266,4 +267,4 @@ hermes auth logout xai-oauth
|
|||||||
- [AI Providers 参考](../integrations/providers.md)
|
- [AI Providers 参考](../integrations/providers.md)
|
||||||
- [环境变量](../reference/environment-variables.md)
|
- [环境变量](../reference/environment-variables.md)
|
||||||
- [配置](../user-guide/configuration.md)
|
- [配置](../user-guide/configuration.md)
|
||||||
- [语音与 TTS](../user-guide/features/tts.md)
|
- [语音与 TTS](../user-guide/features/tts.md)
|
||||||
|
|||||||
Reference in New Issue
Block a user