fix(gateway): route /background result media by type

Background-task (/background, /btw) result media now routes to the type-specific sender (TTS clip → voice bubble via send_voice, video → send_video, image → send_image_file) instead of forcing everything through send_document. Any other file type still falls back to send_document. This mirrors the streaming + kanban delivery paths and reuses base.should_send_media_as_audio for the Telegram OGG nuance.

Co-authored-by: LJ Li <liliangjya@gmail.com>
Co-authored-by: Kolektori <256073454+Kolektori@users.noreply.github.com>
2026-06-02 12:50:36 -07:00
parent 30a7a94120
commit 082025abcd
3 changed files with 116 additions and 6 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -12508,14 +12508,41 @@ class GatewayRunner:
                    except Exception:
                        pass

-                # Send media files
+                # Send media files, routing each by type so a TTS clip
+                # arrives as a voice bubble and a video/image natively rather
+                # than as a generic document. Mirrors the streaming + kanban paths.
+                from gateway.platforms.base import (
+                    should_send_media_as_audio as _should_send_media_as_audio,
+                )
+                _IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
+                _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
                for media_path, _is_voice in (media_files or []):
+                    _ext = os.path.splitext(media_path)[1].lower()
                    try:
-                        await adapter.send_document(
-                            chat_id=source.chat_id,
-                            file_path=media_path,
-                            metadata=_thread_metadata,
-                        )
+                        if _should_send_media_as_audio(source.platform, _ext, _is_voice):
+                            await adapter.send_voice(
+                                chat_id=source.chat_id,
+                                audio_path=media_path,
+                                metadata=_thread_metadata,
+                            )
+                        elif _ext in _VIDEO_EXTS:
+                            await adapter.send_video(
+                                chat_id=source.chat_id,
+                                video_path=media_path,
+                                metadata=_thread_metadata,
+                            )
+                        elif _ext in _IMAGE_EXTS:
+                            await adapter.send_image_file(
+                                chat_id=source.chat_id,
+                                image_path=media_path,
+                                metadata=_thread_metadata,
+                            )
+                        else:
+                            await adapter.send_document(
+                                chat_id=source.chat_id,
+                                file_path=media_path,
+                                metadata=_thread_metadata,
+                            )
                    except Exception:
                        pass
            else:
--- a/scripts/release.py
+++ b/scripts/release.py
@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"

 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "liliangjya@gmail.com": "truenorth-lj",
    "ben.bartholomew@vectorize.io": "benfrank241",
    "74339271+SaguaroDev@users.noreply.github.com": "SaguaroDev",
    "subw3@mail2.sysu.edu.cn": "Subway2023",
--- a/tests/gateway/test_background_command.py
+++ b/tests/gateway/test_background_command.py
@ -267,6 +267,88 @@ class TestRunBackgroundTask:
        mock_agent_instance.shutdown_memory_provider.assert_called_once()
        mock_agent_instance.close.assert_called_once()

+    @pytest.mark.asyncio
+    async def test_media_files_routed_by_type(self, monkeypatch):
+        """Result media is routed to the type-specific sender, not send_document.
+
+        A TTS clip should arrive as a voice bubble, a video as a video, an
+        image as a native image, and everything else as a document.
+        """
+        from gateway import run as gateway_run
+
+        runner = _make_runner()
+        runner._resolve_session_agent_runtime = MagicMock(
+            return_value=("test-model", {"api_key": "test-key"})
+        )
+        runner._resolve_session_reasoning_config = MagicMock(return_value=None)
+        runner._load_service_tier = MagicMock(return_value=None)
+        runner._resolve_turn_agent_config = MagicMock(
+            return_value={
+                "model": "test-model",
+                "runtime": {"api_key": "test-key"},
+                "request_overrides": None,
+            }
+        )
+        runner._run_in_executor_with_context = AsyncMock(
+            return_value={"final_response": "see attached", "messages": []}
+        )
+        monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
+
+        # Four real files so the media-delivery path validator accepts them
+        # (default mode requires the file to exist as a regular file).
+        import os as _os
+        import tempfile as _tempfile
+        _tmpdir = _tempfile.mkdtemp(prefix="bg_media_")
+        _ogg = _os.path.join(_tmpdir, "clip.ogg")
+        _mp4 = _os.path.join(_tmpdir, "render.mp4")
+        _png = _os.path.join(_tmpdir, "chart.png")
+        _pdf = _os.path.join(_tmpdir, "report.pdf")
+        for _p in (_ogg, _mp4, _png, _pdf):
+            with open(_p, "wb") as _fh:
+                _fh.write(b"x")
+        # (path, is_voice) pairs: only the ogg is voice-flagged; mp4/png/pdf carry False.
+        media = [
+            (_ogg, True),
+            (_mp4, False),
+            (_png, False),
+            (_pdf, False),
+        ]
+
+        mock_adapter = AsyncMock()
+        mock_adapter.send = AsyncMock()
+        mock_adapter.send_voice = AsyncMock()
+        mock_adapter.send_video = AsyncMock()
+        mock_adapter.send_image_file = AsyncMock()
+        mock_adapter.send_document = AsyncMock()
+        mock_adapter.send_image = AsyncMock()
+        # No text, no markdown images — just the four media attachments.
+        mock_adapter.extract_media = MagicMock(return_value=(media, ""))
+        mock_adapter.extract_images = MagicMock(return_value=([], ""))
+        # Non-telegram platform so every audio ext routes through send_voice.
+        runner.adapters[Platform.DISCORD] = mock_adapter
+
+        source = SessionSource(
+            platform=Platform.DISCORD,
+            user_id="12345",
+            chat_id="67890",
+            user_name="testuser",
+        )
+
+        try:
+            await runner._run_background_task("make stuff", source, "bg_test")
+
+            mock_adapter.send_voice.assert_called_once()
+            assert mock_adapter.send_voice.call_args.kwargs["audio_path"] == _ogg
+            mock_adapter.send_video.assert_called_once()
+            assert mock_adapter.send_video.call_args.kwargs["video_path"] == _mp4
+            mock_adapter.send_image_file.assert_called_once()
+            assert mock_adapter.send_image_file.call_args.kwargs["image_path"] == _png
+            mock_adapter.send_document.assert_called_once()
+            assert mock_adapter.send_document.call_args.kwargs["file_path"] == _pdf
+        finally:
+            import shutil as _shutil
+            _shutil.rmtree(_tmpdir, ignore_errors=True)
+
    @pytest.mark.asyncio
    async def test_telegram_dm_topic_completion_preserves_reply_anchor_metadata(self, monkeypatch):
        """Background completion metadata must let Telegram send thread id plus reply id."""