From d4e7b2fc198383d536f5e59173f822e652eda049 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 31 May 2026 00:11:52 -0700 Subject: [PATCH] fix(voice): allow /voice over SSH when a sound server is reachable (#35719) SSH sessions hard-failed voice mode on the presence of SSH_* env vars alone, even when a PulseAudio/PipeWire server is running on the host and audio works (ffplay/aplay/pw-play -> pulseaudio). Probe the default sound-server sockets (PULSE_SERVER unix path, PULSE_RUNTIME_PATH/native, $XDG_RUNTIME_DIR/{pulse/native,pipewire-0}) and actually connect() so a stale socket doesn't count; downgrade the SSH branch to a notice when audio is reachable. Mirrors the existing Docker/WSL forwarding handling. Fixes #35622 --- tests/tools/test_voice_mode.py | 97 +++++++++++++++++++++++++++++++++- tools/voice_mode.py | 72 +++++++++++++++++++++++-- 2 files changed, 165 insertions(+), 4 deletions(-) diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py index 2a2b77bae..8f6a8e677 100644 --- a/tests/tools/test_voice_mode.py +++ b/tests/tools/test_voice_mode.py @@ -72,6 +72,62 @@ def mock_sd(monkeypatch): # detect_audio_environment — WSL / SSH / Docker detection # ============================================================================ +class TestPulseSocketReachable: + def test_no_env_no_socket(self, monkeypatch): + monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False) + monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False) + from tools.voice_mode import _pulse_socket_reachable + assert _pulse_socket_reachable() is False + + def test_stale_socket_file_not_reachable(self, monkeypatch, tmp_path): + """A socket file with no listener should not count as reachable.""" + import socket as _socket + sock_path = tmp_path / "pulse" / "native" + sock_path.parent.mkdir(parents=True) + # Create + bind, then close so the path is a stale socket file. 
+ s = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) + s.bind(str(sock_path)) + s.close() + monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False) + monkeypatch.setenv("XDG_RUNTIME_DIR", str(tmp_path)) + from tools.voice_mode import _pulse_socket_reachable + assert _pulse_socket_reachable() is False + + def test_listening_socket_reachable_via_xdg_runtime(self, monkeypatch, tmp_path): + """A live PulseAudio-style socket under XDG_RUNTIME_DIR is reachable (#35622).""" + import socket as _socket + sock_path = tmp_path / "pulse" / "native" + sock_path.parent.mkdir(parents=True) + server = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) + server.bind(str(sock_path)) + server.listen(1) + try: + monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False) + monkeypatch.setenv("XDG_RUNTIME_DIR", str(tmp_path)) + from tools.voice_mode import _pulse_socket_reachable + assert _pulse_socket_reachable() is True + finally: + server.close() + + def test_listening_socket_reachable_via_pulse_server_env(self, monkeypatch, tmp_path): + import socket as _socket + sock_path = tmp_path / "native" + server = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) + server.bind(str(sock_path)) + server.listen(1) + try: + monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False) + monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False) + monkeypatch.setenv("PULSE_SERVER", f"unix:{sock_path}") + from tools.voice_mode import _pulse_socket_reachable + assert _pulse_socket_reachable() is True + finally: + server.close() + + class TestDetectAudioEnvironment: def test_clean_environment_is_available(self, monkeypatch): """No SSH, Docker, or WSL — should be available.""" @@ -88,8 +144,11 @@ class TestDetectAudioEnvironment: assert result["warnings"] == [] def test_ssh_blocks_voice(self, monkeypatch): - """SSH environment should block voice mode.""" + """SSH environment without a reachable sound 
server should block voice mode.""" monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22") + monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False) + monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False) monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (MagicMock(), MagicMock())) @@ -98,12 +157,46 @@ class TestDetectAudioEnvironment: assert result["available"] is False assert any("SSH" in w for w in result["warnings"]) + def test_ssh_with_pulse_server_allows_voice(self, monkeypatch): + """SSH with PULSE_SERVER set should NOT block voice mode (#35622).""" + monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22") + monkeypatch.setenv("PULSE_SERVER", "unix:/run/user/1002/pulse/native") + monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False) + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open)) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + assert result["available"] is True + assert result["warnings"] == [] + assert any("SSH" in n for n in result.get("notices", [])) + + def test_ssh_with_reachable_pulse_socket_allows_voice(self, monkeypatch): + """SSH with a reachable PulseAudio socket (no env vars) allows voice (#35622).""" + monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22") + monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False) + # User runs `pulseaudio &` locally on the SSH host: the default socket + # is reachable even though PULSE_SERVER is unset. 
+ monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: True) + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open)) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + assert result["available"] is True + assert result["warnings"] == [] + assert any("SSH" in n for n in result.get("notices", [])) + def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path): """WSL without PULSE_SERVER should block voice mode.""" monkeypatch.delenv("SSH_CLIENT", raising=False) monkeypatch.delenv("SSH_TTY", raising=False) monkeypatch.delenv("SSH_CONNECTION", raising=False) monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False) monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (MagicMock(), MagicMock())) @@ -184,6 +277,7 @@ class TestDetectAudioEnvironment: monkeypatch.delenv("SSH_TTY", raising=False) monkeypatch.delenv("SSH_CONNECTION", raising=False) monkeypatch.delenv("PULSE_SERVER", raising=False) + monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False) mock_sd = MagicMock() mock_sd.query_devices.side_effect = Exception("device query failed") @@ -312,6 +406,7 @@ class TestDetectAudioEnvironment: monkeypatch.delenv("SSH_CONNECTION", raising=False) monkeypatch.delenv("PULSE_SERVER", raising=False) monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False) + monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False) monkeypatch.setattr("hermes_constants.is_container", lambda: True) monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (MagicMock(), MagicMock())) diff --git a/tools/voice_mode.py b/tools/voice_mode.py index e98fcef88..5d75f3c20 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -85,6 +85,59 @@ def _termux_voice_capture_available() -> bool: return 
def _pulse_socket_reachable() -> bool:
    """Return True if a local PulseAudio/PipeWire socket accepts a connection.

    Covers the common case where a sound server runs locally (e.g. on a
    remote SSH host) without ``PULSE_SERVER``/``PIPEWIRE_REMOTE`` pointing
    at it -- the client just connects to the default socket under the
    runtime dir (issue #35622).

    Candidate socket paths, probed in this order:

    * every unix-socket entry of ``PULSE_SERVER`` -- either ``unix:/path``
      or a bare absolute ``/path``; non-unix entries such as
      ``tcp:host:port`` are skipped,
    * ``$PULSE_RUNTIME_PATH/native``,
    * ``$XDG_RUNTIME_DIR/pulse/native`` and ``$XDG_RUNTIME_DIR/pipewire-0``.

    Returns:
        True as soon as one candidate is a socket file *and* accepts a
        ``connect()`` within 0.5 seconds; False otherwise. Never raises for
        missing, stale or unreachable sockets.
    """
    import socket
    import stat

    if not hasattr(socket, 'AF_UNIX'):
        # No unix-domain sockets on this platform, so there is nothing to probe.
        return False

    candidates: List[str] = []

    # A PulseAudio server string is a whitespace-separated list of addresses
    # that are tried in turn; ';' is also accepted as a separator here for
    # backward compatibility. An address is a unix socket when it carries
    # the "unix:" prefix or is a bare absolute path.
    pulse_server = os.environ.get('PULSE_SERVER', '')
    for entry in pulse_server.replace(';', ' ').split():
        if entry.startswith('unix:'):
            candidates.append(entry[len('unix:'):])
        elif entry.startswith('/'):
            candidates.append(entry)

    pulse_runtime = os.environ.get('PULSE_RUNTIME_PATH')
    if pulse_runtime:
        candidates.append(os.path.join(pulse_runtime, 'native'))

    xdg_runtime = os.environ.get('XDG_RUNTIME_DIR')
    if xdg_runtime:
        candidates.append(os.path.join(xdg_runtime, 'pulse', 'native'))
        candidates.append(os.path.join(xdg_runtime, 'pipewire-0'))

    # dict.fromkeys() drops duplicates while keeping order, so a path named
    # by several variables is only probed once.
    for path in dict.fromkeys(candidates):
        if not path:
            continue
        try:
            if not stat.S_ISSOCK(os.stat(path).st_mode):
                continue
        except (OSError, ValueError):
            # Missing/unreadable path, or a path os.stat() rejects outright.
            continue
        # Confirm the socket actually accepts a connection -- a stale socket
        # file left behind by a dead server must not count as reachable.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
            sock.settimeout(0.5)
            try:
                sock.connect(path)
            except OSError:
                continue
            return True
    return False
@@ -98,12 +151,25 @@ def detect_audio_environment() -> dict: termux_app_installed = _termux_api_app_installed() termux_capture = bool(termux_mic_cmd and termux_app_installed) has_forwarded_audio = bool( - os.environ.get('PULSE_SERVER') or os.environ.get('PIPEWIRE_REMOTE') + os.environ.get('PULSE_SERVER') + or os.environ.get('PIPEWIRE_REMOTE') + or _pulse_socket_reachable() ) - # SSH detection + # SSH detection -- normally no audio devices, but honor a reachable + # sound server (PulseAudio/PipeWire socket or forwarding env vars), which + # works fine over SSH (issue #35622). if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')): - warnings.append("Running over SSH -- no audio devices available") + if has_forwarded_audio: + notices.append("Running over SSH with a reachable PulseAudio/PipeWire sound server") + else: + warnings.append( + "Running over SSH -- no audio devices available.\n" + " If a sound server (PulseAudio/PipeWire) is running on this host,\n" + " point Hermes at it, e.g.:\n" + " export XDG_RUNTIME_DIR=/run/user/$(id -u)\n" + " # or: export PULSE_SERVER=unix:$XDG_RUNTIME_DIR/pulse/native" + ) # Docker/Podman container detection — honor host audio forwarding. # When the user mounts a PulseAudio/PipeWire socket into the container