From d4e7b2fc198383d536f5e59173f822e652eda049 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 31 May 2026 00:11:52 -0700
Subject: [PATCH] fix(voice): allow /voice over SSH when a sound server is
 reachable (#35719)

SSH sessions hard-failed voice mode on the presence of SSH_* env vars
alone, even when a PulseAudio/PipeWire server is running on the host and
audio works (ffplay/aplay/pw-play -> pulseaudio). Probe the default
sound-server sockets (PULSE_SERVER unix path, PULSE_RUNTIME_PATH/native,
$XDG_RUNTIME_DIR/{pulse/native,pipewire-0}) and actually connect() so a
stale socket doesn't count; downgrade the SSH branch to a notice when
audio is reachable. Mirrors the existing Docker/WSL forwarding handling.

Fixes #35622
---
 tests/tools/test_voice_mode.py | 97 +++++++++++++++++++++++++++++++++-
 tools/voice_mode.py            | 72 +++++++++++++++++++++++--
 2 files changed, 165 insertions(+), 4 deletions(-)

diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py
index 2a2b77bae..8f6a8e677 100644
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@@ -72,6 +72,62 @@ def mock_sd(monkeypatch):
 # detect_audio_environment — WSL / SSH / Docker detection
 # ============================================================================
 
+class TestPulseSocketReachable:
+    def test_no_env_no_socket(self, monkeypatch):  # no env vars -> no candidate paths -> False
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False)
+        monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False)
+        from tools.voice_mode import _pulse_socket_reachable
+        assert _pulse_socket_reachable() is False
+
+    def test_stale_socket_file_not_reachable(self, monkeypatch, tmp_path):
+        """A socket file with no listener should not count as reachable."""
+        import socket as _socket
+        sock_path = tmp_path / "pulse" / "native"
+        sock_path.parent.mkdir(parents=True)
+        # bind() creates the socket file; closing without listen() leaves it behind with no listener.
+        s = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM)
+        s.bind(str(sock_path))
+        s.close()
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False)
+        monkeypatch.setenv("XDG_RUNTIME_DIR", str(tmp_path))  # probe resolves <tmp_path>/pulse/native
+        from tools.voice_mode import _pulse_socket_reachable
+        assert _pulse_socket_reachable() is False
+
+    def test_listening_socket_reachable_via_xdg_runtime(self, monkeypatch, tmp_path):
+        """A live PulseAudio-style socket under XDG_RUNTIME_DIR is reachable (#35622)."""
+        import socket as _socket
+        sock_path = tmp_path / "pulse" / "native"
+        sock_path.parent.mkdir(parents=True)
+        server = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM)
+        server.bind(str(sock_path))
+        server.listen(1)  # live listener, so the probe's connect() can succeed
+        try:
+            monkeypatch.delenv("PULSE_SERVER", raising=False)
+            monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False)
+            monkeypatch.setenv("XDG_RUNTIME_DIR", str(tmp_path))
+            from tools.voice_mode import _pulse_socket_reachable
+            assert _pulse_socket_reachable() is True
+        finally:
+            server.close()  # always release the listening socket, even if the assert fails
+
+    def test_listening_socket_reachable_via_pulse_server_env(self, monkeypatch, tmp_path):  # "unix:" path
+        import socket as _socket
+        sock_path = tmp_path / "native"
+        server = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM)
+        server.bind(str(sock_path))
+        server.listen(1)  # live listener, so the probe's connect() can succeed
+        try:
+            monkeypatch.delenv("PULSE_RUNTIME_PATH", raising=False)
+            monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False)
+            monkeypatch.setenv("PULSE_SERVER", f"unix:{sock_path}")  # only candidate comes from PULSE_SERVER
+            from tools.voice_mode import _pulse_socket_reachable
+            assert _pulse_socket_reachable() is True
+        finally:
+            server.close()  # always release the listening socket, even if the assert fails
+
+
 class TestDetectAudioEnvironment:
     def test_clean_environment_is_available(self, monkeypatch):
         """No SSH, Docker, or WSL — should be available."""
@@ -88,8 +144,11 @@ class TestDetectAudioEnvironment:
         assert result["warnings"] == []
 
     def test_ssh_blocks_voice(self, monkeypatch):
-        """SSH environment should block voice mode."""
+        """SSH environment without a reachable sound server should block voice mode."""
         monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22")
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False)
+        monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False)
         monkeypatch.setattr("tools.voice_mode._import_audio",
                             lambda: (MagicMock(), MagicMock()))
 
@@ -98,12 +157,46 @@ class TestDetectAudioEnvironment:
         assert result["available"] is False
         assert any("SSH" in w for w in result["warnings"])
 
+    def test_ssh_with_pulse_server_allows_voice(self, monkeypatch):
+        """SSH with PULSE_SERVER set should NOT block voice mode (#35622)."""
+        monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22")
+        monkeypatch.setenv("PULSE_SERVER", "unix:/run/user/1002/pulse/native")
+        monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False)
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+        monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+        assert result["available"] is True
+        assert result["warnings"] == []
+        assert any("SSH" in n for n in result.get("notices", []))
+
+    def test_ssh_with_reachable_pulse_socket_allows_voice(self, monkeypatch):
+        """SSH with a reachable PulseAudio socket (no env vars) allows voice (#35622)."""
+        monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22")
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False)
+        # User runs `pulseaudio &` locally on the SSH host: the default socket
+        # is reachable even though PULSE_SERVER is unset.
+        monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: True)
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+        monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+        assert result["available"] is True
+        assert result["warnings"] == []
+        assert any("SSH" in n for n in result.get("notices", []))
+
     def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path):
         """WSL without PULSE_SERVER should block voice mode."""
         monkeypatch.delenv("SSH_CLIENT", raising=False)
         monkeypatch.delenv("SSH_TTY", raising=False)
         monkeypatch.delenv("SSH_CONNECTION", raising=False)
         monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False)
         monkeypatch.setattr("tools.voice_mode._import_audio",
                             lambda: (MagicMock(), MagicMock()))
 
@@ -184,6 +277,7 @@ class TestDetectAudioEnvironment:
         monkeypatch.delenv("SSH_TTY", raising=False)
         monkeypatch.delenv("SSH_CONNECTION", raising=False)
         monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False)
 
         mock_sd = MagicMock()
         mock_sd.query_devices.side_effect = Exception("device query failed")
@@ -312,6 +406,7 @@ class TestDetectAudioEnvironment:
         monkeypatch.delenv("SSH_CONNECTION", raising=False)
         monkeypatch.delenv("PULSE_SERVER", raising=False)
         monkeypatch.delenv("PIPEWIRE_REMOTE", raising=False)
+        monkeypatch.setattr("tools.voice_mode._pulse_socket_reachable", lambda: False)
         monkeypatch.setattr("hermes_constants.is_container", lambda: True)
         monkeypatch.setattr("tools.voice_mode._import_audio",
                             lambda: (MagicMock(), MagicMock()))
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index e98fcef88..5d75f3c20 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -85,6 +85,59 @@ def _termux_voice_capture_available() -> bool:
     return _termux_microphone_command() is not None and _termux_api_app_installed()
 
 
+def _pulse_socket_reachable() -> bool:
+    """Return True if a local PulseAudio/PipeWire unix socket accepts a connection.
+
+    Covers a sound server running locally (e.g. on a remote SSH host) without
+    ``PULSE_SERVER``/``PIPEWIRE_REMOTE`` set -- clients use the default socket.
+    Candidates, in order: ``unix:`` entries of ``PULSE_SERVER``,
+    ``$PULSE_RUNTIME_PATH/native``, ``$XDG_RUNTIME_DIR/pulse/native``,
+    ``$XDG_RUNTIME_DIR/pipewire-0``. Returns True on the first successful
+    connect (0.5 s timeout each); OSError is treated as unreachable (#35622).
+    """
+    import socket
+    import stat
+
+    candidates: List[str] = []
+
+    pulse_server = os.environ.get('PULSE_SERVER', '')
+    # Only "unix:/path" entries (";"-separated) are probed; bare paths and tcp: entries are skipped.
+    for part in pulse_server.split(';'):  # NOTE(review): PulseAudio docs list whitespace as separator -- confirm
+        part = part.strip()
+        if part.startswith('unix:'):
+            candidates.append(part[len('unix:'):])
+
+    pulse_runtime = os.environ.get('PULSE_RUNTIME_PATH')
+    if pulse_runtime:
+        candidates.append(os.path.join(pulse_runtime, 'native'))
+
+    xdg_runtime = os.environ.get('XDG_RUNTIME_DIR')
+    if xdg_runtime:
+        candidates.append(os.path.join(xdg_runtime, 'pulse', 'native'))
+        candidates.append(os.path.join(xdg_runtime, 'pipewire-0'))
+
+    for path in candidates:
+        if not path:  # e.g. PULSE_SERVER="unix:" yields an empty path
+            continue
+        try:
+            if not stat.S_ISSOCK(os.stat(path).st_mode):  # skip regular files/directories at that path
+                continue
+        except OSError:  # os.stat() failed (e.g. path does not exist) -> try the next candidate
+            continue
+        # Confirm the socket actually accepts a connection -- a stale socket
+        # file left by a dead server should not count as reachable.
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            sock.settimeout(0.5)  # bound the probe so an unresponsive socket cannot stall detection
+            sock.connect(path)
+            return True
+        except OSError:  # connect failed or timed out -> try the next candidate
+            continue
+        finally:
+            sock.close()  # runs on both the return and the continue path
+    return False
+
+
 def detect_audio_environment() -> dict:
     """Detect if the current environment supports audio I/O.
 
@@ -98,12 +151,25 @@ def detect_audio_environment() -> dict:
     termux_app_installed = _termux_api_app_installed()
     termux_capture = bool(termux_mic_cmd and termux_app_installed)
     has_forwarded_audio = bool(
-        os.environ.get('PULSE_SERVER') or os.environ.get('PIPEWIRE_REMOTE')
+        os.environ.get('PULSE_SERVER')
+        or os.environ.get('PIPEWIRE_REMOTE')
+        or _pulse_socket_reachable()
     )
 
-    # SSH detection
+    # SSH detection -- normally no audio devices, but honor a reachable
+    # sound server (PulseAudio/PipeWire socket or forwarding env vars), which
+    # works fine over SSH (issue #35622).
     if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
-        warnings.append("Running over SSH -- no audio devices available")
+        if has_forwarded_audio:
+            notices.append("Running over SSH with a reachable PulseAudio/PipeWire sound server")
+        else:
+            warnings.append(
+                "Running over SSH -- no audio devices available.\n"
+                "  If a sound server (PulseAudio/PipeWire) is running on this host,\n"
+                "  point Hermes at it, e.g.:\n"
+                "    export XDG_RUNTIME_DIR=/run/user/$(id -u)\n"
+                "    # or: export PULSE_SERVER=unix:$XDG_RUNTIME_DIR/pulse/native"
+            )
 
     # Docker/Podman container detection — honor host audio forwarding.
     # When the user mounts a PulseAudio/PipeWire socket into the container