From 899ee8c23dfd029fdfd7b669ac3ac82fd8388f55 Mon Sep 17 00:00:00 2001 From: Brian Doherty <76168809+brian-doherty@users.noreply.github.com> Date: Thu, 4 Jun 2026 19:08:54 -0700 Subject: [PATCH] fix(gateway): tolerate non-UTF-8 status/pid files in gateway status reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_read_json_file` caught OSError but not UnicodeDecodeError, so a status file holding binary/non-UTF-8 bytes (truncated or clobbered write) would crash the gateway status path instead of being treated as unreadable. UnicodeDecodeError is a ValueError subclass, not an OSError, so it escaped the existing guard. Widen the catch to (OSError, UnicodeDecodeError) at both read sites in gateway/status.py — `_read_json_file` and the sibling `_read_pid_record`, which had the identical gap. Adds tests covering binary input (returns None) and valid input (still parses) for both. Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- gateway/status.py | 10 +++++++--- tests/gateway/test_status.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/gateway/status.py b/gateway/status.py index 935758b90..8d2640af0 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -227,7 +227,10 @@ def _read_json_file(path: Path) -> Optional[dict[str, Any]]: return None try: raw = path.read_text(encoding="utf-8").strip() - except OSError: + except (OSError, UnicodeDecodeError): + # OSError: file vanished or permission flipped between exists() and + # read. UnicodeDecodeError: file holds non-UTF-8 / binary garbage + # (a truncated or clobbered status file). Either way it's unusable. return None if not raw: return None @@ -249,8 +252,9 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]: try: raw = pid_path.read_text().strip() - except OSError: - # File was deleted between exists() and read_text(), or permission flipped. 
+ except (OSError, UnicodeDecodeError): + # File was deleted between exists() and read_text(), permission + # flipped, or its bytes don't decode as text (binary garbage). return None if not raw: return None diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index ae378e0b7..bbf9d9570 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -1036,3 +1036,28 @@ class TestReadProcessCmdlinePsFallback: ) result = status._read_process_cmdline(12345) assert "hermes_cli/main.py" in result + + +class TestCorruptStatusFiles: + """A status / pid file holding non-UTF-8 (binary) bytes must read as + None, not crash the gateway status path with UnicodeDecodeError.""" + + def test_read_json_file_returns_none_on_binary_garbage(self, tmp_path): + p = tmp_path / "runtime.json" + p.write_bytes(b"\xff\xfe\x00\x80not utf-8\x81") + assert status._read_json_file(p) is None + + def test_read_json_file_still_parses_valid_json(self, tmp_path): + p = tmp_path / "runtime.json" + p.write_text(json.dumps({"pid": 7}), encoding="utf-8") + assert status._read_json_file(p) == {"pid": 7} + + def test_read_pid_record_returns_none_on_binary_garbage(self, tmp_path): + p = tmp_path / "gateway.pid" + p.write_bytes(b"\xff\xfe\x00\x80\x81") + assert status._read_pid_record(p) is None + + def test_read_pid_record_still_parses_bare_pid(self, tmp_path): + p = tmp_path / "gateway.pid" + p.write_text("4242", encoding="utf-8") + assert status._read_pid_record(p) == {"pid": 4242}