From e3313c50a7333b96c6a695f2e00b41f87ce4ad59 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 3 Jun 2026 19:37:04 -0700 Subject: [PATCH] feat(dashboard): add Debug Share to the System page (#38600) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Port from google-gemini/gemini-cli#21541: back up corrupted config.yaml When config.yaml fails to parse, load_config() silently falls back to DEFAULT_CONFIG and leaves the broken file on disk. If the user then re-runs the setup wizard or hermes config set (both rewrite config.yaml), their broken-but-recoverable overrides are lost for good. Adapts the policy-file recovery from gemini-cli#21541: on the first parse warning for a given broken file, snapshot it to config.yaml.corrupt.<timestamp>.bak (best-effort, symlink-guarded, size-deduped) and tell the user where it landed. Unlike Gemini's version we deliberately do NOT reset config.yaml to a clean state — hermes never silently mutates user config, and leaving it means a hand-fixed file is re-read on the next load. Tests: 3 new cases (backup created + content preserved + original untouched; same-size backup dedup; symlink not copied). E2E verified with isolated HERMES_HOME and a real tab-indented broken config. * feat(dashboard): add Debug Share to the System page Surface `hermes debug share` in the dashboard. The System > Operations section gets a dedicated card that uploads a redacted report + full logs and returns the paste URLs as real, copyable links instead of a log tail. - debug.py: factor a pure build_debug_share() returning structured {urls, failures, redacted, auto_delete_seconds}; run_debug_share now calls it (CLI output unchanged). - web_server.py: POST /api/ops/debug-share runs the share core in a worker thread and returns the structured payload synchronously (the URLs are the whole point — not a backgrounded action). - api.ts: runDebugShare() + DebugShareResponse. 
- SystemPage.tsx: share card with a redaction toggle (on by default), per-link + copy-all buttons, and the 6h auto-delete countdown. - tests: build_debug_share core + endpoint (redact toggle, failure 502, token gate). --- hermes_cli/config.py | 66 +++++- hermes_cli/debug.py | 190 ++++++++++++------ hermes_cli/web_server.py | 45 +++++ tests/hermes_cli/test_config.py | 64 ++++++ .../test_dashboard_admin_endpoints.py | 93 +++++++++ tests/hermes_cli/test_debug.py | 107 ++++++++++ web/src/lib/api.ts | 19 ++ web/src/pages/SystemPage.tsx | 177 ++++++++++++++++ 8 files changed, 696 insertions(+), 65 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 61996a8fd..1b72c3152 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -17,11 +17,13 @@ import logging import os import platform import re +import shutil import stat import subprocess import sys import tempfile import threading +import time from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Tuple @@ -36,6 +38,60 @@ logger = logging.getLogger(__name__) _CONFIG_PARSE_WARNED: set = set() +def _backup_corrupt_config(config_path: Path) -> Optional[Path]: + """Preserve a corrupted ``config.yaml`` by copying it to a timestamped ``.bak``. + + When the YAML can't be parsed, ``load_config()`` silently falls back to + ``DEFAULT_CONFIG`` and the user's broken file stays on disk untouched. + That file is still the user's only copy of their intended overrides — if + they re-run the setup wizard or ``hermes config set`` (which rewrites + ``config.yaml``), the broken-but-recoverable content is gone for good. + + This snapshots the corrupted file to ``config.yaml.corrupt..bak`` so + the user can diff/repair it. 
Unlike Gemini CLI's policy-file recovery + (which resets the live file to a clean state), we deliberately leave + ``config.yaml`` in place: hermes never silently mutates the user's config, + and leaving it means a hand-fixed file is re-read on the next load. The + backup is best-effort — any failure (permissions, symlink, disk full) is + swallowed so config loading is never blocked by backup problems. + + Returns the backup path on success, else ``None``. Symlinks are not + followed/copied (mirrors the Gemini #21541 lstat guard) to avoid + clobbering whatever a malicious/misconfigured symlink points at. + """ + try: + if config_path.is_symlink(): + return None + st = config_path.stat() + if st.st_size == 0: + # Empty file isn't worth preserving and yaml.safe_load returns {} + # for it anyway (so it wouldn't reach here), but guard regardless. + return None + ts = time.strftime("%Y%m%d-%H%M%S") + backup_path = config_path.with_name(f"{config_path.name}.corrupt.{ts}.bak") + # Don't clobber an existing backup from the same second; if there's + # already a corrupt backup for this exact mtime, assume we've snapshotted + # this corruption already and skip (the dedup cache normally prevents a + # second call, but a process restart can clear it). + sibling_baks = list( + config_path.parent.glob(f"{config_path.name}.corrupt.*.bak") + ) + for existing in sibling_baks: + try: + if existing.stat().st_size == st.st_size: + # Same size as the current broken file — likely the same + # corruption already preserved. Avoid backup churn. + return None + except OSError: + continue + if backup_path.exists(): + return None + shutil.copy2(config_path, backup_path) + return backup_path + except Exception: + return None + + def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None: """Surface a config.yaml parse failure to user, log, and stderr. 
@dataclass
class DebugShareResult:
    """Structured outcome of a ``debug share`` upload.

    Returned by :func:`build_debug_share` so non-CLI callers (the dashboard
    web server, gateway) can render the uploaded paste URLs as real links
    instead of scraping printed text.
    """

    # Upload label -> paste URL, e.g. {"Report": "...", "agent.log": "..."}.
    urls: dict[str, str]
    # Human-readable "label: error" strings for optional uploads that failed.
    failures: list[str]
    # Whether force-mode redaction was applied before upload.
    redacted: bool
    # How long, in seconds, until the pastes auto-delete.
    auto_delete_seconds: int
    # The summary report text (kept for local fallback).
    report: str = ""
# The dump is already redacted at extract time via dump.py:_redact; # log_snapshots are redacted by _capture_default_log_snapshots when @@ -639,71 +660,112 @@ def run_debug_share(args): if desktop_log: desktop_log = _REDACTION_BANNER + desktop_log - if local_only: - print(report) - if agent_log: - print(f"\n\n{'=' * 60}") - print("FULL agent.log") - print(f"{'=' * 60}\n") - print(agent_log) - if gateway_log: - print(f"\n\n{'=' * 60}") - print("FULL gateway.log") - print(f"{'=' * 60}\n") - print(gateway_log) - if desktop_log: - print(f"\n\n{'=' * 60}") - print("FULL desktop.log") - print(f"{'=' * 60}\n") - print(desktop_log) - return - - print("Uploading...") urls: dict[str, str] = {} failures: list[str] = [] - # 1. Summary report (required) + # 1. Summary report (required — raises on failure so callers can fall back) + urls["Report"] = upload_to_pastebin(report, expiry_days=expiry) + + # 2-4. Full logs (optional — failures are collected, not raised) + for label, content in ( + ("agent.log", agent_log), + ("gateway.log", gateway_log), + ("desktop.log", desktop_log), + ): + if not content: + continue + try: + urls[label] = upload_to_pastebin(content, expiry_days=expiry) + except Exception as exc: + failures.append(f"{label}: {exc}") + + # Schedule auto-deletion after 6 hours. + _schedule_auto_delete(list(urls.values())) + + return DebugShareResult( + urls=urls, + failures=failures, + redacted=redact, + auto_delete_seconds=_AUTO_DELETE_SECONDS, + report=report, + ) + + +def run_debug_share(args): + """Collect debug report + full logs, upload each, print URLs.""" + log_lines = getattr(args, "lines", 200) + expiry = getattr(args, "expire", 7) + local_only = getattr(args, "local", False) + redact = not getattr(args, "no_redact", False) + + if local_only: + # Local-only path never uploads — render the report to stdout and bail + # before any network I/O. Mirrors the upload path's collection logic. 
+ _best_effort_sweep_expired_pastes() + print("Collecting debug report...") + dump_text = _capture_dump() + log_snapshots = _capture_default_log_snapshots(log_lines, redact=redact) + report = collect_debug_report( + log_lines=log_lines, + dump_text=dump_text, + log_snapshots=log_snapshots, + ) + agent_log = log_snapshots["agent"].full_text + gateway_log = log_snapshots["gateway"].full_text + desktop_log = log_snapshots["desktop"].full_text + if agent_log: + agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log + if gateway_log: + gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log + if desktop_log: + desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log + if redact: + report = _REDACTION_BANNER + report + if agent_log: + agent_log = _REDACTION_BANNER + agent_log + if gateway_log: + gateway_log = _REDACTION_BANNER + gateway_log + if desktop_log: + desktop_log = _REDACTION_BANNER + desktop_log + print(report) + for title, body in ( + ("FULL agent.log", agent_log), + ("FULL gateway.log", gateway_log), + ("FULL desktop.log", desktop_log), + ): + if body: + print(f"\n\n{'=' * 60}") + print(title) + print(f"{'=' * 60}\n") + print(body) + return + + print(_PRIVACY_NOTICE) + print("Collecting debug report...") + print("Uploading...") + try: - urls["Report"] = upload_to_pastebin(report, expiry_days=expiry) + result = build_debug_share( + log_lines=log_lines, + expiry=expiry, + redact=redact, + ) except RuntimeError as exc: print(f"\nUpload failed: {exc}", file=sys.stderr) - print("\nFull report printed below — copy-paste it manually:\n") - print(report) + print("\nRun `hermes debug share --local` to print the report instead.\n") sys.exit(1) - # 2. Full agent.log (optional) - if agent_log: - try: - urls["agent.log"] = upload_to_pastebin(agent_log, expiry_days=expiry) - except Exception as exc: - failures.append(f"agent.log: {exc}") - - # 3. 
class DebugShareRequest(BaseModel):
    """JSON body accepted by ``POST /api/ops/debug-share``."""

    # Redaction is ON by default — force-mode scrubs credential-shaped tokens
    # out of log content before it leaves the machine. The toggle exists so an
    # operator who knows the logs are clean can opt out for fuller fidelity.
    redact: bool = True
    # Recent log lines included in the summary tail (full logs are separate).
    lines: int = 200


@app.post("/api/ops/debug-share")
async def run_debug_share_endpoint(body: DebugShareRequest | None = None):
    """Upload a debug report + full logs and return the paste URLs.

    Unlike the other diagnostics actions (doctor, dump, prompt-size) this is
    *synchronous*: the whole point of ``debug share`` is the set of shareable
    URLs it produces, so the upload runs in a worker thread and the structured
    ``{urls, failures, redacted, ...}`` payload is returned directly. The
    dashboard renders those as real, copyable links instead of scraping a log
    tail. Auto-deletion of the pastes is scheduled inside the share core; the
    delay is reported back as ``auto_delete_seconds``.

    Args:
        body: Optional request options; a missing body means the defaults
            (redaction on, 200 log lines).

    Returns:
        A dict with ``ok``, ``urls``, ``failures``, ``redacted`` and
        ``auto_delete_seconds``.

    Raises:
        HTTPException: 502 when the share core raises ``RuntimeError`` (the
            required summary-report upload failed), 500 for any other error.
    """
    from hermes_cli.debug import build_debug_share

    req = body or DebugShareRequest()
    # Clamp rather than reject: out-of-range values still yield a useful report.
    min_lines, max_lines = 1, 5000
    log_lines = max(min_lines, min(int(req.lines), max_lines))
    try:
        # build_debug_share does blocking network I/O; keep it off the event loop.
        result = await asyncio.to_thread(
            build_debug_share,
            log_lines=log_lines,
            redact=bool(req.redact),
        )
    except RuntimeError as exc:
        # Required summary-report upload failed (offline / paste service down).
        raise HTTPException(status_code=502, detail=f"Upload failed: {exc}") from exc
    except Exception as exc:
        _log.exception("debug share failed")
        raise HTTPException(status_code=500, detail=f"Failed: {exc}") from exc

    return {
        "ok": True,
        "urls": result.urls,
        "failures": result.failures,
        "redacted": result.redacted,
        "auto_delete_seconds": result.auto_delete_seconds,
    }
# diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 4659934db..288a5a257 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -157,6 +157,70 @@ class TestLoadConfigParseFailure: after_edit = capsys.readouterr().err assert "hermes config:" in after_edit, "edited file should re-warn" + def test_corrupt_config_is_backed_up(self, tmp_path, capsys): + """A broken config.yaml is snapshotted to a timestamped .bak so the + user's recoverable overrides survive a later wizard/config-set rewrite. + + Ported from google-gemini/gemini-cli#21541 (policy-file TOML recovery), + adapted: we back up but deliberately do NOT reset config.yaml. + """ + from hermes_cli import config as cfg_mod + cfg_mod._CONFIG_PARSE_WARNED.clear() + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + broken = "\tmodel: test/custom\nbroken indent:\n" + (tmp_path / "config.yaml").write_text(broken) + + load_config() + err = capsys.readouterr().err + + baks = list(tmp_path.glob("config.yaml.corrupt.*.bak")) + assert len(baks) == 1, f"expected one backup, got {baks}" + # Backup preserves the original broken content verbatim + assert baks[0].read_text() == broken + # Original config.yaml is left untouched (not reset to clean state) + assert (tmp_path / "config.yaml").read_text() == broken + # User is told where the backup landed + assert str(baks[0]) in err + + def test_backup_skips_when_same_size_bak_exists(self, tmp_path, capsys): + """Don't churn backups: if a corrupt backup of the same size already + exists (same corruption already preserved), skip making another.""" + from hermes_cli import config as cfg_mod + cfg_mod._CONFIG_PARSE_WARNED.clear() + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + broken = "\tbroken:\n" + cfg = tmp_path / "config.yaml" + cfg.write_text(broken) + + # Pre-existing backup of identical size simulates an earlier snapshot. 
class TestDebugShareEndpoint:
    """POST /api/ops/debug-share returns the paste URLs synchronously so the
    dashboard can render them as copyable links (not a backgrounded log tail)."""

    @pytest.fixture(autouse=True)
    def _setup(self, _isolate_hermes_home):
        self.client, self.header = _client()
        from hermes_constants import get_hermes_home

        # Seed a few log files so the share core has content to collect.
        logs = get_hermes_home() / "logs"
        logs.mkdir(parents=True, exist_ok=True)
        (logs / "agent.log").write_text("agent line\n")
        (logs / "errors.log").write_text("err line\n")
        (logs / "gateway.log").write_text("gw line\n")

    @staticmethod
    def _stub_share_io(monkeypatch, upload):
        """Replace the share core's network/side-effect hooks with stubs.

        ``upload`` stands in for ``upload_to_pastebin``; auto-delete
        scheduling, the expired-paste sweep and the dump command become no-ops.
        """
        import hermes_cli.debug as dbg

        monkeypatch.setattr(dbg, "upload_to_pastebin", upload)
        monkeypatch.setattr(dbg, "_schedule_auto_delete", lambda *a, **k: None)
        monkeypatch.setattr(dbg, "_best_effort_sweep_expired_pastes", lambda: None)
        monkeypatch.setattr("hermes_cli.dump.run_dump", lambda a: None)

    def test_returns_structured_urls(self, monkeypatch):
        count = [0]

        def _upload(content, expiry_days=7):
            count[0] += 1
            return f"https://paste.rs/p{count[0]}"

        self._stub_share_io(monkeypatch, _upload)

        r = self.client.post("/api/ops/debug-share", json={"redact": True})
        assert r.status_code == 200
        body = r.json()
        assert body["ok"] is True
        assert "Report" in body["urls"]
        assert body["redacted"] is True
        assert body["auto_delete_seconds"] == 21600
        # The stubbed upload never raises, so no optional upload can fail.
        assert body["failures"] == []

    def test_redact_false_is_honored(self, monkeypatch):
        self._stub_share_io(
            monkeypatch, lambda c, expiry_days=7: "https://paste.rs/x"
        )

        r = self.client.post("/api/ops/debug-share", json={"redact": False})
        assert r.status_code == 200
        assert r.json()["redacted"] is False

    def test_default_body_redacts(self, monkeypatch):
        self._stub_share_io(
            monkeypatch, lambda c, expiry_days=7: "https://paste.rs/x"
        )

        # No JSON body at all — should default redact=True.
        r = self.client.post("/api/ops/debug-share")
        assert r.status_code == 200
        assert r.json()["redacted"] is True

    def test_upload_failure_returns_502(self, monkeypatch):
        def _fail(content, expiry_days=7):
            raise RuntimeError("down")

        self._stub_share_io(monkeypatch, _fail)

        r = self.client.post("/api/ops/debug-share", json={"redact": True})
        assert r.status_code == 502

    def test_requires_session_token(self):
        # Override the session token header with a bogus value and confirm the
        # global auth gate rejects the request.
        r = self.client.post(
            "/api/ops/debug-share",
            json={"redact": True},
            headers={self.header: "wrong-token"},
        )
        assert r.status_code == 401
+ """ + + def test_returns_structured_urls(self, hermes_home): + from hermes_cli.debug import build_debug_share, DebugShareResult + + count = [0] + + def _upload(content, expiry_days=7): + count[0] += 1 + return f"https://paste.rs/p{count[0]}" + + with patch("hermes_cli.dump.run_dump"), patch( + "hermes_cli.debug.upload_to_pastebin", side_effect=_upload + ), patch("hermes_cli.debug._schedule_auto_delete"): + result = build_debug_share(log_lines=50, redact=True) + + assert isinstance(result, DebugShareResult) + # All four seeded logs (agent/gateway/desktop) + the summary report. + assert "Report" in result.urls + assert "agent.log" in result.urls + assert "gateway.log" in result.urls + assert "desktop.log" in result.urls + assert result.failures == [] + assert result.redacted is True + assert result.auto_delete_seconds == 21600 + + def test_skips_missing_logs_without_failure(self, hermes_home): + from hermes_cli.debug import build_debug_share + + # Remove desktop.log so it should be neither uploaded nor reported failed. 
+ (hermes_home / "logs" / "desktop.log").unlink() + + with patch("hermes_cli.dump.run_dump"), patch( + "hermes_cli.debug.upload_to_pastebin", + side_effect=lambda c, expiry_days=7: "https://paste.rs/x", + ), patch("hermes_cli.debug._schedule_auto_delete"): + result = build_debug_share(log_lines=50, redact=True) + + assert "desktop.log" not in result.urls + assert result.failures == [] + + def test_redaction_keeps_secrets_out_of_payload(self, hermes_home): + from hermes_cli.debug import build_debug_share + + secret = "sk-proj-SUPERSECRETtoken1234567890" + (hermes_home / "logs" / "agent.log").write_text( + f"line one\nauthorization token={secret}\nline three\n" + ) + + uploaded = [] + + def _upload(content, expiry_days=7): + uploaded.append(content) + return "https://paste.rs/x" + + with patch("hermes_cli.dump.run_dump"), patch( + "hermes_cli.debug.upload_to_pastebin", side_effect=_upload + ), patch("hermes_cli.debug._schedule_auto_delete"): + result = build_debug_share(log_lines=50, redact=True) + + assert result.redacted is True + joined = "\n".join(uploaded) + assert secret not in joined, "secret leaked into upload payload" + + def test_optional_log_failure_is_collected_not_raised(self, hermes_home): + from hermes_cli.debug import build_debug_share + + count = [0] + + def _upload(content, expiry_days=7): + count[0] += 1 + # First call (the required Report) succeeds; a later one fails. 
+ if count[0] == 2: + raise RuntimeError("paste service hiccup") + return f"https://paste.rs/p{count[0]}" + + with patch("hermes_cli.dump.run_dump"), patch( + "hermes_cli.debug.upload_to_pastebin", side_effect=_upload + ), patch("hermes_cli.debug._schedule_auto_delete"): + result = build_debug_share(log_lines=50, redact=True) + + assert "Report" in result.urls + assert len(result.failures) == 1 + assert "paste service hiccup" in result.failures[0] + + def test_required_report_failure_raises(self, hermes_home): + from hermes_cli.debug import build_debug_share + + with patch("hermes_cli.dump.run_dump"), patch( + "hermes_cli.debug.upload_to_pastebin", + side_effect=RuntimeError("all paste services down"), + ), patch("hermes_cli.debug._schedule_auto_delete"): + with pytest.raises(RuntimeError, match="all paste services down"): + build_debug_share(log_lines=50, redact=True) diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 70914c9e7..2f59f095e 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -853,6 +853,15 @@ export const api = { runDump: () => fetchJSON("/api/ops/dump", { method: "POST" }), runConfigMigrate: () => fetchJSON("/api/ops/config-migrate", { method: "POST" }), + runDebugShare: (opts?: { redact?: boolean; lines?: number }) => + fetchJSON("/api/ops/debug-share", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + redact: opts?.redact ?? true, + lines: opts?.lines ?? 200, + }), + }), getCheckpoints: () => fetchJSON("/api/ops/checkpoints"), @@ -906,6 +915,16 @@ export interface ActionResponse { update_command?: string; } +export interface DebugShareResponse { + ok: boolean; + // label -> paste URL, e.g. { Report: "https://paste.rs/abc", "agent.log": "..." } + urls: Record; + // "label: error" strings for optional full-log uploads that failed. 
+ failures: string[]; + redacted: boolean; + auto_delete_seconds: number; +} + export interface SessionStoreStats { total: number; active_store: number; diff --git a/web/src/pages/SystemPage.tsx b/web/src/pages/SystemPage.tsx index 667da4fa4..e78c177d4 100644 --- a/web/src/pages/SystemPage.tsx +++ b/web/src/pages/SystemPage.tsx @@ -3,17 +3,22 @@ import { Link } from "react-router-dom"; import { Activity, Brain, + Check, + Clock, + Copy, Cpu, Database, Download, Globe, HardDrive, KeyRound, + Link2, Play, Plus, Power, RotateCw, Server, + Share2, ShieldCheck, Sparkles, Stethoscope, @@ -48,6 +53,7 @@ import type { UpdateCheckResponse, CuratorStatus, PortalStatus, + DebugShareResponse, } from "@/lib/api"; function formatBytes(n: number): string { @@ -324,6 +330,54 @@ export default function SystemPage() { } }; + // ── Debug share ──────────────────────────────────────────────────── + // Unlike the fire-and-forget ops above, `debug share` produces shareable + // paste URLs that are the whole point — so we surface them as real, + // copyable links rather than a log tail. + const [shareRedact, setShareRedact] = useState(true); + const [sharing, setSharing] = useState(false); + const [shareResult, setShareResult] = useState( + null, + ); + const [copiedLabel, setCopiedLabel] = useState(null); + + const copyToClipboard = useCallback( + async (text: string, label: string) => { + try { + await navigator.clipboard.writeText(text); + setCopiedLabel(label); + setTimeout( + () => setCopiedLabel((cur) => (cur === label ? null : cur)), + 1500, + ); + } catch { + showToast("Couldn't copy to clipboard", "error"); + } + }, + [showToast], + ); + + const runDebugShare = useCallback(async () => { + setSharing(true); + setShareResult(null); + try { + const res = await api.runDebugShare({ redact: shareRedact }); + setShareResult(res); + const n = Object.keys(res.urls).length; + showToast( + `Uploaded ${n} paste${n === 1 ? "" : "s"}${ + res.redacted ? 
" (redacted)" : "" + }`, + "success", + ); + } catch (e) { + showToast(`Debug share failed: ${e}`, "error"); + } finally { + setSharing(false); + } + }, [shareRedact, showToast]); + + // ── Update check / apply ─────────────────────────────────────────── const checkForUpdate = useCallback( async (force = false) => { @@ -992,6 +1046,129 @@ export default function SystemPage() { + + {/* Debug share — uploads a redacted report + logs, returns shareable + links. Separated from the buttons above because its output is + persistent, copyable URLs, not a fire-and-forget log tail. */} + + +
+
+ +
+ Share debug report + + Uploads system info + logs to a public paste service and + returns links to send the Hermes team. Pastes auto-delete + after 6 hours. + +
+
+ +
+ + + + {shareResult && ( +
+
+
+ uploaded + {shareResult.redacted ? ( + redacted + ) : ( + not redacted + )} + + + auto-deletes in{" "} + {Math.round(shareResult.auto_delete_seconds / 3600)}h + +
+ {Object.keys(shareResult.urls).length > 1 && ( + + )} +
+ + {Object.entries(shareResult.urls).map(([label, url]) => ( +
+ + + {label} + + + {url} + + +
+ ))} + + {shareResult.failures.length > 0 && ( + + Some logs failed to upload: {shareResult.failures.join("; ")} + + )} +
+ )} +
+