feat(cli): add hermes prompt-size diagnostic (#35276)

Adds a 'hermes prompt-size' command that reports the fixed prompt budget for a fresh session: system prompt total, skills index, memory, user profile, prompt tiers, and tool-schema JSON bytes. Runs offline (dummy credentials force the direct-construction path, no network call). Lets users see which block dominates their per-call payload — the skills index is often the largest single block when many skills are installed (issue #34667). Zero model-tool footprint: it's a top-level CLI subcommand, not an agent tool. --platform <name> simulates a channel's platform hint; --json emits a machine-readable breakdown. Closes #34667
2026-05-30 02:53:42 -07:00
parent cbf851ae1d
commit 61268ff7a9
4 changed files with 348 additions and 0 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -11118,6 +11118,13 @@ def cmd_completion(args, parser=None):
        print(generate_bash(parser))


+def cmd_prompt_size(args):
+    """Run ``hermes prompt-size`` by delegating to ``hermes_cli.prompt_size``.
+
+    The implementation is imported inside the function, so the module is only
+    loaded when this subcommand is actually invoked.
+    """
+    from hermes_cli.prompt_size import cmd_prompt_size as _run_prompt_size
+
+    _run_prompt_size(args)
+
+
 def cmd_logs(args):
    """View and filter Hermes log files."""
    from hermes_cli.logs import tail_log, list_logs
@ -11154,6 +11161,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "dump", "fallback", "gateway", "hooks", "import", "insights",
        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
        "model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
+        "prompt-size",
        "send", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
        "version", "webhook", "whatsapp", "chat", "secrets", "security",
@ -14387,6 +14395,30 @@ Examples:
    )
    logs_parser.set_defaults(func=cmd_logs)

+    # =========================================================================
+    # prompt-size command
+    # =========================================================================
+    prompt_size_parser = subparsers.add_parser(
+        "prompt-size",
+        help="Show a byte breakdown of the system prompt + tool schemas",
+        description=(
+            "Report the fixed prompt budget for a fresh session: system "
+            "prompt total, skills index, memory, user profile, and tool-schema "
+            "JSON. Runs offline (no API call)."
+        ),
+    )
+    prompt_size_parser.add_argument(
+        "--platform",
+        default="cli",
+        help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
+    )
+    prompt_size_parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit the breakdown as JSON",
+    )
+    prompt_size_parser.set_defaults(func=cmd_prompt_size)
+
    # =========================================================================
    # Parse and execute
    # =========================================================================
--- a/hermes_cli/prompt_size.py
+++ b/hermes_cli/prompt_size.py
@ -0,0 +1,153 @@
+"""Prompt-size diagnostic: ``hermes prompt-size``.
+
+Reports a byte/char breakdown of the system prompt the agent would build for
+a fresh session — system prompt total, the ``<available_skills>`` index,
+memory + user profile, and tool-schema JSON. Lets users see where their fixed
+prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
+
+The diagnostic builds a real inspection agent (so the numbers match what
+actually ships on the wire) but never makes a network call: it passes dummy
+credentials so ``AIAgent.__init__`` takes the direct-construction path, then
+calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Dict, List, Tuple
+
+# The skills index is wrapped in this tag pair inside the stable tier.
+_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
+
+
+def _bytes(s: str) -> int:
+    """Return the length of *s* in bytes once encoded as UTF-8."""
+    encoded = s.encode("utf-8")
+    return len(encoded)
+
+
+def _build_inspection_agent(platform: str) -> Any:
+    """Build the ``AIAgent`` whose prompt and tool schemas get measured.
+
+    The model name is read from the ``model`` section of the loaded config
+    (``default`` first, then ``model``); it falls back to an empty string when
+    neither is set or the section is not a mapping. A placeholder ``api_key``
+    and a fixed ``base_url`` are passed explicitly; this is intended to make
+    ``AIAgent`` take its direct-construction path, with no provider
+    auto-detection and no network access. ``platform`` is the only value that
+    comes from the caller.
+    """
+    from run_agent import AIAgent
+    from hermes_cli.config import load_config
+
+    model_section = load_config().get("model")
+    if not isinstance(model_section, dict):
+        model_section = {}
+    model_name = model_section.get("default") or model_section.get("model") or ""
+
+    return AIAgent(
+        model=model_name,
+        api_key="inspect-only",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        save_trajectories=False,
+        platform=platform,
+    )
+
+
+def _format_memory_block(store: Any, target: str) -> str:
+    """Render one memory-store block, returning ``""`` if it cannot be built.
+
+    Attribution is best-effort: a store that fails to format one block must
+    not abort the whole diagnostic, so the failure is logged at debug level
+    and reported as an empty block.
+    """
+    import logging
+
+    try:
+        return store.format_for_system_prompt(target) or ""
+    except Exception:
+        logging.getLogger(__name__).debug(
+            "prompt-size: could not format %r block from the memory store",
+            target,
+            exc_info=True,
+        )
+        return ""
+
+
+def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
+    """Return prompt-size measurements for a fresh session on *platform*.
+
+    Keys of the returned dict:
+
+    - ``platform`` / ``model``: the simulated platform and the agent's model
+      name (empty string when unset).
+    - ``system_prompt``, ``skills_index``, ``memory``, ``user_profile``: each
+      a ``{"chars": int, "bytes": int}`` mapping (bytes are UTF-8).
+    - ``tools``: ``{"count": int, "json_bytes": int}`` for ``agent.tools``.
+    - ``sections``: a list of ``(label, chars, bytes)`` tuples for the
+      stable, context, and volatile prompt tiers.
+    """
+    from agent.system_prompt import build_system_prompt, build_system_prompt_parts
+
+    agent = _build_inspection_agent(platform)
+
+    parts = build_system_prompt_parts(agent)
+    full = build_system_prompt(agent)
+
+    stable = parts.get("stable", "")
+    context = parts.get("context", "")
+    volatile = parts.get("volatile", "")
+
+    # Skills index — the <available_skills> block (the largest single block
+    # when many skills are installed). Measured inside the stable tier.
+    skills_match = _SKILLS_BLOCK_RE.search(stable)
+    skills_index = skills_match.group(0) if skills_match else ""
+
+    # Memory + user profile live in the volatile tier. We re-derive their
+    # blocks directly from the memory store so the numbers are attributable
+    # even though they're joined into ``volatile``. Each block is formatted
+    # independently so a failure in one does not zero out the other.
+    memory_block = ""
+    user_block = ""
+    store = getattr(agent, "_memory_store", None)
+    if store is not None:
+        if getattr(agent, "_memory_enabled", True):
+            memory_block = _format_memory_block(store, "memory")
+        if getattr(agent, "_user_profile_enabled", True):
+            user_block = _format_memory_block(store, "user")
+
+    # Tool-schema JSON — the other half of the fixed per-call payload.
+    tools = getattr(agent, "tools", None) or []
+    tools_json = json.dumps(tools, ensure_ascii=False)
+
+    sections: List[Tuple[str, int, int]] = [
+        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
+        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
+        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
+    ]
+
+    return {
+        "platform": platform,
+        "model": getattr(agent, "model", "") or "",
+        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
+        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
+        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
+        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
+        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
+        "sections": sections,
+    }
+
+
+def _fmt_kb(n: int) -> str:
+    """Format a byte count as ``"<n / 1024> KB"`` with one decimal place."""
+    kilobytes = n / 1024
+    return f"{kilobytes:.1f} KB"
+
+
+def render_breakdown(data: Dict[str, Any]) -> str:
+    """Format a prompt-size breakdown as a plain-text terminal report.
+
+    ``data`` must carry the ``platform``, ``model``, ``system_prompt``,
+    ``skills_index``, ``memory``, ``user_profile``, ``sections`` and ``tools``
+    entries. The report lists the system prompt total, the three major
+    blocks, every prompt tier, and the tool-schema payload, each with its
+    size in bytes and in KB.
+    """
+    total = data["system_prompt"]
+    header = (
+        f"Prompt-size breakdown (platform={data['platform']}, "
+        f"model={data['model'] or 'unset'})"
+    )
+    total_row = (
+        f"  System prompt total : {total['bytes']:>8,} B  "
+        f"({_fmt_kb(total['bytes'])}, {total['chars']:,} chars)"
+    )
+
+    skills = data["skills_index"]
+    memory = data["memory"]
+    profile = data["user_profile"]
+    block_rows = [
+        f"    {name:<19}: {entry['bytes']:>8,} B  ({_fmt_kb(entry['bytes'])})"
+        for name, entry in (
+            ("skills index", skills),
+            ("memory", memory),
+            ("user profile", profile),
+        )
+    ]
+
+    # Only the byte size of each tier is printed; its char count is not shown.
+    tier_rows = [
+        f"    {label:<36}: {size:>8,} B  ({_fmt_kb(size)})"
+        for label, _chars, size in data["sections"]
+    ]
+
+    schemas = data["tools"]
+    tools_row = (
+        f"  Tool schemas         : {schemas['json_bytes']:>8,} B  "
+        f"({_fmt_kb(schemas['json_bytes'])}, {schemas['count']} tools)"
+    )
+
+    report = [
+        header,
+        "",
+        total_row,
+        "",
+        "  Major blocks:",
+        *block_rows,
+        "",
+        "  Prompt tiers:",
+        *tier_rows,
+        "",
+        tools_row,
+    ]
+    return "\n".join(report)
+
+
+def cmd_prompt_size(args: Any) -> None:
+    """Entry point for ``hermes prompt-size``.
+
+    Reads ``platform`` (default ``"cli"``) and ``json`` (default ``False``)
+    from *args*, computes the breakdown, and prints it to stdout either as
+    indented JSON or as the plain-text report.
+
+    If the breakdown cannot be computed, the error is written to stderr and
+    the process exits with status 1.
+    """
+    import sys
+
+    platform = getattr(args, "platform", "cli") or "cli"
+    as_json = getattr(args, "json", False)
+    try:
+        data = compute_prompt_breakdown(platform)
+    except Exception as e:
+        # Keep stdout clean so ``--json`` consumers never receive a non-JSON
+        # line, and exit non-zero so scripts can detect the failure.
+        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
+        sys.exit(1)
+    if as_json:
+        print(json.dumps(data, ensure_ascii=False, indent=2))
+    else:
+        print(render_breakdown(data))
--- a/tests/hermes_cli/test_prompt_size.py
+++ b/tests/hermes_cli/test_prompt_size.py
@ -0,0 +1,118 @@
+"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
+
+import json
+
+import pytest
+
+from hermes_cli.prompt_size import (
+    _SKILLS_BLOCK_RE,
+    compute_prompt_breakdown,
+    render_breakdown,
+)
+
+
+def _seed_memory(hermes_home, memory_text="", user_text=""):
+    """Create ``<hermes_home>/memories`` and write the non-empty texts into it.
+
+    ``memory_text`` goes to ``MEMORY.md`` and ``user_text`` to ``USER.md``;
+    an empty text leaves the corresponding file untouched.
+    """
+    memories = hermes_home / "memories"
+    memories.mkdir(parents=True, exist_ok=True)
+    for filename, text in (("MEMORY.md", memory_text), ("USER.md", user_text)):
+        if text:
+            (memories / filename).write_text(text, encoding="utf-8")
+
+
+def _seed_skill(hermes_home, name, description):
+    """Write a minimal ``SKILL.md`` for *name* under ``skills/demo/<name>``."""
+    target_dir = hermes_home / "skills" / "demo" / name
+    target_dir.mkdir(parents=True, exist_ok=True)
+    content_lines = [
+        "---",
+        f"name: {name}",
+        f"description: {description}",
+        "---",
+        f"# {name}",
+        "body",
+        "",  # yields the trailing newline
+    ]
+    (target_dir / "SKILL.md").write_text("\n".join(content_lines), encoding="utf-8")
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    """Provide an empty ``.hermes`` directory and point ``HERMES_HOME`` at it.
+
+    The working directory is switched to ``tmp_path`` as well, to avoid
+    picking up the repo's AGENTS.md.
+    """
+    home_dir = tmp_path / ".hermes"
+    home_dir.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home_dir))
+    monkeypatch.chdir(tmp_path)
+    return home_dir
+
+
+def test_breakdown_keys_and_shape(isolated_home):
+    """The breakdown exposes every documented key with int byte/char counts."""
+    data = compute_prompt_breakdown("cli")
+    assert set(data) >= {
+        "platform",
+        "model",
+        "system_prompt",
+        "skills_index",
+        "memory",
+        "user_profile",
+        "tools",
+        "sections",
+    }
+    assert data["platform"] == "cli"
+    for key in ("system_prompt", "skills_index", "memory", "user_profile"):
+        for unit in ("bytes", "chars"):
+            # Check the type as well as the range, as the docstring promises.
+            assert isinstance(data[key][unit], int)
+            assert data[key][unit] >= 0
+    for field in ("count", "json_bytes"):
+        assert isinstance(data["tools"][field], int)
+        assert data["tools"][field] >= 0
+    # System prompt is non-trivial even with empty home (identity + guidance).
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_runs_offline_without_credentials(isolated_home, monkeypatch):
+    """With every provider API key unset, a breakdown is still produced."""
+    credential_vars = (
+        "OPENROUTER_API_KEY",
+        "OPENAI_API_KEY",
+        "NOUS_API_KEY",
+        "ANTHROPIC_API_KEY",
+    )
+    for name in credential_vars:
+        monkeypatch.delenv(name, raising=False)
+    breakdown = compute_prompt_breakdown("cli")
+    assert breakdown["system_prompt"]["bytes"] > 0
+
+
+def test_skills_index_reflects_installed_skills(isolated_home):
+    """A seeded skill produces a non-empty skills-index block.
+
+    Note: the skills prompt is cached per-process (in-process LRU + disk
+    snapshot), so the skill is seeded BEFORE the first build instead of
+    comparing before/after sizes within one process.
+    """
+    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
+    breakdown = compute_prompt_breakdown("cli")
+    skills_bytes = breakdown["skills_index"]["bytes"]
+    assert skills_bytes > 0
+
+
+def test_memory_and_profile_are_attributed(isolated_home):
+    """Seeded memory and user-profile files each yield a non-empty block."""
+    _seed_memory(
+        isolated_home,
+        memory_text="Project uses pytest.\n",
+        user_text="User is a developer.\n",
+    )
+    breakdown = compute_prompt_breakdown("cli")
+    for key in ("memory", "user_profile"):
+        assert breakdown[key]["bytes"] > 0
+
+
+def test_skills_block_regex_matches_tagged_block():
+    """The match starts at the opening tag and ends at the closing tag."""
+    sample = "preamble\n<available_skills>\n  cat:\n    - a: b\n</available_skills>\ntail"
+    match = _SKILLS_BLOCK_RE.search(sample)
+    assert match is not None
+    block = match.group(0)
+    assert block.startswith("<available_skills>")
+    assert block.endswith("</available_skills>")
+
+
+def test_render_breakdown_is_plain_text(isolated_home):
+    """The rendered report names its main rows and does not open with a brace."""
+    report = render_breakdown(compute_prompt_breakdown("cli"))
+    for expected in ("System prompt total", "skills index", "Tool schemas"):
+        assert expected in report
+    # Plain text — no JSON braces leaking in.
+    assert not report.strip().startswith("{")
+
+
+def test_json_serializable(isolated_home):
+    """The breakdown survives a JSON round trip with its values intact."""
+    data = compute_prompt_breakdown("cli")
+    # ``json.dumps`` raises if anything in the payload is not serializable.
+    decoded = json.loads(json.dumps(data))
+    # Compare against the source data rather than against a second round trip,
+    # which would be equal by construction.
+    assert set(decoded) == set(data)
+    for key in ("platform", "model", "system_prompt", "skills_index",
+                "memory", "user_profile", "tools"):
+        assert decoded[key] == data[key]
+    # JSON has no tuple type: each (label, chars, bytes) section decodes to a list.
+    assert decoded["sections"] == [list(section) for section in data["sections"]]
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@ -58,6 +58,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes doctor` | Diagnose config and dependency issues. |
 | `hermes security audit` | On-demand supply-chain audit (OSV.dev) for the venv, plugin requirements, and pinned MCP servers. |
 | `hermes dump` | Copy-pasteable setup summary for support/debugging. |
+| `hermes prompt-size` | Show a byte breakdown of the system prompt + tool schemas (skills index, memory, profile). Runs offline. |
 | `hermes debug` | Debug tools — upload logs and system info for support. |
 | `hermes backup` | Back up Hermes home directory to a zip file. |
 | `hermes checkpoints` | Inspect / prune / clear `~/.hermes/checkpoints/` (the shadow store used by `/rollback`). Run with no args for a status overview. |
@ -886,6 +887,50 @@ Lines without a parseable timestamp are included when `--since` is active (they

 Hermes uses Python's `RotatingFileHandler`. Old logs are rotated automatically — look for `agent.log.1`, `agent.log.2`, etc. The `hermes logs list` subcommand shows all log files including rotated ones.

+
+## `hermes prompt-size`
+
+```bash
+hermes prompt-size [--platform <name>] [--json]
+```
+
+Reports the fixed prompt budget for a fresh session — what gets sent on every
+API call *before* any conversation content. Useful when a downstream adapter or
+proxy has a tighter prompt budget than the model's context window, or when you
+want to see which block (skills index, memory, profile) dominates.
+
+It builds the same system prompt the agent would, then breaks it down:
+
+- **System prompt total** — full assembled prompt (identity, guidance, skills
+  index, context files, memory, profile, timestamp).
+- **Skills index** — the `<available_skills>` block. This is often the largest
+  single block when many skills are installed.
+- **Memory** and **user profile** — your `MEMORY.md` / `USER.md` snapshots.
+- **Prompt tiers** — stable / context / volatile, matching how Hermes layers
+  the prompt for cache-friendliness.
+- **Tool schemas** — the JSON for all enabled tools (the other half of the
+  fixed per-call payload).
+
+Runs entirely offline — no API call, works with no credentials configured.
+
+```bash
+# Human-readable breakdown for the CLI platform (default)
+hermes prompt-size
+
+# Simulate a messaging platform's prompt (different platform hint)
+hermes prompt-size --platform telegram
+
+# Machine-readable output for scripts
+hermes prompt-size --json
+```
+
+:::tip
+The skills index and tool schemas scale with how many skills and tools you have
+enabled. To shrink the prompt, disable unused toolsets (`hermes tools`) or
+uninstall skills you don't need (`hermes skills`). Context files (`AGENTS.md`,
+`.cursorrules`) in your current directory also count toward the total.
+:::
+
 ## `hermes config`

 ```bash