diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index e039ee51c..0cfcd03d1 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -11118,6 +11118,13 @@ def cmd_completion(args, parser=None):
         print(generate_bash(parser))
 
 
+def cmd_prompt_size(args):
+    """Show a byte/char breakdown of the system prompt + tool schemas."""
+    from hermes_cli.prompt_size import cmd_prompt_size as _impl
+
+    _impl(args)
+
+
 def cmd_logs(args):
     """View and filter Hermes log files."""
     from hermes_cli.logs import tail_log, list_logs
@@ -11154,6 +11161,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
         "dump", "fallback", "gateway", "hooks", "import", "insights", "kanban",
         "login", "logout", "logs", "lsp", "mcp", "memory", "migrate", "model",
         "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
+        "prompt-size",
         "send", "sessions", "setup", "skills", "slack", "status", "tools",
         "uninstall", "update", "version", "webhook", "whatsapp", "chat",
         "secrets", "security",
@@ -14387,6 +14395,30 @@ Examples:
     )
     logs_parser.set_defaults(func=cmd_logs)
 
+    # =========================================================================
+    # prompt-size command
+    # =========================================================================
+    prompt_size_parser = subparsers.add_parser(
+        "prompt-size",
+        help="Show a byte breakdown of the system prompt + tool schemas",
+        description=(
+            "Report the fixed prompt budget for a fresh session: system "
+            "prompt total, skills index, memory, user profile, and tool-schema "
+            "JSON. Runs offline (no API call)."
+        ),
+    )
+    prompt_size_parser.add_argument(
+        "--platform",
+        default="cli",
+        help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
+    )
+    prompt_size_parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit the breakdown as JSON",
+    )
+    prompt_size_parser.set_defaults(func=cmd_prompt_size)
+
     # =========================================================================
     # Parse and execute
     # =========================================================================
diff --git a/hermes_cli/prompt_size.py b/hermes_cli/prompt_size.py
new file mode 100644
index 000000000..913beb18b
--- /dev/null
+++ b/hermes_cli/prompt_size.py
@@ -0,0 +1,173 @@
+"""Prompt-size diagnostic: ``hermes prompt-size``.
+
+Reports a byte/char breakdown of the system prompt the agent would build for
+a fresh session — system prompt total, the ``<available_skills>`` index,
+memory + user profile, and tool-schema JSON. Lets users see where their fixed
+prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
+
+The diagnostic builds a real inspection agent (so the numbers match what
+actually ships on the wire) but never makes a network call: it passes dummy
+credentials so ``AIAgent.__init__`` takes the direct-construction path, then
+calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import sys
+from typing import Any, Dict, List, Tuple
+
+logger = logging.getLogger(__name__)
+
+# The skills index is wrapped in this tag pair inside the stable tier.
+# NOTE(review): confirm the tag name matches what the skills prompt builder emits.
+_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
+
+
+def _bytes(s: str) -> int:
+    """Return the UTF-8 encoded size of *s* in bytes."""
+    return len(s.encode("utf-8"))
+
+
+def _build_inspection_agent(platform: str) -> Any:
+    """Construct an offline AIAgent for prompt inspection.
+
+    Dummy ``api_key`` + ``base_url`` force the direct-construction path in
+    ``run_agent.py`` (no provider auto-detection, no network). The platform
+    comes from the caller so the breakdown matches a real session.
+    """
+    from run_agent import AIAgent
+    from hermes_cli.config import load_config
+
+    cfg = load_config()
+    model_cfg = cfg.get("model")
+    if isinstance(model_cfg, dict):
+        model = model_cfg.get("default") or model_cfg.get("model") or ""
+    elif isinstance(model_cfg, str):
+        # A bare string under ``model`` is used as the model name itself.
+        model = model_cfg
+    else:
+        model = ""
+
+    return AIAgent(
+        model=model,
+        api_key="inspect-only",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        save_trajectories=False,
+        platform=platform,
+    )
+
+
+def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
+    """Return a dict of prompt-size measurements for a fresh session.
+
+    Keys: ``system_prompt`` (chars/bytes), ``skills_index``, ``memory``,
+    ``user_profile``, ``tools`` (count + json bytes), and ``sections`` (a list
+    of (label, chars, bytes) for the three prompt tiers).
+    """
+    from agent.system_prompt import build_system_prompt, build_system_prompt_parts
+
+    agent = _build_inspection_agent(platform)
+
+    parts = build_system_prompt_parts(agent)
+    full = build_system_prompt(agent)
+
+    stable = parts.get("stable", "")
+    context = parts.get("context", "")
+    volatile = parts.get("volatile", "")
+
+    # Skills index — the <available_skills> block (the largest single block
+    # when many skills are installed). Measured inside the stable tier.
+    skills_match = _SKILLS_BLOCK_RE.search(stable)
+    skills_index = skills_match.group(0) if skills_match else ""
+
+    # Memory + user profile live in the volatile tier. We re-derive their
+    # blocks directly from the memory store so the numbers are attributable
+    # even though they're joined into ``volatile``.
+    memory_block = ""
+    user_block = ""
+    store = getattr(agent, "_memory_store", None)
+    if store is not None:
+        try:
+            if getattr(agent, "_memory_enabled", True):
+                memory_block = store.format_for_system_prompt("memory") or ""
+            if getattr(agent, "_user_profile_enabled", True):
+                user_block = store.format_for_system_prompt("user") or ""
+        except Exception:
+            # Best-effort attribution: keep whatever was measured, but leave
+            # a trace instead of silently reporting 0 bytes.
+            logger.debug("Could not format memory/profile blocks", exc_info=True)
+
+    # Tool-schema JSON — the other half of the fixed per-call payload.
+    tools = getattr(agent, "tools", None) or []
+    tools_json = json.dumps(tools, ensure_ascii=False)
+
+    sections: List[Tuple[str, int, int]] = [
+        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
+        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
+        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
+    ]
+
+    return {
+        "platform": platform,
+        "model": getattr(agent, "model", "") or "",
+        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
+        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
+        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
+        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
+        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
+        "sections": sections,
+    }
+
+
+def _fmt_kb(n: int) -> str:
+    """Format a byte count as kibibytes with one decimal place."""
+    return f"{n / 1024:.1f} KB"
+
+
+def render_breakdown(data: Dict[str, Any]) -> str:
+    """Render the breakdown as plain text suitable for a terminal."""
+    lines: List[str] = []
+    sp = data["system_prompt"]
+    lines.append(f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})")
+    lines.append("")
+    lines.append(f"  System prompt total : {sp['bytes']:>8,} B ({_fmt_kb(sp['bytes'])}, {sp['chars']:,} chars)")
+    lines.append("")
+    lines.append("  Major blocks:")
+    si = data["skills_index"]
+    mem = data["memory"]
+    up = data["user_profile"]
+    lines.append(f"    skills index      : {si['bytes']:>8,} B ({_fmt_kb(si['bytes'])})")
+    lines.append(f"    memory            : {mem['bytes']:>8,} B ({_fmt_kb(mem['bytes'])})")
+    lines.append(f"    user profile      : {up['bytes']:>8,} B ({_fmt_kb(up['bytes'])})")
+    lines.append("")
+    lines.append("  Prompt tiers:")
+    for label, _chars, byts in data["sections"]:
+        lines.append(f"    {label:<36}: {byts:>8,} B ({_fmt_kb(byts)})")
+    lines.append("")
+    tools = data["tools"]
+    lines.append(f"  Tool schemas        : {tools['json_bytes']:>8,} B ({_fmt_kb(tools['json_bytes'])}, {tools['count']} tools)")
+    return "\n".join(lines)
+
+
+def cmd_prompt_size(args: Any) -> None:
+    """Entry point for ``hermes prompt-size``.
+
+    Prints the breakdown to stdout (JSON with ``--json``). On failure the
+    error goes to stderr and the process exits with status 1, so stdout stays
+    machine-parseable and scripts can detect the failure.
+    """
+    platform = getattr(args, "platform", "cli") or "cli"
+    as_json = getattr(args, "json", False)
+    try:
+        data = compute_prompt_breakdown(platform)
+    except Exception as e:
+        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
+        sys.exit(1)
+    if as_json:
+        print(json.dumps(data, ensure_ascii=False, indent=2))
+    else:
+        print(render_breakdown(data))
diff --git a/tests/hermes_cli/test_prompt_size.py b/tests/hermes_cli/test_prompt_size.py
new file mode 100644
index 000000000..bd75c6df1
--- /dev/null
+++ b/tests/hermes_cli/test_prompt_size.py
@@ -0,0 +1,122 @@
+"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
+
+import json
+
+import pytest
+
+from hermes_cli.prompt_size import (
+    _SKILLS_BLOCK_RE,
+    compute_prompt_breakdown,
+    render_breakdown,
+)
+
+
+def _seed_memory(hermes_home, memory_text="", user_text=""):
+    mem_dir = hermes_home / "memories"
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    if memory_text:
+        (mem_dir / "MEMORY.md").write_text(memory_text, encoding="utf-8")
+    if user_text:
+        (mem_dir / "USER.md").write_text(user_text, encoding="utf-8")
+
+
+def _seed_skill(hermes_home, name, description):
+    skill_dir = hermes_home / "skills" / "demo" / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n# {name}\nbody\n",
+        encoding="utf-8",
+    )
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.chdir(tmp_path)  # avoid picking up the repo's AGENTS.md
+    return hermes_home
+
+
+def test_breakdown_keys_and_shape(isolated_home):
+    """The breakdown exposes every documented key with int byte/char counts."""
+    data = compute_prompt_breakdown("cli")
+    assert set(data) >= {
+        "platform",
+        "model",
+        "system_prompt",
+        "skills_index",
+        "memory",
+        "user_profile",
+        "tools",
+        "sections",
+    }
+    assert data["platform"] == "cli"
+    for key in ("system_prompt", "skills_index", "memory", "user_profile"):
+        assert data[key]["bytes"] >= 0
+        assert data[key]["chars"] >= 0
+    assert data["tools"]["count"] >= 0
+    assert data["tools"]["json_bytes"] >= 0
+    # System prompt is non-trivial even with empty home (identity + guidance).
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_runs_offline_without_credentials(isolated_home, monkeypatch):
+    """No provider credentials configured → still produces a breakdown."""
+    for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "NOUS_API_KEY",
+                "ANTHROPIC_API_KEY"):
+        monkeypatch.delenv(var, raising=False)
+    data = compute_prompt_breakdown("cli")
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_skills_index_reflects_installed_skills(isolated_home):
+    """Installing a skill makes the skills-index block non-empty.
+
+    Note: the skills prompt is cached per-process (in-process LRU + disk
+    snapshot), so we seed the skill BEFORE the first build rather than
+    comparing before/after within one process.
+    """
+    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
+    data = compute_prompt_breakdown("cli")
+    assert data["skills_index"]["bytes"] > 0
+
+
+def test_memory_and_profile_are_attributed(isolated_home):
+    """Memory and user-profile blocks are measured separately."""
+    _seed_memory(
+        isolated_home,
+        memory_text="Project uses pytest.\n",
+        user_text="User is a developer.\n",
+    )
+    data = compute_prompt_breakdown("cli")
+    assert data["memory"]["bytes"] > 0
+    assert data["user_profile"]["bytes"] > 0
+
+
+def test_skills_block_regex_matches_tagged_block():
+    text = "preamble\n<available_skills>\n  cat:\n    - a: b\n</available_skills>\ntail"
+    m = _SKILLS_BLOCK_RE.search(text)
+    assert m is not None
+    assert m.group(0).startswith("<available_skills>")
+    assert m.group(0).endswith("</available_skills>")
+
+
+def test_render_breakdown_is_plain_text(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    out = render_breakdown(data)
+    assert "System prompt total" in out
+    assert "skills index" in out
+    assert "Tool schemas" in out
+    # Plain text — no JSON braces leaking in.
+    assert not out.strip().startswith("{")
+
+
+def test_json_serializable(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    # Round-trips cleanly for ``--json`` output.
+    round_tripped = json.loads(json.dumps(data))
+    assert round_tripped["system_prompt"] == data["system_prompt"]
+    assert round_tripped["tools"] == data["tools"]
+    # JSON has no tuple type, so each section comes back as a list.
+    assert round_tripped["sections"] == [list(s) for s in data["sections"]]
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 5882d4aaa..b8b41a621 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -58,6 +58,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes doctor` | Diagnose config and dependency issues. |
 | `hermes security audit` | On-demand supply-chain audit (OSV.dev) for the venv, plugin requirements, and pinned MCP servers. |
 | `hermes dump` | Copy-pasteable setup summary for support/debugging. |
+| `hermes prompt-size` | Show a byte breakdown of the system prompt + tool schemas (skills index, memory, profile). Runs offline. |
 | `hermes debug` | Debug tools — upload logs and system info for support. |
 | `hermes backup` | Back up Hermes home directory to a zip file. |
 | `hermes checkpoints` | Inspect / prune / clear `~/.hermes/checkpoints/` (the shadow store used by `/rollback`). Run with no args for a status overview. |
@@ -886,6 +887,50 @@ Lines without a parseable timestamp are included when `--since` is active (they
 
 Hermes uses Python's `RotatingFileHandler`. Old logs are rotated automatically — look for `agent.log.1`, `agent.log.2`, etc. The `hermes logs list` subcommand shows all log files including rotated ones.
 
+
+## `hermes prompt-size`
+
+```bash
+hermes prompt-size [--platform <name>] [--json]
+```
+
+Reports the fixed prompt budget for a fresh session — what gets sent on every
+API call *before* any conversation content. Useful when a downstream adapter or
+proxy has a tighter prompt budget than the model's context window, or when you
+want to see which block (skills index, memory, profile) dominates.
+
+It builds the same system prompt the agent would, then breaks it down:
+
+- **System prompt total** — full assembled prompt (identity, guidance, skills
+  index, context files, memory, profile, timestamp).
+- **Skills index** — the `<available_skills>` block. This is often the largest
+  single block when many skills are installed.
+- **Memory** and **user profile** — your `MEMORY.md` / `USER.md` snapshots.
+- **Prompt tiers** — stable / context / volatile, matching how Hermes layers
+  the prompt for cache-friendliness.
+- **Tool schemas** — the JSON for all enabled tools (the other half of the
+  fixed per-call payload).
+
+Runs entirely offline — no API call, works with no credentials configured.
+
+```bash
+# Human-readable breakdown for the CLI platform (default)
+hermes prompt-size
+
+# Simulate a messaging platform's prompt (different platform hint)
+hermes prompt-size --platform telegram
+
+# Machine-readable output for scripts
+hermes prompt-size --json
+```
+
+:::tip
+The skills index and tool schemas scale with how many skills and tools you have
+enabled. To shrink the prompt, disable unused toolsets (`hermes tools`) or
+uninstall skills you don't need (`hermes skills`). Context files (AGENTS.md,
+.cursorrules) in your current directory also count toward the total.
+:::
+
 ## `hermes config`
 
 ```bash