feat(cli): add hermes prompt-size diagnostic (#35276)

Adds a 'hermes prompt-size' command that reports the fixed prompt budget
for a fresh session: system prompt total, skills index, memory, user
profile, prompt tiers, and tool-schema JSON bytes. Runs offline (dummy
credentials force the direct-construction path, no network call).

Lets users see which block dominates their per-call payload — the skills
index is often the largest single block when many skills are installed
(issue #34667). Zero model-tool footprint: it's a top-level CLI
subcommand, not an agent tool.

--platform <name> simulates a channel's platform hint; --json emits a
machine-readable breakdown.

Closes #34667
This commit is contained in:
Teknium
2026-05-30 02:53:42 -07:00
committed by GitHub
parent cbf851ae1d
commit 61268ff7a9
4 changed files with 348 additions and 0 deletions

View File

@ -11118,6 +11118,13 @@ def cmd_completion(args, parser=None):
print(generate_bash(parser))
def cmd_prompt_size(args):
    """Print a byte/char breakdown of the system prompt and tool schemas.

    Thin dispatcher for the ``prompt-size`` subcommand: the implementation
    module is imported only when the command actually runs, then handed the
    parsed ``args`` unchanged.
    """
    from hermes_cli.prompt_size import cmd_prompt_size as _run_prompt_size

    _run_prompt_size(args)
def cmd_logs(args):
"""View and filter Hermes log files."""
from hermes_cli.logs import tail_log, list_logs
@ -11154,6 +11161,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
"dump", "fallback", "gateway", "hooks", "import", "insights",
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
"model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
"prompt-size",
"send", "sessions", "setup",
"skills", "slack", "status", "tools", "uninstall", "update",
"version", "webhook", "whatsapp", "chat", "secrets", "security",
@ -14387,6 +14395,30 @@ Examples:
)
logs_parser.set_defaults(func=cmd_logs)
# =========================================================================
# prompt-size command
# =========================================================================
# Register the ``prompt-size`` subcommand on the shared subparsers object.
prompt_size_parser = subparsers.add_parser(
"prompt-size",
help="Show a byte breakdown of the system prompt + tool schemas",
description=(
"Report the fixed prompt budget for a fresh session: system "
"prompt total, skills index, memory, user profile, and tool-schema "
"JSON. Runs offline (no API call)."
),
)
# ``--platform`` is a free-form string (no ``choices=`` restriction); it
# defaults to "cli" when the flag is omitted.
prompt_size_parser.add_argument(
"--platform",
default="cli",
help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
)
# ``--json`` is a boolean switch: False unless the flag is present.
prompt_size_parser.add_argument(
"--json",
action="store_true",
help="Emit the breakdown as JSON",
)
# Store the handler as the ``func`` default on the parsed namespace.
prompt_size_parser.set_defaults(func=cmd_prompt_size)
# =========================================================================
# Parse and execute
# =========================================================================

153
hermes_cli/prompt_size.py Normal file
View File

@ -0,0 +1,153 @@
"""Prompt-size diagnostic: ``hermes prompt-size``.
Reports a byte/char breakdown of the system prompt the agent would build for
a fresh session — system prompt total, the ``<available_skills>`` index,
memory + user profile, and tool-schema JSON. Lets users see where their fixed
prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
The diagnostic builds a real inspection agent (so the numbers match what
actually ships on the wire) but never makes a network call: it passes dummy
credentials so ``AIAgent.__init__`` takes the direct-construction path, then
calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
"""
from __future__ import annotations
import json
import re
from typing import Any, Dict, List, Tuple
# The skills index is wrapped in this tag pair inside the stable tier.
# ``.*?`` is non-greedy and ``re.DOTALL`` lets ``.`` match newlines, so
# ``search`` returns the first ``<available_skills>...</available_skills>``
# span (tags included) even when the block covers many lines.
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
def _bytes(s: str) -> int:
    """Return the length of *s* in bytes once encoded as UTF-8."""
    encoded = s.encode("utf-8")
    return len(encoded)
def _build_inspection_agent(platform: str) -> Any:
    """Build an ``AIAgent`` that is only used to inspect its prompt.

    The model name is read from the loaded config's ``model`` mapping
    (``default`` first, then ``model``, else an empty string). A placeholder
    ``api_key`` and a fixed ``base_url`` are passed explicitly; per the
    original design note this is meant to steer ``AIAgent`` onto its
    direct-construction path so no provider auto-detection or network call
    happens.

    Args:
        platform: Platform name forwarded to the agent unchanged.

    Returns:
        The constructed ``AIAgent`` instance.
    """
    from run_agent import AIAgent
    from hermes_cli.config import load_config

    config = load_config()
    model_section = config.get("model", {})
    if not isinstance(model_section, dict):
        # A non-mapping ``model`` entry is treated as "nothing configured".
        model_section = {}
    model_name = model_section.get("default") or model_section.get("model") or ""

    agent_kwargs = {
        "model": model_name,
        "api_key": "inspect-only",
        "base_url": "https://openrouter.ai/api/v1",
        "quiet_mode": True,
        "save_trajectories": False,
        "platform": platform,
    }
    return AIAgent(**agent_kwargs)
def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
    """Return a dict of prompt-size measurements for a fresh session.

    Args:
        platform: Platform name handed to the inspection agent.

    Returns:
        A dict with the following keys:

        * ``platform`` / ``model`` -- the requested platform and the agent's
          model name (``""`` when unset).
        * ``system_prompt``, ``skills_index``, ``memory``, ``user_profile`` --
          each ``{"chars": int, "bytes": int}``; bytes are UTF-8 encoded size.
        * ``tools`` -- ``{"count": int, "json_bytes": int}``.
        * ``sections`` -- a list of ``(label, chars, bytes)`` tuples for the
          stable, context and volatile prompt tiers.

    Raises:
        Exception: whatever agent construction or prompt building raises.
            Only the memory-store formatting below is best-effort.
    """
    import logging

    from agent.system_prompt import build_system_prompt, build_system_prompt_parts

    log = logging.getLogger(__name__)

    agent = _build_inspection_agent(platform)
    parts = build_system_prompt_parts(agent)
    full = build_system_prompt(agent)

    # ``or ""`` keeps the measurements working if a tier is present but None.
    stable = parts.get("stable", "") or ""
    context = parts.get("context", "") or ""
    volatile = parts.get("volatile", "") or ""

    # Skills index -- the <available_skills> block (the largest single block
    # when many skills are installed). Measured inside the stable tier.
    skills_match = _SKILLS_BLOCK_RE.search(stable)
    skills_index = skills_match.group(0) if skills_match else ""

    # Memory + user profile live in the volatile tier. We re-derive their
    # blocks directly from the memory store so the numbers are attributable
    # even though they're joined into ``volatile``.
    store = getattr(agent, "_memory_store", None)

    def _store_block(kind: str, enabled_attr: str) -> str:
        """Format one memory-store block, or "" if disabled or unavailable."""
        if store is None or not getattr(agent, enabled_attr, True):
            return ""
        try:
            return store.format_for_system_prompt(kind) or ""
        except Exception as exc:
            # Best-effort: a broken store must not abort the whole report,
            # but a zero reported here should not be silently misleading.
            log.warning(
                "prompt-size: could not format %r block from memory store: %s",
                kind,
                exc,
            )
            return ""

    # Each block is formatted independently so a failure in one does not
    # zero out the other.
    memory_block = _store_block("memory", "_memory_enabled")
    user_block = _store_block("user", "_user_profile_enabled")

    # Tool-schema JSON -- the other half of the fixed per-call payload.
    tools = getattr(agent, "tools", None) or []
    tools_json = json.dumps(tools, ensure_ascii=False)

    sections: List[Tuple[str, int, int]] = [
        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
    ]

    return {
        "platform": platform,
        "model": getattr(agent, "model", "") or "",
        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
        "sections": sections,
    }
def _fmt_kb(n: int) -> str:
    """Format a byte count as kilobytes (1024 B) with one decimal place."""
    kilobytes = n / 1024
    return f"{kilobytes:.1f} KB"
def render_breakdown(data: Dict[str, Any]) -> str:
    """Turn a breakdown dict into a newline-joined plain-text report.

    Args:
        data: A mapping with the ``platform``, ``model``, ``system_prompt``,
            ``skills_index``, ``memory``, ``user_profile``, ``sections`` and
            ``tools`` keys read below.

    Returns:
        The report as one string: a header line, the system-prompt total,
        the major blocks, one line per prompt tier, and the tool-schema size.
    """
    total = data["system_prompt"]
    report: List[str] = [
        f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})",
        "",
        f" System prompt total : {total['bytes']:>8,} B ({_fmt_kb(total['bytes'])}, {total['chars']:,} chars)",
        "",
        " Major blocks:",
    ]

    skills = data["skills_index"]
    memory = data["memory"]
    profile = data["user_profile"]
    report += [
        f" skills index : {skills['bytes']:>8,} B ({_fmt_kb(skills['bytes'])})",
        f" memory : {memory['bytes']:>8,} B ({_fmt_kb(memory['bytes'])})",
        f" user profile : {profile['bytes']:>8,} B ({_fmt_kb(profile['bytes'])})",
        "",
        " Prompt tiers:",
    ]

    # Only the label and byte size of each tier are shown; the char count
    # in the middle of each tuple is not rendered.
    report.extend(
        f" {label:<36}: {size:>8,} B ({_fmt_kb(size)})"
        for label, _chars, size in data["sections"]
    )
    report.append("")

    tool_info = data["tools"]
    report.append(
        f" Tool schemas : {tool_info['json_bytes']:>8,} B ({_fmt_kb(tool_info['json_bytes'])}, {tool_info['count']} tools)"
    )
    return "\n".join(report)
def cmd_prompt_size(args: Any) -> None:
    """Entry point for ``hermes prompt-size``.

    Reads ``platform`` (default ``"cli"``) and ``json`` (default ``False``)
    from *args*, computes the breakdown, and prints it to stdout either as
    indented JSON or as the plain-text report.

    Raises:
        SystemExit: with status 1 when the breakdown cannot be computed. The
            error message goes to stderr so ``--json`` consumers never
            receive non-JSON text on stdout alongside a success status.
    """
    import sys

    platform = getattr(args, "platform", "cli") or "cli"
    as_json = getattr(args, "json", False)

    try:
        data = compute_prompt_breakdown(platform)
    except Exception as e:
        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
        sys.exit(1)

    if as_json:
        print(json.dumps(data, ensure_ascii=False, indent=2))
    else:
        print(render_breakdown(data))

View File

@ -0,0 +1,118 @@
"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
import json
import pytest
from hermes_cli.prompt_size import (
_SKILLS_BLOCK_RE,
compute_prompt_breakdown,
render_breakdown,
)
def _seed_memory(hermes_home, memory_text="", user_text=""):
    """Write memory fixture files under ``<hermes_home>/memories``.

    The directory is always created. ``MEMORY.md`` and ``USER.md`` are
    written (UTF-8) only when their respective text is non-empty.
    """
    memories = hermes_home / "memories"
    memories.mkdir(parents=True, exist_ok=True)
    for filename, content in (("MEMORY.md", memory_text), ("USER.md", user_text)):
        if content:
            (memories / filename).write_text(content, encoding="utf-8")
def _seed_skill(hermes_home, name, description):
    """Create ``skills/demo/<name>/SKILL.md`` with minimal front matter.

    The file holds a ``name``/``description`` front-matter block followed by
    a heading and a one-line body, written as UTF-8.
    """
    target_dir = hermes_home / "skills" / "demo" / name
    target_dir.mkdir(parents=True, exist_ok=True)
    front_matter = f"---\nname: {name}\ndescription: {description}\n---\n"
    body = f"# {name}\nbody\n"
    skill_file = target_dir / "SKILL.md"
    skill_file.write_text(front_matter + body, encoding="utf-8")
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Provide an empty ``.hermes`` directory and point the process at it.

    Creates ``<tmp_path>/.hermes``, exports it as ``HERMES_HOME`` and changes
    the working directory to ``tmp_path``. Returns the new directory.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    # Work from the temp dir to avoid picking up the repo's AGENTS.md.
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home))
    return home
def test_breakdown_keys_and_shape(isolated_home):
    """The breakdown exposes every documented key with int byte/char counts."""
    data = compute_prompt_breakdown("cli")

    expected_keys = {
        "platform",
        "model",
        "system_prompt",
        "skills_index",
        "memory",
        "user_profile",
        "tools",
        "sections",
    }
    missing = expected_keys - set(data)
    assert not missing, f"missing keys: {sorted(missing)}"
    assert data["platform"] == "cli"

    # Check the type as well as the sign: ``>= 0`` alone would also accept
    # floats, which is not what the documented shape promises.
    for key in ("system_prompt", "skills_index", "memory", "user_profile"):
        for field in ("bytes", "chars"):
            value = data[key][field]
            assert isinstance(value, int), f"{key}.{field} is not an int"
            assert value >= 0
    for field in ("count", "json_bytes"):
        value = data["tools"][field]
        assert isinstance(value, int), f"tools.{field} is not an int"
        assert value >= 0

    # System prompt is non-trivial even with empty home (identity + guidance).
    assert data["system_prompt"]["bytes"] > 0
def test_runs_offline_without_credentials(isolated_home, monkeypatch):
    """With every provider key removed from the environment, a breakdown
    is still produced."""
    credential_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "NOUS_API_KEY",
        "ANTHROPIC_API_KEY",
    )
    for name in credential_vars:
        monkeypatch.delenv(name, raising=False)

    breakdown = compute_prompt_breakdown("cli")
    assert breakdown["system_prompt"]["bytes"] > 0
def test_skills_index_reflects_installed_skills(isolated_home):
    """A seeded skill yields a non-empty skills-index measurement.

    Note: the skills prompt is cached per-process (in-process LRU + disk
    snapshot), so the skill is seeded BEFORE the first build instead of
    comparing before/after within one process.
    """
    _seed_skill(isolated_home, "hello", "a demo skill for size testing")

    skills_index = compute_prompt_breakdown("cli")["skills_index"]
    assert skills_index["bytes"] > 0
def test_memory_and_profile_are_attributed(isolated_home):
    """Seeded memory and user-profile files each get their own non-zero size."""
    seeded = {
        "memory_text": "Project uses pytest.\n",
        "user_text": "User is a developer.\n",
    }
    _seed_memory(isolated_home, **seeded)

    breakdown = compute_prompt_breakdown("cli")
    for block_name in ("memory", "user_profile"):
        assert breakdown[block_name]["bytes"] > 0
def test_skills_block_regex_matches_tagged_block():
    """The regex captures a multi-line block, opening and closing tags included."""
    sample = "preamble\n<available_skills>\n cat:\n - a: b\n</available_skills>\ntail"
    found = _SKILLS_BLOCK_RE.search(sample)
    assert found is not None

    captured = found.group(0)
    assert captured.startswith("<available_skills>")
    assert captured.endswith("</available_skills>")
def test_render_breakdown_is_plain_text(isolated_home):
    """The rendered report contains its headline labels and is not JSON."""
    rendered = render_breakdown(compute_prompt_breakdown("cli"))

    for label in ("System prompt total", "skills index", "Tool schemas"):
        assert label in rendered
    # Plain text: the output must not start with a JSON brace.
    assert not rendered.strip().startswith("{")
def test_json_serializable(isolated_home):
    """The breakdown survives a JSON round trip with every value intact.

    Comparing ``json.loads(json.dumps(data))`` with itself can never fail, so
    the round-tripped value is compared against the original data instead.
    """
    data = compute_prompt_breakdown("cli")
    round_tripped = json.loads(json.dumps(data))

    # JSON has no tuple type: each ``sections`` tuple comes back as a list.
    expected = dict(data, sections=[list(section) for section in data["sections"]])
    assert round_tripped == expected

View File

@ -58,6 +58,7 @@ hermes [global-options] <command> [subcommand/options]
| `hermes doctor` | Diagnose config and dependency issues. |
| `hermes security audit` | On-demand supply-chain audit (OSV.dev) for the venv, plugin requirements, and pinned MCP servers. |
| `hermes dump` | Copy-pasteable setup summary for support/debugging. |
| `hermes prompt-size` | Show a byte breakdown of the system prompt + tool schemas (skills index, memory, profile). Runs offline. |
| `hermes debug` | Debug tools — upload logs and system info for support. |
| `hermes backup` | Back up Hermes home directory to a zip file. |
| `hermes checkpoints` | Inspect / prune / clear `~/.hermes/checkpoints/` (the shadow store used by `/rollback`). Run with no args for a status overview. |
@ -886,6 +887,50 @@ Lines without a parseable timestamp are included when `--since` is active (they
Hermes uses Python's `RotatingFileHandler`. Old logs are rotated automatically — look for `agent.log.1`, `agent.log.2`, etc. The `hermes logs list` subcommand shows all log files including rotated ones.
## `hermes prompt-size`
```bash
hermes prompt-size [--platform <name>] [--json]
```
Reports the fixed prompt budget for a fresh session — what gets sent on every
API call *before* any conversation content. Useful when a downstream adapter or
proxy has a tighter prompt budget than the model's context window, or when you
want to see which block (skills index, memory, profile) dominates.
It builds the same system prompt the agent would, then breaks it down:
- **System prompt total** — full assembled prompt (identity, guidance, skills
index, context files, memory, profile, timestamp).
- **Skills index** — the `<available_skills>` block. This is often the largest
single block when many skills are installed.
- **Memory** and **user profile** — your `MEMORY.md` / `USER.md` snapshots.
- **Prompt tiers** — stable / context / volatile, matching how Hermes layers
the prompt for cache-friendliness.
- **Tool schemas** — the JSON for all enabled tools (the other half of the
fixed per-call payload).
Runs entirely offline — no API call, works with no credentials configured.
```bash
# Human-readable breakdown for the CLI platform (default)
hermes prompt-size
# Simulate a messaging platform's prompt (different platform hint)
hermes prompt-size --platform telegram
# Machine-readable output for scripts
hermes prompt-size --json
```
:::tip
The skills index and tool schemas scale with how many skills and tools you have
enabled. To shrink the prompt, disable unused toolsets (`hermes tools`) or
uninstall skills you don't need (`hermes skills`). Context files (AGENTS.md,
.cursorrules) in your current directory also count toward the total.
:::
## `hermes config`
```bash