feat(cli): add hermes prompt-size diagnostic (#35276)
Adds a 'hermes prompt-size' command that reports the fixed prompt budget for a fresh session: system prompt total, skills index, memory, user profile, prompt tiers, and tool-schema JSON bytes. Runs offline (dummy credentials force the direct-construction path, no network call). Lets users see which block dominates their per-call payload — the skills index is often the largest single block when many skills are installed (issue #34667). Zero model-tool footprint: it's a top-level CLI subcommand, not an agent tool. --platform <name> simulates a channel's platform hint; --json emits a machine-readable breakdown. Closes #34667
This commit is contained in:
@ -11118,6 +11118,13 @@ def cmd_completion(args, parser=None):
|
||||
print(generate_bash(parser))
|
||||
|
||||
|
||||
def cmd_prompt_size(args):
    """Print a byte/char breakdown of the system prompt and tool schemas.

    The implementation module is imported lazily, inside the handler, and
    the parsed ``args`` namespace is forwarded to it unchanged.
    """
    import hermes_cli.prompt_size as prompt_size_module

    prompt_size_module.cmd_prompt_size(args)
|
||||
|
||||
|
||||
def cmd_logs(args):
|
||||
"""View and filter Hermes log files."""
|
||||
from hermes_cli.logs import tail_log, list_logs
|
||||
@ -11154,6 +11161,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
|
||||
"prompt-size",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat", "secrets", "security",
|
||||
@ -14387,6 +14395,30 @@ Examples:
|
||||
)
|
||||
logs_parser.set_defaults(func=cmd_logs)
|
||||
|
||||
# =========================================================================
|
||||
# prompt-size command
|
||||
# =========================================================================
|
||||
prompt_size_parser = subparsers.add_parser(
|
||||
"prompt-size",
|
||||
help="Show a byte breakdown of the system prompt + tool schemas",
|
||||
description=(
|
||||
"Report the fixed prompt budget for a fresh session: system "
|
||||
"prompt total, skills index, memory, user profile, and tool-schema "
|
||||
"JSON. Runs offline (no API call)."
|
||||
),
|
||||
)
|
||||
prompt_size_parser.add_argument(
|
||||
"--platform",
|
||||
default="cli",
|
||||
help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
|
||||
)
|
||||
prompt_size_parser.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Emit the breakdown as JSON",
|
||||
)
|
||||
prompt_size_parser.set_defaults(func=cmd_prompt_size)
|
||||
|
||||
# =========================================================================
|
||||
# Parse and execute
|
||||
# =========================================================================
|
||||
|
||||
153
hermes_cli/prompt_size.py
Normal file
153
hermes_cli/prompt_size.py
Normal file
@ -0,0 +1,153 @@
|
||||
"""Prompt-size diagnostic: ``hermes prompt-size``.
|
||||
|
||||
Reports a byte/char breakdown of the system prompt the agent would build for
|
||||
a fresh session — system prompt total, the ``<available_skills>`` index,
|
||||
memory + user profile, and tool-schema JSON. Lets users see where their fixed
|
||||
prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
|
||||
|
||||
The diagnostic builds a real inspection agent (so the numbers match what
|
||||
actually ships on the wire) but never makes a network call: it passes dummy
|
||||
credentials so ``AIAgent.__init__`` takes the direct-construction path, then
|
||||
calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
# The skills index is wrapped in this tag pair inside the stable tier.
|
||||
# Non-greedy body with DOTALL: the match may span newlines but stops at the
# first closing tag instead of running on to a later one.
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
|
||||
|
||||
|
||||
def _bytes(s: str) -> int:
|
||||
return len(s.encode("utf-8"))
|
||||
|
||||
|
||||
def _build_inspection_agent(platform: str) -> Any:
    """Construct an AIAgent used only for prompt inspection.

    Placeholder ``api_key`` / ``base_url`` values are passed explicitly; the
    intent (per this module's design notes) is that ``AIAgent`` then skips
    provider auto-detection and performs no network call.

    Args:
        platform: Platform name forwarded to ``AIAgent`` so the prompt
            matches what that channel would receive.

    Returns:
        The constructed ``AIAgent`` instance, built in quiet mode with
        trajectory saving disabled.
    """
    from run_agent import AIAgent
    from hermes_cli.config import load_config

    model_setting = load_config().get("model")
    if isinstance(model_setting, dict):
        model = model_setting.get("default") or model_setting.get("model") or ""
    elif isinstance(model_setting, str):
        # A bare string ``model`` entry is still a usable model name; without
        # this branch it was dropped and the report showed ``model=unset``.
        model = model_setting.strip()
    else:
        model = ""

    return AIAgent(
        model=model,
        api_key="inspect-only",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        save_trajectories=False,
        platform=platform,
    )
|
||||
|
||||
|
||||
def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
    """Return prompt-size measurements for a fresh session.

    Args:
        platform: Platform name passed to the inspection agent.

    Returns:
        A dict with these keys:

        * ``platform`` / ``model`` -- the inputs the measurement was taken for.
        * ``system_prompt``, ``skills_index``, ``memory``, ``user_profile`` --
          each ``{"chars": int, "bytes": int}`` (bytes are UTF-8 encoded size).
        * ``tools`` -- ``{"count": int, "json_bytes": int}``.
        * ``sections`` -- a list of ``(label, chars, bytes)`` tuples for the
          stable, context and volatile prompt tiers.
    """
    import logging

    from agent.system_prompt import build_system_prompt, build_system_prompt_parts

    agent = _build_inspection_agent(platform)

    parts = build_system_prompt_parts(agent)
    full = build_system_prompt(agent)

    stable = parts.get("stable", "")
    context = parts.get("context", "")
    volatile = parts.get("volatile", "")

    # Skills index: the <available_skills> block, located inside the stable
    # tier. An absent block is reported as zero size.
    skills_match = _SKILLS_BLOCK_RE.search(stable)
    skills_index = skills_match.group(0) if skills_match else ""

    # Memory and user profile are re-derived from the memory store so each
    # can be attributed on its own. Every block gets its own try so that a
    # failure formatting one does not zero out the other.
    derived = {"memory": "", "user": ""}
    store = getattr(agent, "_memory_store", None)
    if store is not None:
        enable_flags = (
            ("memory", "_memory_enabled"),
            ("user", "_user_profile_enabled"),
        )
        for target, flag in enable_flags:
            if not getattr(agent, flag, True):
                continue
            try:
                derived[target] = store.format_for_system_prompt(target) or ""
            except Exception:
                # Best-effort attribution: keep the block at zero size, but
                # leave a trace for anyone debugging an unexpected 0 B row.
                logging.getLogger(__name__).debug(
                    "prompt-size: could not format %r block from memory store",
                    target,
                    exc_info=True,
                )
    memory_block = derived["memory"]
    user_block = derived["user"]

    # Tool-schema JSON, measured as the UTF-8 size of the serialized list.
    tools = getattr(agent, "tools", None) or []
    tools_json = json.dumps(tools, ensure_ascii=False)

    sections: List[Tuple[str, int, int]] = [
        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
    ]

    return {
        "platform": platform,
        "model": getattr(agent, "model", "") or "",
        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
        "sections": sections,
    }
|
||||
|
||||
|
||||
def _fmt_kb(n: int) -> str:
|
||||
return f"{n / 1024:.1f} KB"
|
||||
|
||||
|
||||
def render_breakdown(data: Dict[str, Any]) -> str:
|
||||
"""Render the breakdown as plain text suitable for a terminal."""
|
||||
lines: List[str] = []
|
||||
sp = data["system_prompt"]
|
||||
lines.append(f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})")
|
||||
lines.append("")
|
||||
lines.append(f" System prompt total : {sp['bytes']:>8,} B ({_fmt_kb(sp['bytes'])}, {sp['chars']:,} chars)")
|
||||
lines.append("")
|
||||
lines.append(" Major blocks:")
|
||||
si = data["skills_index"]
|
||||
mem = data["memory"]
|
||||
up = data["user_profile"]
|
||||
lines.append(f" skills index : {si['bytes']:>8,} B ({_fmt_kb(si['bytes'])})")
|
||||
lines.append(f" memory : {mem['bytes']:>8,} B ({_fmt_kb(mem['bytes'])})")
|
||||
lines.append(f" user profile : {up['bytes']:>8,} B ({_fmt_kb(up['bytes'])})")
|
||||
lines.append("")
|
||||
lines.append(" Prompt tiers:")
|
||||
for label, chars, byts in data["sections"]:
|
||||
lines.append(f" {label:<36}: {byts:>8,} B ({_fmt_kb(byts)})")
|
||||
lines.append("")
|
||||
tools = data["tools"]
|
||||
lines.append(f" Tool schemas : {tools['json_bytes']:>8,} B ({_fmt_kb(tools['json_bytes'])}, {tools['count']} tools)")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def cmd_prompt_size(args: Any) -> None:
    """Entry point for ``hermes prompt-size``.

    Args:
        args: Parsed CLI namespace. ``platform`` (falls back to ``"cli"``
            when missing or empty) selects the platform to measure; a truthy
            ``json`` switches the output to indented JSON.

    Raises:
        SystemExit: With status 1 when the breakdown cannot be computed.
    """
    import sys

    platform = getattr(args, "platform", "cli") or "cli"
    as_json = getattr(args, "json", False)
    try:
        data = compute_prompt_breakdown(platform)
    except Exception as e:
        # Report on stderr so ``--json`` consumers never read an error line
        # as output, and exit non-zero so scripts can detect the failure.
        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
        raise SystemExit(1) from e
    if as_json:
        print(json.dumps(data, ensure_ascii=False, indent=2))
    else:
        print(render_breakdown(data))
|
||||
118
tests/hermes_cli/test_prompt_size.py
Normal file
118
tests/hermes_cli/test_prompt_size.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.prompt_size import (
|
||||
_SKILLS_BLOCK_RE,
|
||||
compute_prompt_breakdown,
|
||||
render_breakdown,
|
||||
)
|
||||
|
||||
|
||||
def _seed_memory(hermes_home, memory_text="", user_text=""):
|
||||
mem_dir = hermes_home / "memories"
|
||||
mem_dir.mkdir(parents=True, exist_ok=True)
|
||||
if memory_text:
|
||||
(mem_dir / "MEMORY.md").write_text(memory_text, encoding="utf-8")
|
||||
if user_text:
|
||||
(mem_dir / "USER.md").write_text(user_text, encoding="utf-8")
|
||||
|
||||
|
||||
def _seed_skill(hermes_home, name, description):
|
||||
skill_dir = hermes_home / "skills" / "demo" / name
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
(skill_dir / "SKILL.md").write_text(
|
||||
f"---\nname: {name}\ndescription: {description}\n---\n# {name}\nbody\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Provide an empty ``.hermes`` directory and point ``HERMES_HOME`` at it.

    The working directory is also switched to ``tmp_path`` for the duration
    of the test.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.chdir(tmp_path)  # avoid picking up the repo's AGENTS.md
    monkeypatch.setenv("HERMES_HOME", str(home))
    return home
|
||||
|
||||
|
||||
def test_breakdown_keys_and_shape(isolated_home):
    """Every documented key is present and all counts are non-negative."""
    documented_keys = {
        "platform",
        "model",
        "system_prompt",
        "skills_index",
        "memory",
        "user_profile",
        "tools",
        "sections",
    }
    result = compute_prompt_breakdown("cli")

    assert set(result) >= documented_keys
    assert result["platform"] == "cli"

    for block_name in ("system_prompt", "skills_index", "memory", "user_profile"):
        measured = result[block_name]
        assert measured["bytes"] >= 0
        assert measured["chars"] >= 0

    tool_stats = result["tools"]
    assert tool_stats["count"] >= 0
    assert tool_stats["json_bytes"] >= 0

    # System prompt is non-trivial even with empty home (identity + guidance).
    assert result["system_prompt"]["bytes"] > 0
|
||||
|
||||
|
||||
def test_runs_offline_without_credentials(isolated_home, monkeypatch):
    """With the provider API-key variables removed, a breakdown is still produced."""
    credential_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "NOUS_API_KEY",
        "ANTHROPIC_API_KEY",
    )
    for name in credential_vars:
        monkeypatch.delenv(name, raising=False)

    result = compute_prompt_breakdown("cli")
    assert result["system_prompt"]["bytes"] > 0
|
||||
|
||||
|
||||
def test_skills_index_reflects_installed_skills(isolated_home):
    """A seeded skill yields a non-empty skills-index measurement.

    Note: the skills prompt is cached per-process (in-process LRU + disk
    snapshot), so the skill is seeded BEFORE the first build instead of
    comparing before/after within one process.
    """
    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
    skills_stats = compute_prompt_breakdown("cli")["skills_index"]
    assert skills_stats["bytes"] > 0
|
||||
|
||||
|
||||
def test_memory_and_profile_are_attributed(isolated_home):
    """Seeded memory and user-profile text each produce a non-zero size."""
    seeded = {
        "memory_text": "Project uses pytest.\n",
        "user_text": "User is a developer.\n",
    }
    _seed_memory(isolated_home, **seeded)

    result = compute_prompt_breakdown("cli")
    assert result["memory"]["bytes"] > 0
    assert result["user_profile"]["bytes"] > 0
|
||||
|
||||
|
||||
def test_skills_block_regex_matches_tagged_block():
    """The regex captures the whole tag pair, opening and closing tags included."""
    sample = "preamble\n<available_skills>\n cat:\n - a: b\n</available_skills>\ntail"
    found = _SKILLS_BLOCK_RE.search(sample)
    assert found is not None
    block = found.group(0)
    assert block.startswith("<available_skills>")
    assert block.endswith("</available_skills>")
|
||||
|
||||
|
||||
def test_render_breakdown_is_plain_text(isolated_home):
    """The rendered report contains the expected headings and is not JSON."""
    rendered = render_breakdown(compute_prompt_breakdown("cli"))
    for heading in ("System prompt total", "skills index", "Tool schemas"):
        assert heading in rendered
    # Plain text — no JSON braces leaking in.
    assert not rendered.strip().startswith("{")
|
||||
|
||||
|
||||
def test_json_serializable(isolated_home):
    """The breakdown survives a JSON round trip, as ``--json`` output requires.

    The previous assertion compared an expression with itself, so it could
    only fail if ``json.dumps`` raised. This version compares the decoded
    result against the source data.
    """
    data = compute_prompt_breakdown("cli")
    restored = json.loads(json.dumps(data))

    # JSON has no tuple type, so the section rows come back as lists.
    expected = dict(data, sections=[list(row) for row in data["sections"]])
    assert restored == expected
|
||||
@ -58,6 +58,7 @@ hermes [global-options] <command> [subcommand/options]
|
||||
| `hermes doctor` | Diagnose config and dependency issues. |
|
||||
| `hermes security audit` | On-demand supply-chain audit (OSV.dev) for the venv, plugin requirements, and pinned MCP servers. |
|
||||
| `hermes dump` | Copy-pasteable setup summary for support/debugging. |
|
||||
| `hermes prompt-size` | Show a byte breakdown of the system prompt + tool schemas (skills index, memory, profile). Runs offline. |
|
||||
| `hermes debug` | Debug tools — upload logs and system info for support. |
|
||||
| `hermes backup` | Back up Hermes home directory to a zip file. |
|
||||
| `hermes checkpoints` | Inspect / prune / clear `~/.hermes/checkpoints/` (the shadow store used by `/rollback`). Run with no args for a status overview. |
|
||||
@ -886,6 +887,50 @@ Lines without a parseable timestamp are included when `--since` is active (they
|
||||
|
||||
Hermes uses Python's `RotatingFileHandler`. Old logs are rotated automatically — look for `agent.log.1`, `agent.log.2`, etc. The `hermes logs list` subcommand shows all log files including rotated ones.
|
||||
|
||||
|
||||
## `hermes prompt-size`
|
||||
|
||||
```bash
|
||||
hermes prompt-size [--platform <name>] [--json]
|
||||
```
|
||||
|
||||
Reports the fixed prompt budget for a fresh session — what gets sent on every
|
||||
API call *before* any conversation content. Useful when a downstream adapter or
|
||||
proxy has a tighter prompt budget than the model's context window, or when you
|
||||
want to see which block (skills index, memory, profile) dominates.
|
||||
|
||||
It builds the same system prompt the agent would, then breaks it down:
|
||||
|
||||
- **System prompt total** — full assembled prompt (identity, guidance, skills
|
||||
index, context files, memory, profile, timestamp).
|
||||
- **Skills index** — the `<available_skills>` block. This is often the largest
|
||||
single block when many skills are installed.
|
||||
- **Memory** and **user profile** — your `MEMORY.md` / `USER.md` snapshots.
|
||||
- **Prompt tiers** — stable / context / volatile, matching how Hermes layers
|
||||
the prompt for cache-friendliness.
|
||||
- **Tool schemas** — the JSON for all enabled tools (the other half of the
|
||||
fixed per-call payload).
|
||||
|
||||
Runs entirely offline — no API call, works with no credentials configured.
|
||||
|
||||
```bash
|
||||
# Human-readable breakdown for the CLI platform (default)
|
||||
hermes prompt-size
|
||||
|
||||
# Simulate a messaging platform's prompt (different platform hint)
|
||||
hermes prompt-size --platform telegram
|
||||
|
||||
# Machine-readable output for scripts
|
||||
hermes prompt-size --json
|
||||
```
|
||||
|
||||
:::tip
|
||||
The skills index and tool schemas scale with how many skills and tools you have
|
||||
enabled. To shrink the prompt, disable unused toolsets (`hermes tools`) or
|
||||
uninstall skills you don't need (`hermes skills`). Context files (AGENTS.md,
|
||||
.cursorrules) in your current directory also count toward the total.
|
||||
:::
|
||||
|
||||
## `hermes config`
|
||||
|
||||
```bash
|
||||
|
||||
Reference in New Issue
Block a user