Files
hermes-agent/hermes_cli/prompt_size.py
Teknium 61268ff7a9 feat(cli): add hermes prompt-size diagnostic (#35276)
Adds a 'hermes prompt-size' command that reports the fixed prompt budget
for a fresh session: system prompt total, skills index, memory, user
profile, prompt tiers, and tool-schema JSON bytes. Runs offline (dummy
credentials force the direct-construction path, no network call).

Lets users see which block dominates their per-call payload — the skills
index is often the largest single block when many skills are installed
(issue #34667). Zero model-tool footprint: it's a top-level CLI
subcommand, not an agent tool.

--platform <name> simulates a channel's platform hint; --json emits a
machine-readable breakdown.

Closes #34667
2026-05-30 02:53:42 -07:00

154 lines
6.0 KiB
Python

"""Prompt-size diagnostic: ``hermes prompt-size``.
Reports a byte/char breakdown of the system prompt the agent would build for
a fresh session — system prompt total, the ``<available_skills>`` index,
memory + user profile, and tool-schema JSON. Lets users see where their fixed
prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
The diagnostic builds a real inspection agent (so the numbers match what
actually ships on the wire) but never makes a network call: it passes dummy
credentials so ``AIAgent.__init__`` takes the direct-construction path, then
calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
"""
from __future__ import annotations
import json
import re
from typing import Any, Dict, List, Tuple
# The skills index is wrapped in this tag pair inside the stable tier.
# The pattern is non-greedy, so it stops at the first closing tag, and
# re.DOTALL lets ``.`` match newlines so the match can span several lines.
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
def _bytes(s: str) -> int:
    """Return the number of bytes ``s`` occupies when encoded as UTF-8."""
    encoded = s.encode("utf-8")
    return len(encoded)
def _build_inspection_agent(platform: str) -> Any:
    """Build an ``AIAgent`` that is only used to inspect its prompt offline.

    A placeholder ``api_key`` and a fixed ``base_url`` are passed so that
    ``run_agent.py`` takes its direct-construction path (no provider
    auto-detection, no network). The model name is read from the loaded
    config's ``model`` section (``default`` first, then ``model``), falling
    back to an empty string; ``platform`` is supplied by the caller.
    """
    from run_agent import AIAgent
    from hermes_cli.config import load_config

    config = load_config()
    raw_model_section = config.get("model")
    # Anything other than a mapping is treated as "no model section".
    model_section = raw_model_section if isinstance(raw_model_section, dict) else {}
    model_name = model_section.get("default") or model_section.get("model") or ""

    agent_kwargs = {
        "model": model_name,
        "api_key": "inspect-only",
        "base_url": "https://openrouter.ai/api/v1",
        "quiet_mode": True,
        "save_trajectories": False,
        "platform": platform,
    }
    return AIAgent(**agent_kwargs)
def _format_memory_block(store: Any, target: str) -> str:
    """Render one block (``target``) from the memory store, or ``""`` on failure."""
    import logging

    try:
        return store.format_for_system_prompt(target) or ""
    except Exception:
        # Best-effort: the diagnostic should still report the other numbers,
        # but leave a trace so a zero-byte block can be explained.
        logging.getLogger(__name__).debug(
            "prompt-size: could not format %r block from the memory store",
            target,
            exc_info=True,
        )
        return ""


def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
    """Return a dict of prompt-size measurements for a fresh session.

    Args:
        platform: Platform name handed to the inspection agent.

    Returns:
        A dict with these keys:

        * ``platform`` / ``model`` -- what the measurements were taken for.
        * ``system_prompt``, ``skills_index``, ``memory``, ``user_profile`` --
          each ``{"chars": int, "bytes": int}`` (bytes are UTF-8).
        * ``tools`` -- ``{"count": int, "json_bytes": int}``.
        * ``sections`` -- a list of ``(label, chars, bytes)`` tuples for the
          three prompt tiers (stable, context, volatile).

    Failures while rendering the memory or user-profile block are logged at
    debug level and reported as an empty block; each block is rendered
    independently, so one failing does not zero out the other.
    """
    from agent.system_prompt import build_system_prompt, build_system_prompt_parts

    agent = _build_inspection_agent(platform)
    parts = build_system_prompt_parts(agent)
    full = build_system_prompt(agent)
    stable = parts.get("stable", "")
    context = parts.get("context", "")
    volatile = parts.get("volatile", "")

    # Skills index — the <available_skills> block (the largest single block
    # when many skills are installed). Measured inside the stable tier.
    skills_match = _SKILLS_BLOCK_RE.search(stable)
    skills_index = skills_match.group(0) if skills_match else ""

    # Memory + user profile live in the volatile tier. We re-derive their
    # blocks directly from the memory store so the numbers are attributable
    # even though they're joined into ``volatile``.
    memory_block = ""
    user_block = ""
    store = getattr(agent, "_memory_store", None)
    if store is not None:
        if getattr(agent, "_memory_enabled", True):
            memory_block = _format_memory_block(store, "memory")
        if getattr(agent, "_user_profile_enabled", True):
            user_block = _format_memory_block(store, "user")

    # Tool-schema JSON — the other half of the fixed per-call payload.
    tools = getattr(agent, "tools", None) or []
    tools_json = json.dumps(tools, ensure_ascii=False)

    sections: List[Tuple[str, int, int]] = [
        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
    ]
    return {
        "platform": platform,
        "model": getattr(agent, "model", "") or "",
        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
        "sections": sections,
    }
def _fmt_kb(n: int) -> str:
    """Format a byte count as kilobytes (1024 bytes each) with one decimal."""
    kilobytes = n / 1024
    return f"{kilobytes:.1f} KB"
def render_breakdown(data: Dict[str, Any]) -> str:
    """Format a prompt-size breakdown dict as a plain-text terminal report.

    ``data`` must carry the keys read below: ``platform``, ``model``,
    ``system_prompt``, ``skills_index``, ``memory``, ``user_profile``,
    ``sections`` and ``tools``. The report is returned as one
    newline-joined string with no trailing newline.
    """
    total = data["system_prompt"]
    report: List[str] = [
        f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})",
        "",
        f" System prompt total : {total['bytes']:>8,} B ({_fmt_kb(total['bytes'])}, {total['chars']:,} chars)",
        "",
        " Major blocks:",
    ]

    skills = data["skills_index"]
    memory = data["memory"]
    profile = data["user_profile"]
    report += [
        f" skills index : {skills['bytes']:>8,} B ({_fmt_kb(skills['bytes'])})",
        f" memory : {memory['bytes']:>8,} B ({_fmt_kb(memory['bytes'])})",
        f" user profile : {profile['bytes']:>8,} B ({_fmt_kb(profile['bytes'])})",
        "",
        " Prompt tiers:",
    ]

    # Each section is (label, chars, bytes); only the byte size is shown.
    report.extend(
        f" {label:<36}: {size:>8,} B ({_fmt_kb(size)})"
        for label, _chars, size in data["sections"]
    )
    report.append("")

    tool_info = data["tools"]
    report.append(
        f" Tool schemas : {tool_info['json_bytes']:>8,} B ({_fmt_kb(tool_info['json_bytes'])}, {tool_info['count']} tools)"
    )
    return "\n".join(report)
def cmd_prompt_size(args: Any) -> None:
    """Entry point for ``hermes prompt-size``.

    Reads ``platform`` (default ``"cli"``) and ``json`` (default ``False``)
    from ``args``, computes the breakdown and prints it to stdout: indented
    JSON when ``json`` is truthy, otherwise the plain-text report.

    If the breakdown cannot be computed, a one-line diagnostic is written to
    stderr rather than stdout, so ``--json`` consumers never receive a
    non-JSON line on the stream they parse. The function still returns
    normally in that case.
    """
    import sys

    platform = getattr(args, "platform", "cli") or "cli"
    as_json = getattr(args, "json", False)
    try:
        data = compute_prompt_breakdown(platform)
    except Exception as e:
        # Keep stdout clean for machine-readable output; report on stderr.
        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
        return
    if as_json:
        print(json.dumps(data, ensure_ascii=False, indent=2))
    else:
        print(render_breakdown(data))