fix(doctor): detect + repair stale HERMES_MAX_ITERATIONS .env ghost shadowing config.yaml (#38222)
* fix(doctor): detect + repair stale HERMES_MAX_ITERATIONS .env ghost shadowing config.yaml hermes doctor now flags when ~/.hermes/.env carries a HERMES_MAX_ITERATIONS value that disagrees with agent.max_turns in config.yaml, and 'hermes doctor --fix' removes the stale .env line so config.yaml is authoritative. 'hermes config show' surfaces the same drift inline under Max turns. The setup wizard stopped dual-writing this value, but users who edited only config.yaml from a pre-fix install keep a .env ghost. The gateway bridge normally overrides it at startup, but if the bridge bails on any earlier config-parse error the ghost silently wins — config says 400 while the gateway activity line reads N/90. The detector reads the .env FILE directly (load_env), not get_env_value/ os.environ, since the startup bridge may already have overwritten os.environ with the config value. Closes #17534. * fix(config): stop offering HERMES_MAX_ITERATIONS as an editable env var Removes HERMES_MAX_ITERATIONS from OPTIONAL_ENV_VARS so the dashboard env editor (PUT /api/env) and any env-var prompt no longer let a user write it to .env — which would recreate the stale ghost that shadows config.yaml's agent.max_turns (issue #17534). The iteration budget is configured only via config.yaml; the env var stays a read-only backward-compat fallback in the gateway/CLI, never a promoted write target. Regression test asserts it is absent from OPTIONAL_ENV_VARS.
This commit is contained in:
@ -3346,13 +3346,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": True,
|
||||
"category": "setting",
|
||||
},
|
||||
"HERMES_MAX_ITERATIONS": {
|
||||
"description": "Maximum tool-calling iterations per conversation (default: 90)",
|
||||
"prompt": "Max iterations",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "setting",
|
||||
},
|
||||
# HERMES_TOOL_PROGRESS and HERMES_TOOL_PROGRESS_MODE are deprecated —
|
||||
# now configured via display.tool_progress in config.yaml (off|new|all|verbose).
|
||||
# Gateway falls back to these env vars for backward compatibility.
|
||||
@ -5593,7 +5586,21 @@ def show_config():
|
||||
print()
|
||||
print(color("◆ Model", Colors.CYAN, Colors.BOLD))
|
||||
print(f" Model: {config.get('model', 'not set')}")
|
||||
print(f" Max turns: {config.get('agent', {}).get('max_turns', DEFAULT_CONFIG['agent']['max_turns'])}")
|
||||
_cfg_max_turns = config.get('agent', {}).get('max_turns', DEFAULT_CONFIG['agent']['max_turns'])
|
||||
print(f" Max turns: {_cfg_max_turns}")
|
||||
# Warn on stale HERMES_MAX_ITERATIONS ghost in .env that disagrees with
|
||||
# config.yaml (issue #17534). Read the .env FILE directly so we catch the
|
||||
# ghost even when the gateway bridge already overrode os.environ.
|
||||
try:
|
||||
_env_ghost = load_env().get("HERMES_MAX_ITERATIONS")
|
||||
if _env_ghost is not None and str(_env_ghost).strip() != str(_cfg_max_turns).strip():
|
||||
print(color(
|
||||
f" ⚠ .env has stale HERMES_MAX_ITERATIONS={_env_ghost} "
|
||||
f"(run 'hermes doctor --fix' to remove)",
|
||||
Colors.YELLOW,
|
||||
))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Display
|
||||
print()
|
||||
|
||||
@ -891,6 +891,63 @@ def run_doctor(args):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Detect stale HERMES_MAX_ITERATIONS ghost in .env shadowing
|
||||
# agent.max_turns in config.yaml (issue #17534). The setup wizard
|
||||
# used to dual-write the iteration budget to both stores; users who
|
||||
# later edit only config.yaml are left with a .env ghost. The gateway
|
||||
# bridge normally derives HERMES_MAX_ITERATIONS from agent.max_turns
|
||||
# at startup, but if that bridge bails (any earlier config-parse
|
||||
# error), the stale .env value silently wins and the agent runs at the
|
||||
# wrong budget — e.g. config says 400 but the activity line reads N/90.
|
||||
# Read the .env FILE directly (load_env), not get_env_value/os.environ,
|
||||
# which the startup bridge may already have overridden.
|
||||
try:
|
||||
import yaml
|
||||
from hermes_cli.config import load_env, remove_env_value
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
raw_config = yaml.safe_load(f) or {}
|
||||
agent_cfg = raw_config.get("agent")
|
||||
cfg_max_turns = (
|
||||
agent_cfg.get("max_turns")
|
||||
if isinstance(agent_cfg, dict)
|
||||
else None
|
||||
)
|
||||
# Legacy root-level key counts too.
|
||||
if cfg_max_turns is None:
|
||||
cfg_max_turns = raw_config.get("max_turns")
|
||||
env_ghost = load_env().get("HERMES_MAX_ITERATIONS")
|
||||
drift = (
|
||||
cfg_max_turns is not None
|
||||
and env_ghost is not None
|
||||
and str(cfg_max_turns).strip() != str(env_ghost).strip()
|
||||
)
|
||||
if drift:
|
||||
check_warn(
|
||||
f"HERMES_MAX_ITERATIONS={env_ghost} in .env shadows "
|
||||
f"agent.max_turns={cfg_max_turns} in config.yaml",
|
||||
"(stale ghost from an earlier `hermes setup` run)",
|
||||
)
|
||||
if should_fix:
|
||||
if remove_env_value("HERMES_MAX_ITERATIONS"):
|
||||
check_ok(
|
||||
"Removed stale HERMES_MAX_ITERATIONS from .env "
|
||||
f"(config.yaml agent.max_turns={cfg_max_turns} is now authoritative)"
|
||||
)
|
||||
fixed_count += 1
|
||||
else:
|
||||
check_warn("Could not remove HERMES_MAX_ITERATIONS from .env")
|
||||
manual_issues.append(
|
||||
"Manually delete the HERMES_MAX_ITERATIONS line from "
|
||||
f"{_DHH}/.env — config.yaml agent.max_turns is authoritative."
|
||||
)
|
||||
else:
|
||||
issues.append(
|
||||
"Stale HERMES_MAX_ITERATIONS in .env shadows config.yaml — "
|
||||
"run 'hermes doctor --fix'"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Validate config structure (catches malformed custom_providers, etc.)
|
||||
try:
|
||||
from hermes_cli.config import validate_config_structure
|
||||
|
||||
@ -486,6 +486,18 @@ class TestOptionalEnvVarsRegistry:
|
||||
all_vars.extend(vars_list)
|
||||
assert "TAVILY_API_KEY" in all_vars
|
||||
|
||||
def test_max_iterations_not_offered_as_env_var(self):
|
||||
"""HERMES_MAX_ITERATIONS must NOT be in OPTIONAL_ENV_VARS (issue #17534).
|
||||
|
||||
Offering it as an editable env var (dashboard, `hermes setup`) lets a
|
||||
user write it to .env, recreating the stale ghost that shadows
|
||||
config.yaml's agent.max_turns. The iteration budget is configured ONLY
|
||||
via config.yaml; HERMES_MAX_ITERATIONS remains a read-only backward-compat
|
||||
fallback in the gateway/CLI, never a promoted write target.
|
||||
"""
|
||||
from hermes_cli.config import OPTIONAL_ENV_VARS
|
||||
assert "HERMES_MAX_ITERATIONS" not in OPTIONAL_ENV_VARS
|
||||
|
||||
|
||||
class TestConfigMigrationSecretPrompts:
|
||||
def test_required_secret_env_prompt_uses_masked_prompt(self, tmp_path, monkeypatch):
|
||||
|
||||
@ -1274,3 +1274,87 @@ class TestDoctorCodexCliHintPlacement:
|
||||
minimax_idx = next(i for i, l in enumerate(lines) if "MiniMax OAuth" in l)
|
||||
assert self._hint_line() not in lines[minimax_idx - 1]
|
||||
assert minimax_idx + 1 >= len(lines) or self._hint_line() not in lines[minimax_idx + 1]
|
||||
|
||||
|
||||
class TestDoctorStaleMaxIterationsDrift:
|
||||
"""Regression for #17534: a stale HERMES_MAX_ITERATIONS in .env shadows
|
||||
agent.max_turns in config.yaml. The repro symptom is config.yaml saying
|
||||
400 while the gateway activity line reads N/90. Doctor must detect the
|
||||
drift, and `--fix` must remove the .env ghost (config.yaml wins).
|
||||
|
||||
The detector reads the .env FILE directly, NOT os.environ — the gateway
|
||||
startup bridge can already have overridden os.environ to the config value,
|
||||
so the ghost is only visible in the file.
|
||||
"""
|
||||
|
||||
def _run_config_section(self, monkeypatch, tmp_path, *, fix, ghost, cfg_turns,
|
||||
os_environ_value=None):
|
||||
import pathlib
|
||||
import contextlib
|
||||
import io
|
||||
from argparse import Namespace
|
||||
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir(parents=True)
|
||||
(hermes_home / "config.yaml").write_text(
|
||||
f"agent:\n max_turns: {cfg_turns}\n", encoding="utf-8"
|
||||
)
|
||||
env_lines = ["OPENAI_API_KEY=sk-test\n"]
|
||||
if ghost is not None:
|
||||
env_lines.append(f"HERMES_MAX_ITERATIONS={ghost}\n")
|
||||
(hermes_home / ".env").write_text("".join(env_lines), encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
|
||||
monkeypatch.setattr(doctor_mod, "get_hermes_home", lambda: hermes_home)
|
||||
# Point the config helpers at the temp home.
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
if os_environ_value is not None:
|
||||
# Simulate the gateway bridge having already overridden os.environ.
|
||||
monkeypatch.setenv("HERMES_MAX_ITERATIONS", str(os_environ_value))
|
||||
else:
|
||||
monkeypatch.delenv("HERMES_MAX_ITERATIONS", raising=False)
|
||||
|
||||
# Short-circuit at the Tool Availability stage — the drift check runs
|
||||
# well before it in the Configuration Files section.
|
||||
fake_model_tools = types.SimpleNamespace(
|
||||
check_tool_availability=lambda *a, **kw: (_ for _ in ()).throw(SystemExit(0)),
|
||||
TOOLSET_REQUIREMENTS={},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
|
||||
|
||||
buf = io.StringIO()
|
||||
with contextlib.redirect_stdout(buf), pytest.raises(SystemExit):
|
||||
doctor_mod.run_doctor(Namespace(fix=fix))
|
||||
return buf.getvalue(), hermes_home
|
||||
|
||||
def test_detects_drift_warn_only(self, monkeypatch, tmp_path):
|
||||
out, hermes_home = self._run_config_section(
|
||||
monkeypatch, tmp_path, fix=False, ghost=90, cfg_turns=400,
|
||||
os_environ_value=400, # bridge contaminated os.environ
|
||||
)
|
||||
assert "HERMES_MAX_ITERATIONS=90" in out
|
||||
assert "shadows" in out
|
||||
# Warn-only must NOT mutate .env.
|
||||
assert "HERMES_MAX_ITERATIONS=90" in (hermes_home / ".env").read_text(encoding="utf-8")
|
||||
|
||||
def test_fix_removes_ghost(self, monkeypatch, tmp_path):
|
||||
out, hermes_home = self._run_config_section(
|
||||
monkeypatch, tmp_path, fix=True, ghost=90, cfg_turns=400,
|
||||
os_environ_value=400,
|
||||
)
|
||||
assert "Removed stale HERMES_MAX_ITERATIONS" in out
|
||||
env_after = (hermes_home / ".env").read_text(encoding="utf-8")
|
||||
assert "HERMES_MAX_ITERATIONS" not in env_after
|
||||
assert "OPENAI_API_KEY=sk-test" in env_after # other keys preserved
|
||||
|
||||
def test_no_drift_when_values_match(self, monkeypatch, tmp_path):
|
||||
out, _ = self._run_config_section(
|
||||
monkeypatch, tmp_path, fix=False, ghost=400, cfg_turns=400,
|
||||
)
|
||||
assert "shadows" not in out
|
||||
|
||||
def test_no_drift_when_ghost_absent(self, monkeypatch, tmp_path):
|
||||
out, _ = self._run_config_section(
|
||||
monkeypatch, tmp_path, fix=False, ghost=None, cfg_turns=400,
|
||||
)
|
||||
assert "shadows" not in out
|
||||
|
||||
Reference in New Issue
Block a user