From 6ee046a72fde50dc064b02c86ffe67ff112546ec Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 3 Jun 2026 06:38:40 -0700 Subject: [PATCH] fix(doctor): detect + repair stale HERMES_MAX_ITERATIONS .env ghost shadowing config.yaml (#38222) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(doctor): detect + repair stale HERMES_MAX_ITERATIONS .env ghost shadowing config.yaml hermes doctor now flags when ~/.hermes/.env carries a HERMES_MAX_ITERATIONS value that disagrees with agent.max_turns in config.yaml, and 'hermes doctor --fix' removes the stale .env line so config.yaml is authoritative. 'hermes config show' surfaces the same drift inline under Max turns. The setup wizard stopped dual-writing this value, but users who edited only config.yaml from a pre-fix install keep a .env ghost. The gateway bridge normally overrides it at startup, but if the bridge bails on any earlier config-parse error the ghost silently wins — config says 400 while the gateway activity line reads N/90. The detector reads the .env FILE directly (load_env), not get_env_value/ os.environ, since the startup bridge may already have overwritten os.environ with the config value. Closes #17534. * fix(config): stop offering HERMES_MAX_ITERATIONS as an editable env var Removes HERMES_MAX_ITERATIONS from OPTIONAL_ENV_VARS so the dashboard env editor (PUT /api/env) and any env-var prompt no longer let a user write it to .env — which would recreate the stale ghost that shadows config.yaml's agent.max_turns (issue #17534). The iteration budget is configured only via config.yaml; the env var stays a read-only backward-compat fallback in the gateway/CLI, never a promoted write target. Regression test asserts it is absent from OPTIONAL_ENV_VARS. --- hermes_cli/config.py | 23 +++++---- hermes_cli/doctor.py | 57 ++++++++++++++++++++++ tests/hermes_cli/test_config.py | 12 +++++ tests/hermes_cli/test_doctor.py | 84 +++++++++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 8 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 4cb822b19..447d5dc85 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -3346,13 +3346,6 @@ OPTIONAL_ENV_VARS = { "password": True, "category": "setting", }, - "HERMES_MAX_ITERATIONS": { - "description": "Maximum tool-calling iterations per conversation (default: 90)", - "prompt": "Max iterations", - "url": None, - "password": False, - "category": "setting", - }, # HERMES_TOOL_PROGRESS and HERMES_TOOL_PROGRESS_MODE are deprecated — # now configured via display.tool_progress in config.yaml (off|new|all|verbose). # Gateway falls back to these env vars for backward compatibility. @@ -5593,7 +5586,21 @@ def show_config(): print() print(color("◆ Model", Colors.CYAN, Colors.BOLD)) print(f" Model: {config.get('model', 'not set')}") - print(f" Max turns: {config.get('agent', {}).get('max_turns', DEFAULT_CONFIG['agent']['max_turns'])}") + _cfg_max_turns = config.get('agent', {}).get('max_turns', DEFAULT_CONFIG['agent']['max_turns']) + print(f" Max turns: {_cfg_max_turns}") + # Warn on stale HERMES_MAX_ITERATIONS ghost in .env that disagrees with + # config.yaml (issue #17534). Read the .env FILE directly so we catch the + # ghost even when the gateway bridge already overrode os.environ. + try: + _env_ghost = load_env().get("HERMES_MAX_ITERATIONS") + if _env_ghost is not None and str(_env_ghost).strip() != str(_cfg_max_turns).strip(): + print(color( + f" ⚠ .env has stale HERMES_MAX_ITERATIONS={_env_ghost} " + f"(run 'hermes doctor --fix' to remove)", + Colors.YELLOW, + )) + except Exception: + pass # Display print() diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 4971f1fae..910130ac4 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -891,6 +891,63 @@ def run_doctor(args): except Exception: pass + # Detect stale HERMES_MAX_ITERATIONS ghost in .env shadowing + # agent.max_turns in config.yaml (issue #17534). The setup wizard + # used to dual-write the iteration budget to both stores; users who + # later edit only config.yaml are left with a .env ghost. The gateway + # bridge normally derives HERMES_MAX_ITERATIONS from agent.max_turns + # at startup, but if that bridge bails (any earlier config-parse + # error), the stale .env value silently wins and the agent runs at the + # wrong budget — e.g. config says 400 but the activity line reads N/90. + # Read the .env FILE directly (load_env), not get_env_value/os.environ, + # which the startup bridge may already have overridden. + try: + import yaml + from hermes_cli.config import load_env, remove_env_value + with open(config_path, encoding="utf-8") as f: + raw_config = yaml.safe_load(f) or {} + agent_cfg = raw_config.get("agent") + cfg_max_turns = ( + agent_cfg.get("max_turns") + if isinstance(agent_cfg, dict) + else None + ) + # Legacy root-level key counts too. + if cfg_max_turns is None: + cfg_max_turns = raw_config.get("max_turns") + env_ghost = load_env().get("HERMES_MAX_ITERATIONS") + drift = ( + cfg_max_turns is not None + and env_ghost is not None + and str(cfg_max_turns).strip() != str(env_ghost).strip() + ) + if drift: + check_warn( + f"HERMES_MAX_ITERATIONS={env_ghost} in .env shadows " + f"agent.max_turns={cfg_max_turns} in config.yaml", + "(stale ghost from an earlier `hermes setup` run)", + ) + if should_fix: + if remove_env_value("HERMES_MAX_ITERATIONS"): + check_ok( + "Removed stale HERMES_MAX_ITERATIONS from .env " + f"(config.yaml agent.max_turns={cfg_max_turns} is now authoritative)" + ) + fixed_count += 1 + else: + check_warn("Could not remove HERMES_MAX_ITERATIONS from .env") + manual_issues.append( + "Manually delete the HERMES_MAX_ITERATIONS line from " + f"{_DHH}/.env — config.yaml agent.max_turns is authoritative." + ) + else: + issues.append( + "Stale HERMES_MAX_ITERATIONS in .env shadows config.yaml — " + "run 'hermes doctor --fix'" + ) + except Exception: + pass + # Validate config structure (catches malformed custom_providers, etc.) try: from hermes_cli.config import validate_config_structure diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index eb4827a41..ec0a6aea7 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -486,6 +486,18 @@ class TestOptionalEnvVarsRegistry: all_vars.extend(vars_list) assert "TAVILY_API_KEY" in all_vars + def test_max_iterations_not_offered_as_env_var(self): + """HERMES_MAX_ITERATIONS must NOT be in OPTIONAL_ENV_VARS (issue #17534). + + Offering it as an editable env var (dashboard, `hermes setup`) lets a + user write it to .env, recreating the stale ghost that shadows + config.yaml's agent.max_turns. The iteration budget is configured ONLY + via config.yaml; HERMES_MAX_ITERATIONS remains a read-only backward-compat + fallback in the gateway/CLI, never a promoted write target. + """ + from hermes_cli.config import OPTIONAL_ENV_VARS + assert "HERMES_MAX_ITERATIONS" not in OPTIONAL_ENV_VARS + class TestConfigMigrationSecretPrompts: def test_required_secret_env_prompt_uses_masked_prompt(self, tmp_path, monkeypatch): diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py index e1edc95af..9d832fa95 100644 --- a/tests/hermes_cli/test_doctor.py +++ b/tests/hermes_cli/test_doctor.py @@ -1274,3 +1274,87 @@ class TestDoctorCodexCliHintPlacement: minimax_idx = next(i for i, l in enumerate(lines) if "MiniMax OAuth" in l) assert self._hint_line() not in lines[minimax_idx - 1] assert minimax_idx + 1 >= len(lines) or self._hint_line() not in lines[minimax_idx + 1] + + +class TestDoctorStaleMaxIterationsDrift: + """Regression for #17534: a stale HERMES_MAX_ITERATIONS in .env shadows + agent.max_turns in config.yaml. The repro symptom is config.yaml saying + 400 while the gateway activity line reads N/90. Doctor must detect the + drift, and `--fix` must remove the .env ghost (config.yaml wins). + + The detector reads the .env FILE directly, NOT os.environ — the gateway + startup bridge can already have overridden os.environ to the config value, + so the ghost is only visible in the file. + """ + + def _run_config_section(self, monkeypatch, tmp_path, *, fix, ghost, cfg_turns, + os_environ_value=None): + import pathlib + import contextlib + import io + from argparse import Namespace + + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir(parents=True) + (hermes_home / "config.yaml").write_text( + f"agent:\n max_turns: {cfg_turns}\n", encoding="utf-8" + ) + env_lines = ["OPENAI_API_KEY=sk-test\n"] + if ghost is not None: + env_lines.append(f"HERMES_MAX_ITERATIONS={ghost}\n") + (hermes_home / ".env").write_text("".join(env_lines), encoding="utf-8") + + monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home) + monkeypatch.setattr(doctor_mod, "get_hermes_home", lambda: hermes_home) + # Point the config helpers at the temp home. + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + if os_environ_value is not None: + # Simulate the gateway bridge having already overridden os.environ. + monkeypatch.setenv("HERMES_MAX_ITERATIONS", str(os_environ_value)) + else: + monkeypatch.delenv("HERMES_MAX_ITERATIONS", raising=False) + + # Short-circuit at the Tool Availability stage — the drift check runs + # well before it in the Configuration Files section. + fake_model_tools = types.SimpleNamespace( + check_tool_availability=lambda *a, **kw: (_ for _ in ()).throw(SystemExit(0)), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + buf = io.StringIO() + with contextlib.redirect_stdout(buf), pytest.raises(SystemExit): + doctor_mod.run_doctor(Namespace(fix=fix)) + return buf.getvalue(), hermes_home + + def test_detects_drift_warn_only(self, monkeypatch, tmp_path): + out, hermes_home = self._run_config_section( + monkeypatch, tmp_path, fix=False, ghost=90, cfg_turns=400, + os_environ_value=400, # bridge contaminated os.environ + ) + assert "HERMES_MAX_ITERATIONS=90" in out + assert "shadows" in out + # Warn-only must NOT mutate .env. + assert "HERMES_MAX_ITERATIONS=90" in (hermes_home / ".env").read_text(encoding="utf-8") + + def test_fix_removes_ghost(self, monkeypatch, tmp_path): + out, hermes_home = self._run_config_section( + monkeypatch, tmp_path, fix=True, ghost=90, cfg_turns=400, + os_environ_value=400, + ) + assert "Removed stale HERMES_MAX_ITERATIONS" in out + env_after = (hermes_home / ".env").read_text(encoding="utf-8") + assert "HERMES_MAX_ITERATIONS" not in env_after + assert "OPENAI_API_KEY=sk-test" in env_after # other keys preserved + + def test_no_drift_when_values_match(self, monkeypatch, tmp_path): + out, _ = self._run_config_section( + monkeypatch, tmp_path, fix=False, ghost=400, cfg_turns=400, + ) + assert "shadows" not in out + + def test_no_drift_when_ghost_absent(self, monkeypatch, tmp_path): + out, _ = self._run_config_section( + monkeypatch, tmp_path, fix=False, ghost=None, cfg_turns=400, + ) + assert "shadows" not in out