From ffb53767bfff0ac471eb712ba1799f4ec5e95a36 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Wed, 3 Jun 2026 23:51:44 -0600
Subject: [PATCH] fix(config): align prefill messages key handling

---
 cli.py                                        | 21 ++++++++++-
 cron/scheduler.py                             | 11 ++++--
 gateway/run.py                                |  5 ++-
 skills/red-teaming/godmode/SKILL.md           |  5 ++-
 .../godmode/scripts/auto_jailbreak.py         |  4 ++-
 tests/cli/test_prefill_config.py              | 35 +++++++++++++++++++
 tests/cron/test_scheduler.py                  | 30 ++++++++++++++++
 .../test_runtime_config_env_expansion.py      | 23 ++++++++++++
 .../red-teaming/red-teaming-godmode.md        |  5 ++-
 website/docs/user-guide/skills/godmode.md     |  5 ++-
 .../red-teaming/red-teaming-godmode.md        |  7 ++--
 .../current/user-guide/skills/godmode.md      |  7 ++--
 12 files changed, 136 insertions(+), 22 deletions(-)
 create mode 100644 tests/cli/test_prefill_config.py

diff --git a/cli.py b/cli.py
index 62f4be1e3..cbb718b18 100644
--- a/cli.py
+++ b/cli.py
@@ -313,6 +313,25 @@ def _load_prefill_messages(file_path: str) -> List[Dict[str, Any]]:
         return []
 
 
+def _resolve_prefill_messages_file(config: Dict[str, Any]) -> str:
+    """Pick the prefill messages file path from the environment or config.
+
+    Precedence: a non-blank ``HERMES_PREFILL_MESSAGES_FILE`` env var, then the
+    canonical top-level ``prefill_messages_file`` key, then the legacy
+    ``agent.prefill_messages_file`` key; ``""`` when none of them is set.
+    """
+    from_env = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "").strip()
+    if from_env:
+        return from_env
+    chosen = str(config.get("prefill_messages_file", "") or "").strip()
+    if not chosen:
+        legacy_section = config.get("agent", {})
+        if not isinstance(legacy_section, dict):
+            return ""
+        chosen = str(legacy_section.get("prefill_messages_file", "") or "").strip()
+    return chosen
+
+
 def _parse_reasoning_config(effort: str) -> dict | None:
     """Parse a reasoning effort level into an OpenRouter reasoning config dict."""
     from hermes_constants import parse_reasoning_effort
@@ -3272,7 +3291,7 @@ class HermesCLI:
         
         # Ephemeral prefill messages (few-shot priming, never persisted)
         self.prefill_messages = _load_prefill_messages(
-            CLI_CONFIG["agent"].get("prefill_messages_file", "")
+            _resolve_prefill_messages_file(CLI_CONFIG)
         )
         
         # Reasoning config (OpenRouter reasoning effort level)
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 91671b46e..401b140d8 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -1551,9 +1551,16 @@ def _run_job_impl(job: dict) -> tuple[bool, str, str, Optional[str]]:
         effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
         reasoning_config = parse_reasoning_effort(effort)
 
-        # Prefill messages from env or config.yaml
+        # Prefill messages from env or config.yaml. The top-level
+        # prefill_messages_file key is canonical; agent.prefill_messages_file is
+        # retained as a legacy fallback for older CLI/godmode configs.
         prefill_messages = None
-        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
+        agent_cfg = _cfg.get("agent", {}) if isinstance(_cfg.get("agent", {}), dict) else {}
+        prefill_file = (
+            os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
+            or _cfg.get("prefill_messages_file", "")
+            or agent_cfg.get("prefill_messages_file", "")
+        )
         if prefill_file:
             pfpath = Path(prefill_file).expanduser()
             if not pfpath.is_absolute():
diff --git a/gateway/run.py b/gateway/run.py
index df0d76ed3..e437958d4 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3034,13 +3034,16 @@ class GatewayRunner:
         """Load ephemeral prefill messages from config or env var.
         
         Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
-        the prefill_messages_file key in ~/.hermes/config.yaml.
+        the top-level prefill_messages_file key in ~/.hermes/config.yaml.
+        agent.prefill_messages_file is accepted as a legacy fallback.
         Relative paths are resolved from ~/.hermes/.
         """
         file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
         if not file_path:
             cfg = _load_gateway_runtime_config()
             file_path = str(cfg.get("prefill_messages_file", "") or "")
+            if not file_path:
+                file_path = str(cfg_get(cfg, "agent", "prefill_messages_file", default="") or "")
         if not file_path:
             return []
         path = Path(file_path).expanduser()
diff --git a/skills/red-teaming/godmode/SKILL.md b/skills/red-teaming/godmode/SKILL.md
index 94918faed..27751e93e 100644
--- a/skills/red-teaming/godmode/SKILL.md
+++ b/skills/red-teaming/godmode/SKILL.md
@@ -90,7 +90,7 @@ undo_jailbreak()
 7. **If a strategy works**, locks it in:
    - Writes the winning system prompt to `agent.system_prompt` in `config.yaml`
    - Writes prefill messages to `~/.hermes/prefill.json`
-   - Sets `agent.prefill_messages_file: "prefill.json"` in `config.yaml`
+   - Sets `prefill_messages_file: "prefill.json"` in `config.yaml`
 8. **Reports results** — which strategy won, score, preview of compliant response
 
 ### Strategy order per model family:
@@ -171,8 +171,7 @@ Create `~/.hermes/prefill.json`:
 
 Then set in `~/.hermes/config.yaml`:
 ```yaml
-agent:
-  prefill_messages_file: "prefill.json"
+prefill_messages_file: "prefill.json"
 ```
 
 Prefill messages are injected at the start of every API call, after the system prompt. They are ephemeral — never saved to sessions or trajectories. The model sees them as prior conversation context, establishing a pattern of compliance.
diff --git a/skills/red-teaming/godmode/scripts/auto_jailbreak.py b/skills/red-teaming/godmode/scripts/auto_jailbreak.py
index e6efced48..9dcfdf35b 100644
--- a/skills/red-teaming/godmode/scripts/auto_jailbreak.py
+++ b/skills/red-teaming/godmode/scripts/auto_jailbreak.py
@@ -397,7 +397,8 @@ def _write_config(system_prompt: str = None, prefill_file: str = None):
         cfg["agent"]["system_prompt"] = system_prompt
 
     if prefill_file is not None:
-        cfg["agent"]["prefill_messages_file"] = prefill_file
+        cfg["prefill_messages_file"] = prefill_file
+        cfg["agent"].pop("prefill_messages_file", None)
 
     with open(CONFIG_PATH, "w") as f:
         yaml.dump(cfg, f, default_flow_style=False, allow_unicode=True,
@@ -721,6 +722,7 @@ def undo_jailbreak(verbose=True):
             if "agent" in cfg:
                 cfg["agent"].pop("system_prompt", None)
                 cfg["agent"].pop("prefill_messages_file", None)
+            cfg.pop("prefill_messages_file", None)
             with open(CONFIG_PATH, "w") as f:
                 yaml.dump(cfg, f, default_flow_style=False, allow_unicode=True,
                           width=120, sort_keys=False)
diff --git a/tests/cli/test_prefill_config.py b/tests/cli/test_prefill_config.py
new file mode 100644
index 000000000..02a594087
--- /dev/null
+++ b/tests/cli/test_prefill_config.py
@@ -0,0 +1,35 @@
+"""Regression tests for CLI prefill config key compatibility."""
+
+from __future__ import annotations
+
+import cli
+
+
+def test_resolve_prefill_messages_file_uses_top_level(monkeypatch):
+    """The canonical top-level key wins over the legacy ``agent`` key."""
+    monkeypatch.delenv("HERMES_PREFILL_MESSAGES_FILE", raising=False)
+    config = {
+        "prefill_messages_file": "top.json",
+        "agent": {"prefill_messages_file": "legacy.json"},
+    }
+
+    assert cli._resolve_prefill_messages_file(config) == "top.json"
+
+
+def test_resolve_prefill_messages_file_accepts_legacy_agent_key(monkeypatch):
+    """Without a top-level key, the legacy ``agent`` key is still honoured."""
+    monkeypatch.delenv("HERMES_PREFILL_MESSAGES_FILE", raising=False)
+    legacy_only = {"agent": {"prefill_messages_file": "legacy.json"}}
+
+    assert cli._resolve_prefill_messages_file(legacy_only) == "legacy.json"
+
+
+def test_resolve_prefill_messages_file_prefers_env(monkeypatch):
+    """The env var overrides both the top-level and the legacy config key."""
+    monkeypatch.setenv("HERMES_PREFILL_MESSAGES_FILE", "env.json")
+    config = {
+        "prefill_messages_file": "top.json",
+        "agent": {"prefill_messages_file": "legacy.json"},
+    }
+
+    assert cli._resolve_prefill_messages_file(config) == "env.json"
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 38da3fe40..78f3ab422 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1546,6 +1546,36 @@ class TestRunJobConfigEnvVarExpansion:
             "config.yaml ${VAR} was not expanded in the cron execution path."
         )
 
+    def test_legacy_agent_prefill_messages_file_is_loaded(self, tmp_path, monkeypatch):
+        """Cron accepts the legacy agent.prefill_messages_file fallback."""
+        # Env var outranks config.yaml; clear it so the legacy key is what gets read.
+        monkeypatch.delenv("HERMES_PREFILL_MESSAGES_FILE", raising=False)
+        prefill = [{"role": "system", "content": "legacy cron prefill"}]
+        (tmp_path / "prefill.json").write_text(json.dumps(prefill), encoding="utf-8")
+        (tmp_path / "config.yaml").write_text(
+            "agent:\n  prefill_messages_file: prefill.json\n", encoding="utf-8"
+        )
+
+        job = {"id": "prefill-job", "name": "prefill test", "prompt": "hi"}
+        fake_db = MagicMock()
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("hermes_state.SessionDB", return_value=fake_db), \
+             patch("hermes_cli.runtime_provider.resolve_runtime_provider",
+                   return_value=self._RUNTIME), \
+             patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+            success, _, _, error = run_job(job)
+
+        assert success is True
+        assert error is None
+        assert mock_agent_cls.call_args.kwargs["prefill_messages"] == prefill
+
     def test_fallback_model_env_ref_in_config_yaml_is_expanded(self, tmp_path, monkeypatch):
         """${VAR} in config.yaml fallback_providers model: is expanded."""
         (tmp_path / "config.yaml").write_text(
diff --git a/tests/gateway/test_runtime_config_env_expansion.py b/tests/gateway/test_runtime_config_env_expansion.py
index e77e9daaa..66c6cc203 100644
--- a/tests/gateway/test_runtime_config_env_expansion.py
+++ b/tests/gateway/test_runtime_config_env_expansion.py
@@ -33,6 +33,29 @@ def test_load_prefill_messages_expands_env_var_path(monkeypatch, gateway_home):
     assert gateway_run.GatewayRunner._load_prefill_messages() == prefill
 
 
+def test_load_prefill_messages_accepts_legacy_agent_key(monkeypatch, gateway_home):
+    monkeypatch.delenv("HERMES_PREFILL_MESSAGES_FILE", raising=False)  # env var wins if set
+    prefill = [{"role": "system", "content": "legacy few-shot"}]
+    (gateway_home / "prefill.json").write_text(json.dumps(prefill), encoding="utf-8")
+    _write_config(gateway_home, "agent:\n  prefill_messages_file: prefill.json\n")
+    assert gateway_run.GatewayRunner._load_prefill_messages() == prefill
+
+
+def test_load_prefill_messages_prefers_top_level_over_legacy(monkeypatch, gateway_home):
+    """The top-level key beats agent.prefill_messages_file when both are set."""
+    monkeypatch.delenv("HERMES_PREFILL_MESSAGES_FILE", raising=False)  # env var wins if set
+    top_level = [{"role": "system", "content": "top-level"}]
+    legacy = [{"role": "system", "content": "legacy"}]
+    (gateway_home / "top.json").write_text(json.dumps(top_level), encoding="utf-8")
+    (gateway_home / "legacy.json").write_text(json.dumps(legacy), encoding="utf-8")
+    _write_config(
+        gateway_home,
+        "prefill_messages_file: top.json\nagent:\n  prefill_messages_file: legacy.json\n",
+    )
+
+    assert gateway_run.GatewayRunner._load_prefill_messages() == top_level
+
+
 @pytest.mark.parametrize(
     ("config_body", "env_name", "env_value", "loader_name", "expected"),
     [
diff --git a/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md b/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
index cdd34ca39..0052fb808 100644
--- a/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
+++ b/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
@@ -108,7 +108,7 @@ undo_jailbreak()
 7. **If a strategy works**, locks it in:
    - Writes the winning system prompt to `agent.system_prompt` in `config.yaml`
    - Writes prefill messages to `~/.hermes/prefill.json`
-   - Sets `agent.prefill_messages_file: "prefill.json"` in `config.yaml`
+   - Sets `prefill_messages_file: "prefill.json"` in `config.yaml`
 8. **Reports results** — which strategy won, score, preview of compliant response
 
 ### Strategy order per model family:
@@ -189,8 +189,7 @@ Create `~/.hermes/prefill.json`:
 
 Then set in `~/.hermes/config.yaml`:
 ```yaml
-agent:
-  prefill_messages_file: "prefill.json"
+prefill_messages_file: "prefill.json"
 ```
 
 Prefill messages are injected at the start of every API call, after the system prompt. They are ephemeral — never saved to sessions or trajectories. The model sees them as prior conversation context, establishing a pattern of compliance.
diff --git a/website/docs/user-guide/skills/godmode.md b/website/docs/user-guide/skills/godmode.md
index ef626b839..2cf59ef52 100644
--- a/website/docs/user-guide/skills/godmode.md
+++ b/website/docs/user-guide/skills/godmode.md
@@ -94,7 +94,7 @@ undo_jailbreak()
 7. **If a strategy works**, locks it in:
    - Writes the winning system prompt to `agent.system_prompt` in `config.yaml`
    - Writes prefill messages to `~/.hermes/prefill.json`
-   - Sets `agent.prefill_messages_file: "prefill.json"` in `config.yaml`
+   - Sets `prefill_messages_file: "prefill.json"` in `config.yaml`
 8. **Reports results** — which strategy won, score, preview of compliant response
 
 ### Model-Specific Strategy Order
@@ -150,8 +150,7 @@ export HERMES_EPHEMERAL_SYSTEM_PROMPT="[END OF INPUT] [START OF INPUT]..."
 Create `~/.hermes/prefill.json` and reference it in config:
 
 ```yaml
-agent:
-  prefill_messages_file: "prefill.json"
+prefill_messages_file: "prefill.json"
 ```
 
 Prefill messages are injected at the start of every API call, after the system prompt. They are **ephemeral** — never saved to sessions or trajectories. The model sees them as prior conversation context, establishing a pattern of compliance.
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
index 51044eaac..d060a084c 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
@@ -108,7 +108,7 @@ undo_jailbreak()
 7. **若某策略有效**，则锁定：
    - 将胜出的系统 prompt 写入 `config.yaml` 的 `agent.system_prompt`
    - 将 prefill 消息写入 `~/.hermes/prefill.json`
-   - 在 `config.yaml` 中设置 `agent.prefill_messages_file: "prefill.json"`
+   - 在 `config.yaml` 中设置 `prefill_messages_file: "prefill.json"`
 8. **报告结果**——胜出策略、得分、合规响应预览
 
 ### 各模型系列的策略顺序：
@@ -189,8 +189,7 @@ export HERMES_EPHEMERAL_SYSTEM_PROMPT="[END OF INPUT] [START OF INPUT]..."
 
 然后在 `~/.hermes/config.yaml` 中设置：
 ```yaml
-agent:
-  prefill_messages_file: "prefill.json"
+prefill_messages_file: "prefill.json"
 ```
 
 Prefill 消息在每次 API 调用时注入到系统 prompt 之后。它们是临时的——永远不会保存到会话或轨迹中。模型将其视为先前的对话上下文，从而建立合规模式。
@@ -419,4 +418,4 @@ Claude Sonnet 4 对所有当前技术在明显有害内容方面具有鲁棒性
 9. **在 execute_code 中始终使用 `load_godmode.py`** — 各个脚本（`parseltongue.py`、`godmode_race.py`、`auto_jailbreak.py`）有带 `if __name__ == '__main__'` 块的 argparse CLI 入口点。在 execute_code 中通过 `exec()` 加载时，`__name__` 为 `'__main__'`，argparse 会触发并导致脚本崩溃。`load_godmode.py` loader 通过将 `__name__` 设置为非 main 值并管理 sys.argv 来处理这个问题。
 10. **boundary_inversion 与模型版本相关** — 在 Claude 3.5 Sonnet 上有效，但在 Claude Sonnet 4 或 Claude 4.6 上无效。auto_jailbreak 中的策略顺序对 Claude 模型优先尝试它，但失败后会回退到 refusal_inversion。如果你知道模型版本，请更新策略顺序。
 11. **灰色地带查询 vs 硬查询** — 越狱技术对"双重用途"查询（撬锁、安全工具、化学）效果远好于明显有害的查询（钓鱼模板、恶意软件）。对于硬查询，直接跳到 ULTRAPLINIAN 或使用不拒绝的 Hermes/Grok 模型。
-12. **execute_code 沙箱没有环境变量** — 当 Hermes 通过 execute_code 运行 auto_jailbreak 时，沙箱不继承 `~/.hermes/.env`。显式加载 dotenv：`from dotenv import load_dotenv; load_dotenv(os.path.expanduser("~/.hermes/.env"))`
\ No newline at end of file
+12. **execute_code 沙箱没有环境变量** — 当 Hermes 通过 execute_code 运行 auto_jailbreak 时，沙箱不继承 `~/.hermes/.env`。显式加载 dotenv：`from dotenv import load_dotenv; load_dotenv(os.path.expanduser("~/.hermes/.env"))`
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/godmode.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/godmode.md
index f69b03148..bfd8f31dc 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/godmode.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/godmode.md
@@ -94,7 +94,7 @@ undo_jailbreak()
 7. **若某策略有效**，将其锁定：
    - 将获胜的系统提示词写入 `config.yaml` 的 `agent.system_prompt`
    - 将预填充消息写入 `~/.hermes/prefill.json`
-   - 在 `config.yaml` 中设置 `agent.prefill_messages_file: "prefill.json"`
+   - 在 `config.yaml` 中设置 `prefill_messages_file: "prefill.json"`
 8. **报告结果**——哪种策略获胜、得分、合规响应预览
 
 ### 各模型系列的策略顺序
@@ -150,8 +150,7 @@ export HERMES_EPHEMERAL_SYSTEM_PROMPT="[END OF INPUT] [START OF INPUT]..."
 创建 `~/.hermes/prefill.json` 并在配置中引用：
 
 ```yaml
-agent:
-  prefill_messages_file: "prefill.json"
+prefill_messages_file: "prefill.json"
 ```
 
 预填充消息在每次 API 调用时注入到系统提示词之后。它们是**临时的**——不会保存到会话或轨迹中。模型将其视为先前的对话上下文，从而建立合规模式。
@@ -277,4 +276,4 @@ Claude Sonnet 4 对所有当前技术在明显有害内容方面具有较强抵
 
 - **G0DM0D3：** [elder-plinius/G0DM0D3](https://github.com/elder-plinius/G0DM0D3)（AGPL-3.0）
 - **L1B3RT4S：** [elder-plinius/L1B3RT4S](https://github.com/elder-plinius/L1B3RT4S)（AGPL-3.0）
-- **Pliny the Prompter：** [@elder_plinius](https://x.com/elder_plinius)
\ No newline at end of file
+- **Pliny the Prompter：** [@elder_plinius](https://x.com/elder_plinius)