Salvage of #35508 (@dchenk), rebased onto current main. Resolved the tests/tools/test_stage2_hook_puid_pgid.py conflict (kept both the envdir-creation regression test on main and the new config-migration tests). Docker image upgrades replace code under $INSTALL_DIR but preserve $HERMES_HOME on the mounted volume, so the persisted config.yaml never received the schema migrations that non-Docker `hermes update` runs (#35406). This adds scripts/docker_config_migrate.py, invoked from stage2-hook after first-boot seeding and before gateway services start: it backs up config.yaml + .env, runs migrate_config(interactive=False), and honors HERMES_SKIP_CONFIG_MIGRATION=1 for manual control. Also fixes a latent bug in check_config_version(): it called load_config() which deep-merges DEFAULT_CONFIG, so a legacy config with no raw _config_version falsely reported as already-current. It now reads the raw on-disk file so legacy configs are correctly detected for migration. Differs from #35508 as submitted (Option B cleanup): dropped the `_config_version` line added to cli-config.yaml.example and removed the accompanying test_cli_config_example_declares_latest_version change-detector test. The example is a copy-template and has no business asserting a schema version; check_config_version() reads the user's real config.yaml, not the example. This removes a second sync point that drifts on every version bump. Closes #35508. Fixes #35406. Co-authored-by: Dmitriy Cherchenko <17372886+dchenk@users.noreply.github.com>
This commit is contained in:
@ -338,6 +338,17 @@ if [ -f "$HERMES_HOME/.env" ]; then
|
||||
chmod 600 "$HERMES_HOME/.env" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# --- Migrate persisted config schema ---
# An image upgrade swaps out the code under $INSTALL_DIR, while $HERMES_HOME
# lives on the mounted volume and is preserved. Apply the same safe,
# non-interactive config-schema migrations that `hermes update` runs for
# non-Docker installs. This happens after first-boot seeding and before the
# supervised gateway services start.
# Set HERMES_SKIP_CONFIG_MIGRATION=1 for controlled/manual migrations.
if [ -f "$HERMES_HOME/config.yaml" ]; then
    # A failed migration is reported but does not abort the rest of the hook.
    if ! s6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python" "$INSTALL_DIR/scripts/docker_config_migrate.py"; then
        echo "[stage2] Warning: docker_config_migrate.py failed; continuing"
    fi
fi
|
||||
|
||||
# auth.json: bootstrap from env on first boot only. Same semantics as the
|
||||
# pre-s6 entrypoint — the [ ! -f ] guard is critical to avoid clobbering
|
||||
# rotated refresh tokens on container restart.
|
||||
|
||||
@ -3792,15 +3792,46 @@ def get_custom_provider_context_length(
|
||||
return None
|
||||
|
||||
|
||||
def _coerce_config_version(value: Any) -> int:
|
||||
"""Return a safe integer config version, treating invalid values as legacy."""
|
||||
if isinstance(value, bool):
|
||||
return 0
|
||||
try:
|
||||
version = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
return max(version, 0)
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
    """
    Check the raw on-disk config schema version.

    ``load_config()`` deliberately starts from ``DEFAULT_CONFIG`` and deep-merges
    the user's file, which is correct for runtime reads but wrong for deciding
    whether the user's persisted schema has been migrated. A config file with no
    raw ``_config_version`` must remain visible as legacy instead of inheriting
    the latest default version in memory.

    A missing config file, or one that cannot be read or parsed, is reported
    as ``(latest, latest)`` so callers do not attempt a migration on it.

    Returns (current_version, latest_version).
    """
    latest = _coerce_config_version(DEFAULT_CONFIG.get("_config_version", 1)) or 1
    config_path = get_config_path()
    if not config_path.exists():
        return latest, latest

    try:
        with open(config_path, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}
    except Exception as e:
        # Invalid YAML needs a parse warning, not an automatic schema rewrite
        # that could replace the user's broken file with defaults.
        _warn_config_parse_failure(config_path, e)
        return latest, latest

    # A top-level scalar or list carries no version key; treat it as legacy.
    if not isinstance(config, dict):
        config = {}
    current = _coerce_config_version(config.get("_config_version"))
    return current, latest
|
||||
|
||||
|
||||
|
||||
67
scripts/docker_config_migrate.py
Normal file
67
scripts/docker_config_migrate.py
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run Docker boot-time config migrations safely."""
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from hermes_cli.config import (
|
||||
check_config_version,
|
||||
get_config_path,
|
||||
get_env_path,
|
||||
migrate_config,
|
||||
)
|
||||
from utils import env_var_enabled
|
||||
|
||||
|
||||
def _backup_path(path: Path, stamp: str) -> Path:
|
||||
base = path.with_name(f"{path.name}.bak-{stamp}")
|
||||
if not base.exists():
|
||||
return base
|
||||
for index in range(1, 1000):
|
||||
candidate = path.with_name(f"{path.name}.bak-{stamp}.{index}")
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
raise RuntimeError(f"could not choose a backup path for {path}")
|
||||
|
||||
|
||||
def _backup_existing(paths: Iterable[Path]) -> list[Path]:
    """Copy each existing regular file in *paths* to a timestamped backup.

    All backups made by one call share a single UTC timestamp. Entries that
    are not regular files are skipped.

    Returns the backup paths in the order they were written.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    written: list[Path] = []
    # The generator is lazy, so each is_file() check runs right before its copy.
    for source in (p for p in paths if p.is_file()):
        target = _backup_path(source, timestamp)
        shutil.copy2(source, target)
        written.append(target)
    return written
|
||||
|
||||
|
||||
def main() -> int:
    """Migrate the persisted config schema if it is behind; return an exit code.

    Nothing is done when ``HERMES_SKIP_CONFIG_MIGRATION`` is enabled or when
    the on-disk version is not older than the latest one. Otherwise the config
    and env files are backed up and the migration runs non-interactively.
    """
    if env_var_enabled("HERMES_SKIP_CONFIG_MIGRATION"):
        print("[config-migrate] HERMES_SKIP_CONFIG_MIGRATION is set; skipping config migration")
        return 0

    on_disk, latest = check_config_version()
    if on_disk < latest:
        saved = _backup_existing((get_config_path(), get_env_path()))
        summary = ", ".join(map(str, saved)) or "none"
        print(
            f"[config-migrate] Migrating config schema {on_disk} -> {latest}; "
            f"backups: {summary}"
        )
        migrate_config(interactive=False, quiet=False)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Any unexpected failure becomes a one-line stderr message and exit code 1.
    try:
        exit_code = main()
    except Exception as exc:
        print(f"[config-migrate] ERROR: {exc}", file=sys.stderr)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -9,6 +9,7 @@ import yaml
|
||||
|
||||
from hermes_cli.config import (
|
||||
DEFAULT_CONFIG,
|
||||
check_config_version,
|
||||
get_hermes_home,
|
||||
ensure_hermes_home,
|
||||
get_compatible_custom_providers,
|
||||
@ -542,6 +543,28 @@ class TestConfigMigrationSecretPrompts:
|
||||
assert results["env_added"] == ["TEST_API_KEY"]
|
||||
|
||||
|
||||
class TestConfigVersionDetection:
    """check_config_version() must report the schema version stored on disk."""

    def test_check_config_version_uses_raw_on_disk_version(self, tmp_path):
        # The file has no ``_config_version`` key: the merged load_config()
        # view shows the default version, the raw check must show legacy (0).
        (tmp_path / "config.yaml").write_text("model: {}\n", encoding="utf-8")
        expected_latest = DEFAULT_CONFIG["_config_version"]

        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            assert load_config()["_config_version"] == expected_latest
            assert check_config_version() == (0, expected_latest)

    def test_check_config_version_treats_missing_file_as_current(self, tmp_path):
        # No config.yaml exists under the temporary HERMES_HOME.
        expected_latest = DEFAULT_CONFIG["_config_version"]

        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            assert check_config_version() == (expected_latest, expected_latest)

    def test_check_config_version_does_not_migrate_invalid_yaml(self, tmp_path):
        # Unparseable YAML must be reported as current, not as legacy.
        broken = tmp_path / "config.yaml"
        broken.write_text("model: [unterminated\n", encoding="utf-8")
        expected_latest = DEFAULT_CONFIG["_config_version"]

        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            assert check_config_version() == (expected_latest, expected_latest)
|
||||
|
||||
|
||||
class TestAnthropicTokenMigration:
|
||||
"""Test that config version 8→9 clears ANTHROPIC_TOKEN."""
|
||||
|
||||
@ -904,4 +927,3 @@ class TestEnvWriteDenylist:
|
||||
# But the write path still refuses to update it
|
||||
with pytest.raises(ValueError, match="denylist"):
|
||||
save_env_value("LD_PRELOAD", "/tmp/evil.so")
|
||||
|
||||
|
||||
119
tests/tools/test_docker_config_migrate.py
Normal file
119
tests/tools/test_docker_config_migrate.py
Normal file
@ -0,0 +1,119 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
SCRIPT = REPO_ROOT / "scripts" / "docker_config_migrate.py"
|
||||
|
||||
|
||||
def _run_migration(hermes_home: Path, **env_overrides: str) -> subprocess.CompletedProcess[str]:
    """Run the migration script in a subprocess against *hermes_home*.

    The child gets a copy of the current environment with ``HERMES_HOME``,
    ``HERMES_SKIP_CHMOD`` and ``PYTHONPATH`` set for the test; entries in
    *env_overrides* are applied last and therefore win.
    """
    child_env = {
        **os.environ,
        "HERMES_HOME": str(hermes_home),
        "HERMES_SKIP_CHMOD": "1",
        "PYTHONPATH": str(REPO_ROOT),
        **env_overrides,
    }
    command = [sys.executable, str(SCRIPT)]
    return subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        env=child_env,
        capture_output=True,
        text=True,
    )
|
||||
|
||||
|
||||
def test_docker_config_migrate_backs_up_and_migrates_legacy_config(tmp_path: Path) -> None:
    """A versioned but outdated config is backed up, then migrated to the latest schema."""
    legacy_config = {
        "_config_version": 11,
        "custom_providers": [
            {
                "name": "Local API",
                "base_url": "http://localhost:8080/v1",
                "api_key": "test-key",
            }
        ],
    }
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.safe_dump(legacy_config), encoding="utf-8")
    (tmp_path / ".env").write_text("OPENROUTER_API_KEY=test\n", encoding="utf-8")

    proc = _run_migration(tmp_path)

    assert proc.returncode == 0, proc.stderr
    assert "Migrating config schema 11 ->" in proc.stdout
    migrated = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    assert migrated["_config_version"] == DEFAULT_CONFIG["_config_version"]
    assert "custom_providers" not in migrated
    assert migrated["providers"]["local-api"]["api"] == "http://localhost:8080/v1"
    # Both the config and the env file must have a timestamped backup.
    assert list(tmp_path.glob("config.yaml.bak-*"))
    assert list(tmp_path.glob(".env.bak-*"))
|
||||
|
||||
|
||||
def test_docker_config_migrate_backs_up_and_migrates_unversioned_config(tmp_path: Path) -> None:
    """A config with no ``_config_version`` is treated as version 0 and migrated."""
    unversioned_config = {
        "custom_providers": [
            {
                "name": "Local API",
                "base_url": "http://localhost:8080/v1",
                "api_key": "test-key",
            }
        ],
    }
    config_path = tmp_path / "config.yaml"
    config_path.write_text(yaml.safe_dump(unversioned_config), encoding="utf-8")

    proc = _run_migration(tmp_path)

    assert proc.returncode == 0, proc.stderr
    assert "Migrating config schema 0 ->" in proc.stdout
    migrated = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    assert migrated["_config_version"] == DEFAULT_CONFIG["_config_version"]
    assert "custom_providers" not in migrated
    assert migrated["providers"]["local-api"]["api"] == "http://localhost:8080/v1"
    assert list(tmp_path.glob("config.yaml.bak-*"))
|
||||
|
||||
|
||||
def test_docker_config_migrate_does_not_rewrite_invalid_yaml(tmp_path: Path) -> None:
    """Unparseable YAML produces a stderr warning; nothing is rewritten or backed up."""
    broken_yaml = "model: [unterminated\n"
    config_path = tmp_path / "config.yaml"
    config_path.write_text(broken_yaml, encoding="utf-8")

    proc = _run_migration(tmp_path)

    assert proc.returncode == 0, proc.stderr
    assert "Migrating config schema" not in proc.stdout
    assert "hermes config:" in proc.stderr
    # The broken file is left byte-for-byte intact and no backup is created.
    assert config_path.read_text(encoding="utf-8") == broken_yaml
    assert list(tmp_path.glob("*.bak-*")) == []
|
||||
|
||||
|
||||
def test_docker_config_migrate_skip_env_leaves_config_unchanged(tmp_path: Path) -> None:
    """With the opt-out variable set, an outdated config is neither migrated nor backed up."""
    outdated_yaml = yaml.safe_dump({"_config_version": 11})
    config_path = tmp_path / "config.yaml"
    config_path.write_text(outdated_yaml, encoding="utf-8")

    proc = _run_migration(tmp_path, HERMES_SKIP_CONFIG_MIGRATION="1")

    assert proc.returncode == 0, proc.stderr
    assert "skipping config migration" in proc.stdout
    assert config_path.read_text(encoding="utf-8") == outdated_yaml
    assert list(tmp_path.glob("*.bak-*")) == []
|
||||
@ -99,3 +99,12 @@ def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text:
|
||||
assert mkdir_line in stage2_text
|
||||
assert write_line in stage2_text
|
||||
assert stage2_text.index(mkdir_line) < stage2_text.index(write_line)
|
||||
|
||||
|
||||
def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
    """The hook text must reference the migration script and the unprivileged venv-Python invocation."""
    required_snippets = (
        "scripts/docker_config_migrate.py",
        's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"',
    )
    for snippet in required_snippets:
        assert snippet in stage2_text
|
||||
|
||||
|
||||
def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
    """The hook text must mention the opt-out environment variable."""
    opt_out_var = "HERMES_SKIP_CONFIG_MIGRATION"
    assert opt_out_var in stage2_text
|
||||
|
||||
@ -418,7 +418,7 @@ The official image is based on `debian:13.4` and includes:
|
||||
- **[`s6-overlay`](https://github.com/just-containers/s6-overlay) v3** as PID 1 (replaces the older `tini`) — supervises the dashboard and per-profile gateways with auto-restart on crash, reaps zombie subprocesses, and forwards signals.
|
||||
|
||||
The container's `ENTRYPOINT` is s6-overlay's `/init`. On boot it:
|
||||
1. Runs `/etc/cont-init.d/01-hermes-setup` (= `docker/stage2-hook.sh`) as root: optional UID/GID remap, fixes volume ownership, seeds `.env` / `config.yaml` / `SOUL.md` on first boot, runs non-interactive config-schema migrations unless `HERMES_SKIP_CONFIG_MIGRATION=1`, syncs bundled skills.
|
||||
2. Runs `/etc/cont-init.d/02-reconcile-profiles` (= `hermes_cli.container_boot`): walks `$HERMES_HOME/profiles/<name>/`, recreates the per-profile gateway s6 service slot under `/run/service/gateway-<profile>/`, and auto-starts only those whose last recorded state was `running` (see [Per-profile gateway supervision](#per-profile-gateway-supervision)).
|
||||
3. Starts the static `main-hermes` and `dashboard` s6-rc services.
|
||||
4. Exec's the container's CMD as the main program (`/opt/hermes/docker/main-wrapper.sh`), which routes the arguments the user passed to `docker run`:
|
||||
@ -462,7 +462,11 @@ Each profile created with `hermes profile create <name>` automatically gets an s
|
||||
|
||||
## Upgrading
|
||||
|
||||
Pull the latest image and recreate the container. Your data directory is
preserved, and the container runs non-interactive config-schema migrations
against the mounted `$HERMES_HOME/config.yaml` before starting the gateway.
When a migration is needed, Hermes first writes timestamped backups of
`config.yaml` and `.env` next to the originals.
|
||||
|
||||
```sh
|
||||
docker pull nousresearch/hermes-agent:latest
|
||||
@ -481,6 +485,9 @@ docker compose pull
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Set `HERMES_SKIP_CONFIG_MIGRATION=1` only if you need to inspect or migrate the
|
||||
persisted config manually before letting the new image rewrite it.
|
||||
|
||||
## Skills and credential files
|
||||
|
||||
When using Docker as the execution environment (not the methods above, but when the agent runs commands inside a Docker sandbox — see [Configuration → Docker Backend](./configuration.md#docker-backend)), Hermes reuses a single long-lived container for all tool calls and automatically bind-mounts the skills directory (`~/.hermes/skills/`) and any credential files declared by skills into that container as read-only volumes. Skill scripts, templates, and references are available inside the sandbox without manual configuration, and because the container persists for the life of the Hermes process, any dependencies you install or files you write stay around for the next tool call.
|
||||
|
||||
Reference in New Issue
Block a user