diff --git a/docker/stage2-hook.sh b/docker/stage2-hook.sh
index 9abe4da6a..20035c30d 100755
--- a/docker/stage2-hook.sh
+++ b/docker/stage2-hook.sh
@@ -338,6 +338,17 @@ if [ -f "$HERMES_HOME/.env" ]; then
     chmod 600 "$HERMES_HOME/.env" 2>/dev/null || true
 fi
 
+# --- Migrate persisted config schema ---
+# Docker image upgrades replace the code under $INSTALL_DIR but preserve
+# $HERMES_HOME on the mounted volume. Run the same safe, non-interactive
+# config-schema migrations that `hermes update` runs for non-Docker installs,
+# after first-boot seeding and before supervised gateway services start.
+# Set HERMES_SKIP_CONFIG_MIGRATION=1 for controlled/manual migrations.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    s6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python" "$INSTALL_DIR/scripts/docker_config_migrate.py" \
+        || echo "[stage2] Warning: docker_config_migrate.py failed; continuing"
+fi
+
 # auth.json: bootstrap from env on first boot only. Same semantics as the
 # pre-s6 entrypoint — the [ ! -f ] guard is critical to avoid clobbering
 # rotated refresh tokens on container restart.
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 447d5dc85..61996a8fd 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -3792,15 +3792,46 @@ def get_custom_provider_context_length(
     return None
 
 
+def _coerce_config_version(value: Any) -> int:
+    """Return a safe integer config version, treating invalid values as legacy."""
+    if isinstance(value, bool):
+        return 0
+    try:
+        version = int(value)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: YAML `.inf`
+        return 0
+    return max(version, 0)
+
+
 def check_config_version() -> Tuple[int, int]:
     """
-    Check config version.
-    
+    Check the raw on-disk config schema version.
+
+    ``load_config()`` deliberately starts from ``DEFAULT_CONFIG`` and deep-merges
+    the user's file, which is correct for runtime reads but wrong for deciding
+    whether the user's persisted schema has been migrated. A config file with no
+    raw ``_config_version`` must remain visible as legacy instead of inheriting
+    the latest default version in memory.
+
     Returns (current_version, latest_version).
     """
-    config = load_config()
-    current = config.get("_config_version", 0)
-    latest = DEFAULT_CONFIG.get("_config_version", 1)
+    latest = _coerce_config_version(DEFAULT_CONFIG.get("_config_version", 1)) or 1
+    config_path = get_config_path()
+    if not config_path.exists():
+        return latest, latest
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            config = yaml.safe_load(f) or {}
+    except Exception as e:
+        # Invalid YAML needs a parse warning, not an automatic schema rewrite
+        # that could replace the user's broken file with defaults.
+        _warn_config_parse_failure(config_path, e)
+        return latest, latest
+
+    if not isinstance(config, dict):
+        config = {}
+    current = _coerce_config_version(config.get("_config_version"))
     return current, latest
 
 
diff --git a/scripts/docker_config_migrate.py b/scripts/docker_config_migrate.py
new file mode 100644
index 000000000..a0c83ed12
--- /dev/null
+++ b/scripts/docker_config_migrate.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Run Docker boot-time config migrations safely."""
+from __future__ import annotations
+
+import shutil
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Iterable
+
+from hermes_cli.config import (
+    check_config_version,
+    get_config_path,
+    get_env_path,
+    migrate_config,
+)
+from utils import env_var_enabled
+
+
+def _backup_path(path: Path, stamp: str) -> Path:
+    """Return the first free ``<name>.bak-<stamp>[.N]`` path next to *path*."""
+    base = path.with_name(f"{path.name}.bak-{stamp}")
+    if not base.exists():
+        return base
+    for index in range(1, 1000):
+        candidate = path.with_name(f"{path.name}.bak-{stamp}.{index}")
+        if not candidate.exists():
+            return candidate
+    raise RuntimeError(f"could not choose a backup path for {path}")
+
+
+def _backup_existing(paths: Iterable[Path]) -> list[Path]:
+    """Copy each path that is a regular file to a UTC-stamped backup; return the copies."""
+    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+    backups: list[Path] = []
+    for path in paths:
+        if not path.is_file():
+            continue
+        dest = _backup_path(path, stamp)
+        shutil.copy2(path, dest)
+        backups.append(dest)
+    return backups
+
+
+def main() -> int:
+    """Back up the config and env files, then migrate, when the on-disk schema is outdated."""
+    if env_var_enabled("HERMES_SKIP_CONFIG_MIGRATION"):
+        print("[config-migrate] HERMES_SKIP_CONFIG_MIGRATION is set; skipping config migration")
+        return 0
+
+    current_ver, latest_ver = check_config_version()
+    if current_ver >= latest_ver:
+        return 0
+
+    backups = _backup_existing((get_config_path(), get_env_path()))
+    backup_text = ", ".join(str(path) for path in backups) if backups else "none"
+    print(
+        f"[config-migrate] Migrating config schema {current_ver} -> {latest_ver}; "
+        f"backups: {backup_text}"
+    )
+    migrate_config(interactive=False, quiet=False)
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except Exception as exc:
+        print(f"[config-migrate] ERROR: {exc}", file=sys.stderr)
+        raise SystemExit(1)
diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py
index ec0a6aea7..4659934db 100644
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@@ -9,6 +9,7 @@ import yaml
 
 from hermes_cli.config import (
     DEFAULT_CONFIG,
+    check_config_version,
     get_hermes_home,
     ensure_hermes_home,
     get_compatible_custom_providers,
@@ -542,6 +543,28 @@ class TestConfigMigrationSecretPrompts:
         assert results["env_added"] == ["TEST_API_KEY"]
 
 
+class TestConfigVersionDetection:
+    def test_check_config_version_uses_raw_on_disk_version(self, tmp_path):
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text("model: {}\n", encoding="utf-8")
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            assert load_config()["_config_version"] == DEFAULT_CONFIG["_config_version"]
+            assert check_config_version() == (0, DEFAULT_CONFIG["_config_version"])
+
+    def test_check_config_version_treats_missing_file_as_current(self, tmp_path):
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            latest = DEFAULT_CONFIG["_config_version"]
+            assert check_config_version() == (latest, latest)
+
+    def test_check_config_version_does_not_migrate_invalid_yaml(self, tmp_path):
+        (tmp_path / "config.yaml").write_text("model: [unterminated\n", encoding="utf-8")
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            latest = DEFAULT_CONFIG["_config_version"]
+            assert check_config_version() == (latest, latest)
+
+
 class TestAnthropicTokenMigration:
     """Test that config version 8→9 clears ANTHROPIC_TOKEN."""
 
@@ -904,4 +927,3 @@ class TestEnvWriteDenylist:
         # But the write path still refuses to update it
         with pytest.raises(ValueError, match="denylist"):
             save_env_value("LD_PRELOAD", "/tmp/evil.so")
-
diff --git a/tests/tools/test_docker_config_migrate.py b/tests/tools/test_docker_config_migrate.py
new file mode 100644
index 000000000..61f1dcc1a
--- /dev/null
+++ b/tests/tools/test_docker_config_migrate.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+from hermes_cli.config import DEFAULT_CONFIG
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = REPO_ROOT / "scripts" / "docker_config_migrate.py"
+
+
+def _run_migration(hermes_home: Path, **env_overrides: str) -> subprocess.CompletedProcess[str]:
+    env = os.environ.copy()
+    env.update(
+        {
+            "HERMES_HOME": str(hermes_home),
+            "HERMES_SKIP_CHMOD": "1",
+            "PYTHONPATH": str(REPO_ROOT),
+        }
+    )
+    env.update(env_overrides)
+    return subprocess.run(
+        [sys.executable, str(SCRIPT)],
+        cwd=str(REPO_ROOT),
+        env=env,
+        capture_output=True,
+        text=True,
+    )
+
+
+def test_docker_config_migrate_backs_up_and_migrates_legacy_config(tmp_path: Path) -> None:
+    config_path = tmp_path / "config.yaml"
+    env_path = tmp_path / ".env"
+    config_path.write_text(
+        yaml.safe_dump(
+            {
+                "_config_version": 11,
+                "custom_providers": [
+                    {
+                        "name": "Local API",
+                        "base_url": "http://localhost:8080/v1",
+                        "api_key": "test-key",
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+    env_path.write_text("OPENROUTER_API_KEY=test\n", encoding="utf-8")
+
+    proc = _run_migration(tmp_path)
+
+    assert proc.returncode == 0, proc.stderr
+    assert "Migrating config schema 11 ->" in proc.stdout
+    raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
+    assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+    assert "custom_providers" not in raw
+    assert raw["providers"]["local-api"]["api"] == "http://localhost:8080/v1"
+    assert list(tmp_path.glob("config.yaml.bak-*"))
+    assert list(tmp_path.glob(".env.bak-*"))
+
+
+def test_docker_config_migrate_backs_up_and_migrates_unversioned_config(tmp_path: Path) -> None:
+    config_path = tmp_path / "config.yaml"
+    config_path.write_text(
+        yaml.safe_dump(
+            {
+                "custom_providers": [
+                    {
+                        "name": "Local API",
+                        "base_url": "http://localhost:8080/v1",
+                        "api_key": "test-key",
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    proc = _run_migration(tmp_path)
+
+    assert proc.returncode == 0, proc.stderr
+    assert "Migrating config schema 0 ->" in proc.stdout
+    raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
+    assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+    assert "custom_providers" not in raw
+    assert raw["providers"]["local-api"]["api"] == "http://localhost:8080/v1"
+    assert list(tmp_path.glob("config.yaml.bak-*"))
+
+
+def test_docker_config_migrate_does_not_rewrite_invalid_yaml(tmp_path: Path) -> None:
+    config_path = tmp_path / "config.yaml"
+    original = "model: [unterminated\n"
+    config_path.write_text(original, encoding="utf-8")
+
+    proc = _run_migration(tmp_path)
+
+    assert proc.returncode == 0, proc.stderr
+    assert "Migrating config schema" not in proc.stdout
+    assert "hermes config:" in proc.stderr
+    assert config_path.read_text(encoding="utf-8") == original
+    assert not list(tmp_path.glob("*.bak-*"))
+
+
+def test_docker_config_migrate_skip_env_leaves_config_unchanged(tmp_path: Path) -> None:
+    config_path = tmp_path / "config.yaml"
+    original = yaml.safe_dump({"_config_version": 11})
+    config_path.write_text(original, encoding="utf-8")
+
+    proc = _run_migration(tmp_path, HERMES_SKIP_CONFIG_MIGRATION="1")
+
+    assert proc.returncode == 0, proc.stderr
+    assert "skipping config migration" in proc.stdout
+    assert config_path.read_text(encoding="utf-8") == original
+    assert not list(tmp_path.glob("*.bak-*"))
diff --git a/tests/tools/test_stage2_hook_puid_pgid.py b/tests/tools/test_stage2_hook_puid_pgid.py
index 9bf84ac6f..85f3fb131 100644
--- a/tests/tools/test_stage2_hook_puid_pgid.py
+++ b/tests/tools/test_stage2_hook_puid_pgid.py
@@ -99,3 +99,12 @@ def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text:
     assert mkdir_line in stage2_text
     assert write_line in stage2_text
     assert stage2_text.index(mkdir_line) < stage2_text.index(write_line)
+
+
+def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
+    assert "scripts/docker_config_migrate.py" in stage2_text
+    assert 's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"' in stage2_text
+
+
+def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
+    assert "HERMES_SKIP_CONFIG_MIGRATION" in stage2_text
diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md
index 9168d39ad..85b6984d8 100644
--- a/website/docs/user-guide/docker.md
+++ b/website/docs/user-guide/docker.md
@@ -418,7 +418,7 @@ The official image is based on `debian:13.4` and includes:
 - **[`s6-overlay`](https://github.com/just-containers/s6-overlay) v3** as PID 1 (replaces the older `tini`) — supervises the dashboard and per-profile gateways with auto-restart on crash, reaps zombie subprocesses, and forwards signals.
 
 The container's `ENTRYPOINT` is s6-overlay's `/init`. On boot it:
-1. Runs `/etc/cont-init.d/01-hermes-setup` (= `docker/stage2-hook.sh`) as root: optional UID/GID remap, fixes volume ownership, seeds `.env` / `config.yaml` / `SOUL.md` on first boot, syncs bundled skills.
+1. Runs `/etc/cont-init.d/01-hermes-setup` (= `docker/stage2-hook.sh`) as root: optional UID/GID remap, fixes volume ownership, seeds `.env` / `config.yaml` / `SOUL.md` on first boot, runs non-interactive config-schema migrations unless `HERMES_SKIP_CONFIG_MIGRATION=1`, syncs bundled skills.
 2. Runs `/etc/cont-init.d/02-reconcile-profiles` (= `hermes_cli.container_boot`): walks `$HERMES_HOME/profiles//`, recreates the per-profile gateway s6 service slot under `/run/service/gateway-/`, and auto-starts only those whose last recorded state was `running` (see [Per-profile gateway supervision](#per-profile-gateway-supervision)).
 3. Starts the static `main-hermes` and `dashboard` s6-rc services.
 4. Exec's the container's CMD as the main program (`/opt/hermes/docker/main-wrapper.sh`), which routes the arguments the user passed to `docker run`:
@@ -462,7 +462,11 @@ Each profile created with `hermes profile create ` automatically gets an s
 
 ## Upgrading
 
-Pull the latest image and recreate the container. Your data directory is untouched.
+Pull the latest image and recreate the container. Your data directory is
+preserved, and the container runs non-interactive config-schema migrations
+against the mounted `$HERMES_HOME/config.yaml` before starting the gateway.
+When a migration is needed, Hermes writes timestamped backups next to
+`config.yaml` and `.env` first.
 
 ```sh
 docker pull nousresearch/hermes-agent:latest
@@ -481,6 +485,9 @@ docker compose pull
 docker compose up -d
 ```
 
+Set `HERMES_SKIP_CONFIG_MIGRATION=1` only if you need to inspect or migrate the
+persisted config manually before letting the new image rewrite it.
+
 ## Skills and credential files
 
 When using Docker as the execution environment (not the methods above, but when the agent runs commands inside a Docker sandbox — see [Configuration → Docker Backend](./configuration.md#docker-backend)), Hermes reuses a single long-lived container for all tool calls and automatically bind-mounts the skills directory (`~/.hermes/skills/`) and any credential files declared by skills into that container as read-only volumes. Skill scripts, templates, and references are available inside the sandbox without manual configuration, and because the container persists for the life of the Hermes process, any dependencies you install or files you write stay around for the next tool call.