diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b48b0bab0..275564107 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -171,6 +171,11 @@ jobs: source .venv/bin/activate uv pip install -e ".[all,dev]" + - name: Packaged-wheel i18n smoke test + run: | + source .venv/bin/activate + python -m pytest -m integration tests/test_wheel_locales_e2e.py -v + - name: Run e2e tests run: | source .venv/bin/activate diff --git a/MANIFEST.in b/MANIFEST.in index a0296c377..a6749adc2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ graft skills graft optional-skills +graft locales # Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the # PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs # built from the sdist (e.g. Homebrew, downstream packagers). package-data diff --git a/agent/i18n.py b/agent/i18n.py index 034fb747b..ef9fd4b06 100644 --- a/agent/i18n.py +++ b/agent/i18n.py @@ -32,6 +32,7 @@ from __future__ import annotations import logging import os +import sysconfig import threading from functools import lru_cache from pathlib import Path @@ -87,11 +88,54 @@ _catalog_lock = threading.Lock() def _locales_dir() -> Path: """Return the directory containing locale YAML files. - Lives next to the repo root so both the bundled install and editable - checkouts find it without PYTHONPATH gymnastics. + Resolution order, first existing wins: + + 1. ``HERMES_BUNDLED_LOCALES`` env var -- set by the Nix wrapper (or any + sealed-packaging system) to point at the installed catalog directory. + 2. ``/locales`` -- source checkouts and ``pip install -e .``, + where the working tree sits next to ``agent/``. + 3. ``/locales`` -- pip wheel installs. + setuptools ``data-files`` extracts ``locales/*.yaml`` under the + interpreter's ``data`` scheme; the other schemes are checked as a + safety net for nonstandard layouts. + + Falling through to the source-style path (even when missing) keeps + ``_load_catalog`` error messages informative -- it logs the path it + looked at -- rather than raising. """ - # agent/i18n.py -> agent/ -> repo root - return Path(__file__).resolve().parent.parent / "locales" + override = os.getenv("HERMES_BUNDLED_LOCALES", "").strip() + if override: + candidate = Path(override) + if candidate.is_dir(): + return candidate + logger.warning( + "HERMES_BUNDLED_LOCALES points to a non-directory path (%s); " + "falling back to bundled/source locale resolution", + override, + ) + + # agent/i18n.py -> agent/ -> repo root (source checkout, editable install) + source_dir = Path(__file__).resolve().parent.parent / "locales" + if source_dir.is_dir(): + return source_dir + + # pip wheel install: data-files lands under the interpreter data scheme. + # ``data`` (== sys.prefix in a venv) is where setuptools data-files extract + # and is checked first. ``purelib``/``platlib`` (site-packages) are a safety + # net for nonstandard layouts. NOTE: this does NOT cover ``pip install + # --user`` (user scheme, ~/.local/locales) or ``pip install --target`` -- + # both are out of scope; see the plan header. + for scheme in ("data", "purelib", "platlib"): + raw = sysconfig.get_path(scheme) + if not raw: + continue + candidate = Path(raw) / "locales" + if candidate.is_dir(): + return candidate + + # Last resort: return the source-style path so _load_catalog's catalog-missing + # log (logger.debug "i18n catalog missing for %s at %s") stays informative. + return source_dir def _normalize_lang(value: Any) -> str: diff --git a/nix/checks.nix b/nix/checks.nix index 63ec1eb67..7ae8c6ef2 100644 --- a/nix/checks.nix +++ b/nix/checks.nix @@ -160,6 +160,53 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2) echo "ok" > $out/result ''; + # Verify bundled i18n locale catalogs are present and resolvable. + # Regression for #23943 / #27632 / #35374 — sealed Nix venvs dropped + # locales/, surfacing raw i18n keys like gateway.reset.header_default. + bundled-locales = pkgs.runCommand "hermes-bundled-locales" { } '' + set -e + echo "=== Checking bundled locales ===" + test -d ${hermes-agent}/share/hermes-agent/locales || (echo "FAIL: locales directory missing"; exit 1) + echo "PASS: locales directory exists" + + LOC_COUNT=$(find ${hermes-agent}/share/hermes-agent/locales -name "*.yaml" | wc -l) + test "$LOC_COUNT" -ge 16 || (echo "FAIL: expected >=16 catalogs, found $LOC_COUNT"; exit 1) + echo "PASS: $LOC_COUNT locale catalogs found" + + test -f ${hermes-agent}/share/hermes-agent/locales/en.yaml || (echo "FAIL: en.yaml missing"; exit 1) + echo "PASS: en.yaml present" + + grep -q "HERMES_BUNDLED_LOCALES" ${hermes-agent}/bin/hermes || \ + (echo "FAIL: HERMES_BUNDLED_LOCALES not in wrapper"; exit 1) + echo "PASS: HERMES_BUNDLED_LOCALES set in wrapper" + + echo "=== Rendering via the wrapper override (HERMES_BUNDLED_LOCALES) ===" + export HOME=$(mktemp -d) + RENDERED=$(cd "$HOME" && HERMES_BUNDLED_LOCALES=${hermes-agent}/share/hermes-agent/locales \ + ${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n.t('gateway.reset.header_default', lang='en'))") + echo "rendered: $RENDERED" + test "$RENDERED" != "gateway.reset.header_default" || (echo "FAIL: i18n returned the raw key with HERMES_BUNDLED_LOCALES set"; exit 1) + echo "PASS: i18n renders a human string via the wrapper override" + + # Defense-in-depth check: the sealed venv must ALSO resolve catalogs + # with NO env var, via the wheel's setuptools data-files materialized + # into the venv data scheme. If a future uv2nix bump drops data-files, + # the wrapper override above would mask the regression at runtime while + # `pip install`/other sealed paths silently break — this catches it. + echo "=== Rendering WITHOUT the env var (data-files materialization) ===" + BARE_DIR=$(cd "$HOME" && ${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n._locales_dir())") + BARE=$(cd "$HOME" && ${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n.t('gateway.reset.header_default', lang='en'))") + echo "resolved dir (no env var): $BARE_DIR" + echo "rendered: $BARE" + test "$BARE" != "gateway.reset.header_default" || \ + (echo "FAIL: sealed venv could not resolve locales without HERMES_BUNDLED_LOCALES — data-files materialization regressed"; exit 1) + echo "PASS: sealed venv resolves locales via data-files without the env var" + + echo "=== All bundled locales checks passed ===" + mkdir -p $out + echo "ok" > $out/result + ''; + # Verify bundled TUI is present and compiled bundled-tui = pkgs.runCommand "hermes-bundled-tui" { } '' set -e diff --git a/nix/hermes-agent.nix b/nix/hermes-agent.nix index 073e652f8..1dd3031fb 100644 --- a/nix/hermes-agent.nix +++ b/nix/hermes-agent.nix @@ -67,6 +67,21 @@ let filter = path: _type: !(lib.hasInfix "/__pycache__/" path); }; + # i18n locale catalogs (locales/*.yaml). Shipped into the store and pointed + # at by HERMES_BUNDLED_LOCALES so the wrapped binary always resolves human + # strings instead of raw i18n keys (#23943 / #27632 / #35374). + # + # Defense-in-depth, not load-bearing: the wheel already declares locales/ as + # setuptools data-files, so uv2nix materializes them into the venv's data + # scheme and agent/i18n.py resolves them with no env var. The wrapper override + # pins the store path so a future uv2nix change that drops data-files can't + # silently ship raw keys via `nix build` (checks don't run on a plain build). + # The bundled-locales flake check verifies BOTH paths independently. + # + # Plain cleanSource (no __pycache__ filter): locales/ is bare *.yaml, never + # compiled, so it never carries a __pycache__ dir to exclude. + bundledLocales = lib.cleanSource ../locales; + runtimeDeps = [ nodejs ripgrep @@ -151,6 +166,7 @@ stdenv.mkDerivation (finalAttrs: { mkdir -p $out/share/hermes-agent $out/bin cp -r ${bundledSkills} $out/share/hermes-agent/skills cp -r ${bundledPlugins} $out/share/hermes-agent/plugins + cp -r ${bundledLocales} $out/share/hermes-agent/locales cp -r ${hermesWeb} $out/share/hermes-agent/web_dist mkdir -p $out/ui-tui @@ -162,6 +178,7 @@ stdenv.mkDerivation (finalAttrs: { --suffix PATH : "${runtimePath}" \ --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \ --set HERMES_BUNDLED_PLUGINS $out/share/hermes-agent/plugins \ + --set HERMES_BUNDLED_LOCALES $out/share/hermes-agent/locales \ --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \ --set HERMES_TUI_DIR $out/ui-tui \ --set HERMES_PYTHON ${hermesVenv}/bin/python3 \ diff --git a/pyproject.toml b/pyproject.toml index f2bb0813b..380567399 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -230,6 +230,15 @@ hermes-acp = "acp_adapter.entry:main" [tool.setuptools] py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils", "mcp_serve"] +[tool.setuptools.data-files] +# i18n catalogs. locales/ is a bare data directory (no __init__.py), so it is +# neither a package (packages.find) nor package-data (which attaches to a +# package). data-files ships it in the wheel; MANIFEST.in `graft locales` +# ships it in the sdist. Without this, sealed installs (pip wheel, Nix store +# venv) drop the catalogs and gateway/CLI commands surface raw i18n keys like +# `gateway.reset.header_default` (#27632, #35374, #23943). +locales = ["locales/*.yaml"] + [tool.setuptools.package-data] hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh", "scripts/install.ps1"] gateway = ["assets/**/*"] diff --git a/tests/agent/test_i18n.py b/tests/agent/test_i18n.py index 6c374ebf4..56a756862 100644 --- a/tests/agent/test_i18n.py +++ b/tests/agent/test_i18n.py @@ -167,3 +167,63 @@ def test_t_missing_key_in_non_english_falls_back_to_english(tmp_path, monkeypatc def test_t_unknown_language_uses_english(): """Unknown lang codes normalize to English, not to a key-path fallback.""" assert i18n.t("approval.denied", lang="klingon") == i18n.t("approval.denied", lang="en") + + +# --------------------------------------------------------------------------- +# _locales_dir resolution ladder -- regression for #23943 / #27632 / #35374. +# Sealed installs (Nix store venv, pip wheel) have no source tree next to +# agent/, so _locales_dir must resolve via env override or the data scheme. +# --------------------------------------------------------------------------- + +def test_locales_dir_env_override_used_when_dir_exists(tmp_path, monkeypatch): + """HERMES_BUNDLED_LOCALES wins when it points at a real directory.""" + bundled = tmp_path / "bundled-locales" + bundled.mkdir() + monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(bundled)) + assert i18n._locales_dir() == bundled + + +def test_locales_dir_env_override_ignored_when_missing(tmp_path, monkeypatch): + """A bogus HERMES_BUNDLED_LOCALES falls through to source/wheel resolution + instead of returning a path that doesn't exist.""" + monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(tmp_path / "does-not-exist")) + result = i18n._locales_dir() + assert result != tmp_path / "does-not-exist" + # In a source checkout this is the repo-root locales dir. + assert result.name == "locales" + + +def test_locales_dir_falls_back_to_data_scheme(tmp_path, monkeypatch): + """When neither the env override nor a source-adjacent locales/ exists, + _locales_dir uses sysconfig's data scheme (the pip-wheel layout).""" + import sysconfig + + # No env override. + monkeypatch.delenv("HERMES_BUNDLED_LOCALES", raising=False) + + # Force the source-adjacent path to a location with no locales/ dir. + fake_pkg = tmp_path / "site-packages" / "agent" + fake_pkg.mkdir(parents=True) + monkeypatch.setattr(i18n, "__file__", str(fake_pkg / "i18n.py")) + + # Stand up a fake data scheme containing locales/. + data_root = tmp_path / "data-scheme" + (data_root / "locales").mkdir(parents=True) + real_get_path = sysconfig.get_path + + def fake_get_path(name, *args, **kwargs): + if name == "data": + return str(data_root) + return real_get_path(name, *args, **kwargs) + + monkeypatch.setattr(i18n.sysconfig, "get_path", fake_get_path) + + assert i18n._locales_dir() == data_root / "locales" + + +def test_t_resolves_real_string_in_source_checkout(): + """Sanity: in the test environment (a source checkout) t() must return a + human string, never the bare key path. Guards against catalog-load + regressions independent of packaging.""" + assert i18n.t("gateway.reset.header_default", lang="en") != "gateway.reset.header_default" + assert i18n.t("gateway.status.header", lang="en") != "gateway.status.header" diff --git a/tests/test_packaging_metadata.py b/tests/test_packaging_metadata.py index fadb022f3..b4c367b14 100644 --- a/tests/test_packaging_metadata.py +++ b/tests/test_packaging_metadata.py @@ -200,3 +200,29 @@ def test_locked_starlette_is_not_vulnerable_to_cve_2026_48710(): f"floor {'.'.join(map(str, _STARLETTE_CVE_FLOOR))} — regenerate the " f"lockfile after bumping the pin" ) + + +def test_locale_catalogs_ship_in_both_wheel_and_sdist(): + """Regression test for #27632 / #35374 / #23943. + + locales/ is a bare data directory (no __init__.py), so it is invisible to + packages.find and to package-data (which attaches to a package). It must be + declared as setuptools data-files (wheel) AND grafted in MANIFEST.in + (sdist). Without both, sealed installs drop the catalogs and gateway/CLI + commands surface raw i18n keys like `gateway.reset.header_default`. + """ + data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + data_files = data["tool"]["setuptools"].get("data-files", {}) + assert data_files.get("locales") == ["locales/*.yaml"], ( + "pyproject [tool.setuptools.data-files] must declare " + 'locales = ["locales/*.yaml"] so the wheel ships i18n catalogs' + ) + + manifest = (REPO_ROOT / "MANIFEST.in").read_text(encoding="utf-8") + assert "graft locales" in manifest, ( + "MANIFEST.in must `graft locales` so the sdist ships i18n catalogs" + ) + + # Every on-disk catalog has the .yaml extension the globs above match. + on_disk = list((REPO_ROOT / "locales").glob("*.yaml")) + assert on_disk, "expected locales/*.yaml catalogs on disk" diff --git a/tests/test_wheel_locales_e2e.py b/tests/test_wheel_locales_e2e.py new file mode 100644 index 000000000..74b4b980b --- /dev/null +++ b/tests/test_wheel_locales_e2e.py @@ -0,0 +1,137 @@ +"""End-to-end: a built wheel, installed without a source tree, must resolve +i18n catalogs and render human strings — not raw key paths. + +This is the test that would have caught #27632 / #35374 / #23943. Metadata +unit tests (test_packaging_metadata.py) prove the glob is declared; this proves +the runtime actually finds the catalogs after a real pip install. + +This lives in tests/ (NOT tests/e2e/) so it is collected by the dedicated CI +step in Task 9, not by the existing `python -m pytest tests/e2e/` runner. + +Assumption: `from agent import i18n` must import with only stdlib + pyyaml +available (the test installs the wheel --no-deps + pyyaml). agent/__init__.py's +jiter preload swallows ImportError, and i18n.py imports yaml lazily inside +_load_catalog, so this holds today. If i18n.py ever gains a top-level non-stdlib +import, add it to the pip install line below. + +Marked `integration` because it shells out to `uv build` + `venv` + `pip` and +takes ~15-30s. Run with: pytest -m integration tests/test_wheel_locales_e2e.py +""" + +from __future__ import annotations + +import glob +import os +import subprocess +import sys +import tarfile +import venv +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[1] + + +@pytest.mark.integration +@pytest.mark.timeout(300) # overrides the global --timeout=30; cold-CI wheel build + venv + pip can exceed it +def test_installed_wheel_renders_i18n_strings(tmp_path): + # 1. Build the wheel from the current tree. + wheel_dir = tmp_path / "wheel" + build = subprocess.run( + ["uv", "build", "--wheel", "--out-dir", str(wheel_dir), "."], + cwd=REPO_ROOT, + capture_output=True, + text=True, + timeout=600, + ) + assert build.returncode == 0, f"uv build failed:\n{build.stderr}" + wheels = glob.glob(str(wheel_dir / "*.whl")) + assert wheels, "no wheel produced" + wheel = wheels[0] + + # 2. Fresh venv, install the wheel WITHOUT deps (we only exercise i18n, + # which needs pyyaml). --force-reinstall guards against pip's + # same-version no-op. + venv_dir = tmp_path / "venv" + venv.create(venv_dir, with_pip=True) + vpy = venv_dir / "bin" / "python" + subprocess.run([str(vpy), "-m", "pip", "install", "-q", "pyyaml"], check=True, timeout=300) + subprocess.run( + [str(vpy), "-m", "pip", "install", "-q", "--no-deps", "--force-reinstall", wheel], + check=True, + timeout=300, + ) + + # 3. Run from a directory that is NOT the source tree, with a clean env + # (no PYTHONPATH leaking the repo, no HERMES_BUNDLED_LOCALES). + probe = ( + "from agent import i18n;" + "import sys;" + "r = i18n.t('gateway.reset.header_default', lang='en');" + "s = i18n.t('gateway.status.header', lang='en');" + "print(repr(r)); print(repr(s));" + "sys.exit(0 if (r != 'gateway.reset.header_default' " + "and s != 'gateway.status.header') else 1)" + ) + env = {k: v for k, v in os.environ.items() if k not in ("PYTHONPATH", "HERMES_BUNDLED_LOCALES")} + env["PATH"] = f"{venv_dir / 'bin'}:{env['PATH']}" + env["VIRTUAL_ENV"] = str(venv_dir) + run = subprocess.run( + [str(vpy), "-c", probe], + cwd=str(tmp_path), # NOT the repo root + capture_output=True, + text=True, + env=env, + timeout=120, + ) + assert run.returncode == 0, ( + "installed wheel returned raw i18n keys instead of human strings:\n" + f"stdout: {run.stdout}\nstderr: {run.stderr}" + ) + + +@pytest.mark.integration +@pytest.mark.timeout(300) # overrides the global --timeout=30; cold-CI sdist build can exceed it +def test_built_sdist_ships_locale_catalogs(tmp_path): + """The sdist must carry locales/ too. + + The wheel is covered above; the sdist is a separately shipped artifact + (PyPI, and the form distro/Homebrew packagers build from). MANIFEST.in + `graft locales` is what puts the catalogs in the tarball — a stale graft or + a setuptools change would pass the metadata unit test (which only inspects + the declaration) while the actual artifact regresses. This inspects the + real tarball so that path can't rot silently. Closes the sdist half of + #27632 / #35374 / #23943. + """ + sdist_dir = tmp_path / "sdist" + build = subprocess.run( + ["uv", "build", "--sdist", "--out-dir", str(sdist_dir), "."], + cwd=REPO_ROOT, + capture_output=True, + text=True, + timeout=600, + ) + assert build.returncode == 0, f"uv build --sdist failed:\n{build.stderr}" + tarballs = glob.glob(str(sdist_dir / "*.tar.gz")) + assert tarballs, "no sdist produced" + + with tarfile.open(tarballs[0]) as tf: + # Members are prefixed with the sdist root dir, e.g. + # hermes_agent-0.15.1/locales/en.yaml — match on the suffix. + catalogs = [m for m in tf.getnames() if "/locales/" in m and m.endswith(".yaml")] + + # Compare against the canonical language list rather than a hardcoded floor + # so adding/removing a catalog updates the guard automatically and a dropped + # catalog (not just a fully-empty graft) trips it. + from agent.i18n import SUPPORTED_LANGUAGES + + expected = len(SUPPORTED_LANGUAGES) + assert len(catalogs) == expected, ( + f"sdist shipped {len(catalogs)} locale catalogs, expected {expected} " + f"({len(SUPPORTED_LANGUAGES)} supported languages) — check `graft " + "locales` in MANIFEST.in" + ) + assert any(m.endswith("/locales/en.yaml") for m in catalogs), ( + f"sdist missing locales/en.yaml; shipped: {catalogs[:5]}" + )