fix(packaging): ship locales/ i18n catalogs in wheel, sdist, and Nix (#38383)

* fix(packaging): ship locales/ i18n catalogs in wheel, sdist, and Nix

locales/ is a bare data dir (no __init__.py), invisible to packages.find
and package-data. Sealed installs (pip wheel, Nix store venv) dropped it,
so gateway/CLI commands rendered raw i18n keys like
gateway.reset.header_default.

- pyproject: [tool.setuptools.data-files] locales = ["locales/*.yaml"] (wheel)
- MANIFEST.in: graft locales (sdist)
- agent/i18n._locales_dir: env override -> source -> sysconfig data scheme
- nix/hermes-agent.nix: copy locales into the store + set HERMES_BUNDLED_LOCALES
  as defense-in-depth. The wheel's data-files already materialize into the
  uv2nix venv, so resolution works with no env var; the override pins the
  store path against a future uv2nix change that could drop data-files.
- tests: metadata regression, wheel + sdist build-install smoke tests, and a
  bundled-locales flake check that verifies BOTH the wrapper override and the
  env-var-less data-files path. Smoke test wired into CI.

Closes #23943, #27632, #35374.
Supersedes #23966, #27716, #30261, #33841, #35429, #35494, #35735, #36697.

* test: cap locale e2e timeout, tighten catalog count guard

The two wheel/sdist e2e tests inherit the global --timeout=30 from
addopts; a cold-CI run (isolated build env + venv create + network pip
install) can plausibly exceed it. Add @pytest.mark.timeout(300) so they
don't ride the unit-test budget and flake intermittently.

Also assert the shipped catalog count equals len(SUPPORTED_LANGUAGES)
instead of a hardcoded >=16 floor, so the guard self-updates and trips
on a single dropped catalog (not just a fully-empty graft).
This commit is contained in:
Siddharth Balyan
2026-06-04 00:30:27 +05:30
committed by GitHub
parent b91c382035
commit c349eca823
9 changed files with 350 additions and 4 deletions

View File

@ -171,6 +171,11 @@ jobs:
source .venv/bin/activate
uv pip install -e ".[all,dev]"
- name: Packaged-wheel i18n smoke test
run: |
source .venv/bin/activate
python -m pytest -m integration tests/test_wheel_locales_e2e.py -v
- name: Run e2e tests
run: |
source .venv/bin/activate

View File

@ -1,5 +1,6 @@
graft skills
graft optional-skills
graft locales
# Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the
# PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs
# built from the sdist (e.g. Homebrew, downstream packagers). package-data

View File

@ -32,6 +32,7 @@ from __future__ import annotations
import logging
import os
import sysconfig
import threading
from functools import lru_cache
from pathlib import Path
@ -87,11 +88,54 @@ _catalog_lock = threading.Lock()
def _locales_dir() -> Path:
"""Return the directory containing locale YAML files.
Lives next to the repo root so both the bundled install and editable
checkouts find it without PYTHONPATH gymnastics.
Resolution order, first existing wins:
1. ``HERMES_BUNDLED_LOCALES`` env var -- set by the Nix wrapper (or any
sealed-packaging system) to point at the installed catalog directory.
2. ``<repo-root>/locales`` -- source checkouts and ``pip install -e .``,
where the working tree sits next to ``agent/``.
3. ``<sysconfig data|purelib|platlib>/locales`` -- pip wheel installs.
setuptools ``data-files`` extracts ``locales/*.yaml`` under the
interpreter's ``data`` scheme; the other schemes are checked as a
safety net for nonstandard layouts.
Falling through to the source-style path (even when missing) keeps
``_load_catalog`` error messages informative -- it logs the path it
looked at -- rather than raising.
"""
# agent/i18n.py -> agent/ -> repo root
return Path(__file__).resolve().parent.parent / "locales"
override = os.getenv("HERMES_BUNDLED_LOCALES", "").strip()
if override:
candidate = Path(override)
if candidate.is_dir():
return candidate
logger.warning(
"HERMES_BUNDLED_LOCALES points to a non-directory path (%s); "
"falling back to bundled/source locale resolution",
override,
)
# agent/i18n.py -> agent/ -> repo root (source checkout, editable install)
source_dir = Path(__file__).resolve().parent.parent / "locales"
if source_dir.is_dir():
return source_dir
# pip wheel install: data-files lands under the interpreter data scheme.
# ``data`` (== sys.prefix in a venv) is where setuptools data-files extract
# and is checked first. ``purelib``/``platlib`` (site-packages) are a safety
# net for nonstandard layouts. NOTE: this does NOT cover ``pip install
# --user`` (user scheme, ~/.local/locales) or ``pip install --target`` --
# both are out of scope; see the plan header.
for scheme in ("data", "purelib", "platlib"):
raw = sysconfig.get_path(scheme)
if not raw:
continue
candidate = Path(raw) / "locales"
if candidate.is_dir():
return candidate
# Last resort: return the source-style path so _load_catalog's catalog-missing
# log (logger.debug "i18n catalog missing for %s at %s") stays informative.
return source_dir
def _normalize_lang(value: Any) -> str:

View File

@ -160,6 +160,53 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
echo "ok" > $out/result
'';
# Verify bundled i18n locale catalogs are present and resolvable.
# Regression for #23943 / #27632 / #35374 — sealed Nix venvs dropped
# locales/, surfacing raw i18n keys like gateway.reset.header_default.
bundled-locales = pkgs.runCommand "hermes-bundled-locales" { } ''
set -e
echo "=== Checking bundled locales ==="
test -d ${hermes-agent}/share/hermes-agent/locales || (echo "FAIL: locales directory missing"; exit 1)
echo "PASS: locales directory exists"
LOC_COUNT=$(find ${hermes-agent}/share/hermes-agent/locales -name "*.yaml" | wc -l)
test "$LOC_COUNT" -ge 16 || (echo "FAIL: expected >=16 catalogs, found $LOC_COUNT"; exit 1)
echo "PASS: $LOC_COUNT locale catalogs found"
test -f ${hermes-agent}/share/hermes-agent/locales/en.yaml || (echo "FAIL: en.yaml missing"; exit 1)
echo "PASS: en.yaml present"
grep -q "HERMES_BUNDLED_LOCALES" ${hermes-agent}/bin/hermes || \
(echo "FAIL: HERMES_BUNDLED_LOCALES not in wrapper"; exit 1)
echo "PASS: HERMES_BUNDLED_LOCALES set in wrapper"
echo "=== Rendering via the wrapper override (HERMES_BUNDLED_LOCALES) ==="
export HOME=$(mktemp -d)
RENDERED=$(cd "$HOME" && HERMES_BUNDLED_LOCALES=${hermes-agent}/share/hermes-agent/locales \
${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n.t('gateway.reset.header_default', lang='en'))")
echo "rendered: $RENDERED"
test "$RENDERED" != "gateway.reset.header_default" || (echo "FAIL: i18n returned the raw key with HERMES_BUNDLED_LOCALES set"; exit 1)
echo "PASS: i18n renders a human string via the wrapper override"
# Defense-in-depth check: the sealed venv must ALSO resolve catalogs
# with NO env var, via the wheel's setuptools data-files materialized
# into the venv data scheme. If a future uv2nix bump drops data-files,
# the wrapper override above would mask the regression at runtime while
# `pip install`/other sealed paths silently break this catches it.
echo "=== Rendering WITHOUT the env var (data-files materialization) ==="
BARE_DIR=$(cd "$HOME" && ${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n._locales_dir())")
BARE=$(cd "$HOME" && ${hermesVenv}/bin/python3 -c "from agent import i18n; print(i18n.t('gateway.reset.header_default', lang='en'))")
echo "resolved dir (no env var): $BARE_DIR"
echo "rendered: $BARE"
test "$BARE" != "gateway.reset.header_default" || \
(echo "FAIL: sealed venv could not resolve locales without HERMES_BUNDLED_LOCALES data-files materialization regressed"; exit 1)
echo "PASS: sealed venv resolves locales via data-files without the env var"
echo "=== All bundled locales checks passed ==="
mkdir -p $out
echo "ok" > $out/result
'';
# Verify bundled TUI is present and compiled
bundled-tui = pkgs.runCommand "hermes-bundled-tui" { } ''
set -e

View File

@ -67,6 +67,21 @@ let
filter = path: _type: !(lib.hasInfix "/__pycache__/" path);
};
# i18n locale catalogs (locales/*.yaml). Shipped into the store and pointed
# at by HERMES_BUNDLED_LOCALES so the wrapped binary always resolves human
# strings instead of raw i18n keys (#23943 / #27632 / #35374).
#
# Defense-in-depth, not load-bearing: the wheel already declares locales/ as
# setuptools data-files, so uv2nix materializes them into the venv's data
# scheme and agent/i18n.py resolves them with no env var. The wrapper override
# pins the store path so a future uv2nix change that drops data-files can't
# silently ship raw keys via `nix build` (checks don't run on a plain build).
# The bundled-locales flake check verifies BOTH paths independently.
#
# Plain cleanSource (no __pycache__ filter): locales/ is bare *.yaml, never
# compiled, so it never carries a __pycache__ dir to exclude.
bundledLocales = lib.cleanSource ../locales;
runtimeDeps = [
nodejs
ripgrep
@ -151,6 +166,7 @@ stdenv.mkDerivation (finalAttrs: {
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
cp -r ${bundledPlugins} $out/share/hermes-agent/plugins
cp -r ${bundledLocales} $out/share/hermes-agent/locales
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
mkdir -p $out/ui-tui
@ -162,6 +178,7 @@ stdenv.mkDerivation (finalAttrs: {
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
--set HERMES_BUNDLED_PLUGINS $out/share/hermes-agent/plugins \
--set HERMES_BUNDLED_LOCALES $out/share/hermes-agent/locales \
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \

View File

@ -230,6 +230,15 @@ hermes-acp = "acp_adapter.entry:main"
[tool.setuptools]
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils", "mcp_serve"]
[tool.setuptools.data-files]
# i18n catalogs. locales/ is a bare data directory (no __init__.py), so it is
# neither a package (packages.find) nor package-data (which attaches to a
# package). data-files ships it in the wheel; MANIFEST.in `graft locales`
# ships it in the sdist. Without this, sealed installs (pip wheel, Nix store
# venv) drop the catalogs and gateway/CLI commands surface raw i18n keys like
# `gateway.reset.header_default` (#27632, #35374, #23943).
locales = ["locales/*.yaml"]
[tool.setuptools.package-data]
hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh", "scripts/install.ps1"]
gateway = ["assets/**/*"]

View File

@ -167,3 +167,63 @@ def test_t_missing_key_in_non_english_falls_back_to_english(tmp_path, monkeypatc
def test_t_unknown_language_uses_english():
"""Unknown lang codes normalize to English, not to a key-path fallback."""
assert i18n.t("approval.denied", lang="klingon") == i18n.t("approval.denied", lang="en")
# ---------------------------------------------------------------------------
# _locales_dir resolution ladder -- regression for #23943 / #27632 / #35374.
# Sealed installs (Nix store venv, pip wheel) have no source tree next to
# agent/, so _locales_dir must resolve via env override or the data scheme.
# ---------------------------------------------------------------------------
def test_locales_dir_env_override_used_when_dir_exists(tmp_path, monkeypatch):
"""HERMES_BUNDLED_LOCALES wins when it points at a real directory."""
bundled = tmp_path / "bundled-locales"
bundled.mkdir()
monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(bundled))
assert i18n._locales_dir() == bundled
def test_locales_dir_env_override_ignored_when_missing(tmp_path, monkeypatch):
"""A bogus HERMES_BUNDLED_LOCALES falls through to source/wheel resolution
instead of returning a path that doesn't exist."""
monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(tmp_path / "does-not-exist"))
result = i18n._locales_dir()
assert result != tmp_path / "does-not-exist"
# In a source checkout this is the repo-root locales dir.
assert result.name == "locales"
def test_locales_dir_falls_back_to_data_scheme(tmp_path, monkeypatch):
"""When neither the env override nor a source-adjacent locales/ exists,
_locales_dir uses sysconfig's data scheme (the pip-wheel layout)."""
import sysconfig
# No env override.
monkeypatch.delenv("HERMES_BUNDLED_LOCALES", raising=False)
# Force the source-adjacent path to a location with no locales/ dir.
fake_pkg = tmp_path / "site-packages" / "agent"
fake_pkg.mkdir(parents=True)
monkeypatch.setattr(i18n, "__file__", str(fake_pkg / "i18n.py"))
# Stand up a fake data scheme containing locales/.
data_root = tmp_path / "data-scheme"
(data_root / "locales").mkdir(parents=True)
real_get_path = sysconfig.get_path
def fake_get_path(name, *args, **kwargs):
if name == "data":
return str(data_root)
return real_get_path(name, *args, **kwargs)
monkeypatch.setattr(i18n.sysconfig, "get_path", fake_get_path)
assert i18n._locales_dir() == data_root / "locales"
def test_t_resolves_real_string_in_source_checkout():
"""Sanity: in the test environment (a source checkout) t() must return a
human string, never the bare key path. Guards against catalog-load
regressions independent of packaging."""
assert i18n.t("gateway.reset.header_default", lang="en") != "gateway.reset.header_default"
assert i18n.t("gateway.status.header", lang="en") != "gateway.status.header"

View File

@ -200,3 +200,29 @@ def test_locked_starlette_is_not_vulnerable_to_cve_2026_48710():
f"floor {'.'.join(map(str, _STARLETTE_CVE_FLOOR))} — regenerate the "
f"lockfile after bumping the pin"
)
def test_locale_catalogs_ship_in_both_wheel_and_sdist():
"""Regression test for #27632 / #35374 / #23943.
locales/ is a bare data directory (no __init__.py), so it is invisible to
packages.find and to package-data (which attaches to a package). It must be
declared as setuptools data-files (wheel) AND grafted in MANIFEST.in
(sdist). Without both, sealed installs drop the catalogs and gateway/CLI
commands surface raw i18n keys like `gateway.reset.header_default`.
"""
data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
data_files = data["tool"]["setuptools"].get("data-files", {})
assert data_files.get("locales") == ["locales/*.yaml"], (
"pyproject [tool.setuptools.data-files] must declare "
'locales = ["locales/*.yaml"] so the wheel ships i18n catalogs'
)
manifest = (REPO_ROOT / "MANIFEST.in").read_text(encoding="utf-8")
assert "graft locales" in manifest, (
"MANIFEST.in must `graft locales` so the sdist ships i18n catalogs"
)
# Every on-disk catalog has the .yaml extension the globs above match.
on_disk = list((REPO_ROOT / "locales").glob("*.yaml"))
assert on_disk, "expected locales/*.yaml catalogs on disk"

View File

@ -0,0 +1,137 @@
"""End-to-end: a built wheel, installed without a source tree, must resolve
i18n catalogs and render human strings — not raw key paths.
This is the test that would have caught #27632 / #35374 / #23943. Metadata
unit tests (test_packaging_metadata.py) prove the glob is declared; this proves
the runtime actually finds the catalogs after a real pip install.
This lives in tests/ (NOT tests/e2e/) so it is collected by the dedicated CI
step in Task 9, not by the existing `python -m pytest tests/e2e/` runner.
Assumption: `from agent import i18n` must import with only stdlib + pyyaml
available (the test installs the wheel --no-deps + pyyaml). agent/__init__.py's
jiter preload swallows ImportError, and i18n.py imports yaml lazily inside
_load_catalog, so this holds today. If i18n.py ever gains a top-level non-stdlib
import, add it to the pip install line below.
Marked `integration` because it shells out to `uv build` + `venv` + `pip` and
takes ~15-30s. Run with: pytest -m integration tests/test_wheel_locales_e2e.py
"""
from __future__ import annotations
import glob
import os
import subprocess
import sys
import tarfile
import venv
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[1]
@pytest.mark.integration
@pytest.mark.timeout(300) # overrides the global --timeout=30; cold-CI wheel build + venv + pip can exceed it
def test_installed_wheel_renders_i18n_strings(tmp_path):
# 1. Build the wheel from the current tree.
wheel_dir = tmp_path / "wheel"
build = subprocess.run(
["uv", "build", "--wheel", "--out-dir", str(wheel_dir), "."],
cwd=REPO_ROOT,
capture_output=True,
text=True,
timeout=600,
)
assert build.returncode == 0, f"uv build failed:\n{build.stderr}"
wheels = glob.glob(str(wheel_dir / "*.whl"))
assert wheels, "no wheel produced"
wheel = wheels[0]
# 2. Fresh venv, install the wheel WITHOUT deps (we only exercise i18n,
# which needs pyyaml). --force-reinstall guards against pip's
# same-version no-op.
venv_dir = tmp_path / "venv"
venv.create(venv_dir, with_pip=True)
vpy = venv_dir / "bin" / "python"
subprocess.run([str(vpy), "-m", "pip", "install", "-q", "pyyaml"], check=True, timeout=300)
subprocess.run(
[str(vpy), "-m", "pip", "install", "-q", "--no-deps", "--force-reinstall", wheel],
check=True,
timeout=300,
)
# 3. Run from a directory that is NOT the source tree, with a clean env
# (no PYTHONPATH leaking the repo, no HERMES_BUNDLED_LOCALES).
probe = (
"from agent import i18n;"
"import sys;"
"r = i18n.t('gateway.reset.header_default', lang='en');"
"s = i18n.t('gateway.status.header', lang='en');"
"print(repr(r)); print(repr(s));"
"sys.exit(0 if (r != 'gateway.reset.header_default' "
"and s != 'gateway.status.header') else 1)"
)
env = {k: v for k, v in os.environ.items() if k not in ("PYTHONPATH", "HERMES_BUNDLED_LOCALES")}
env["PATH"] = f"{venv_dir / 'bin'}:{env['PATH']}"
env["VIRTUAL_ENV"] = str(venv_dir)
run = subprocess.run(
[str(vpy), "-c", probe],
cwd=str(tmp_path), # NOT the repo root
capture_output=True,
text=True,
env=env,
timeout=120,
)
assert run.returncode == 0, (
"installed wheel returned raw i18n keys instead of human strings:\n"
f"stdout: {run.stdout}\nstderr: {run.stderr}"
)
@pytest.mark.integration
@pytest.mark.timeout(300) # overrides the global --timeout=30; cold-CI sdist build can exceed it
def test_built_sdist_ships_locale_catalogs(tmp_path):
"""The sdist must carry locales/ too.
The wheel is covered above; the sdist is a separately shipped artifact
(PyPI, and the form distro/Homebrew packagers build from). MANIFEST.in
`graft locales` is what puts the catalogs in the tarball — a stale graft or
a setuptools change would pass the metadata unit test (which only inspects
the declaration) while the actual artifact regresses. This inspects the
real tarball so that path can't rot silently. Closes the sdist half of
#27632 / #35374 / #23943.
"""
sdist_dir = tmp_path / "sdist"
build = subprocess.run(
["uv", "build", "--sdist", "--out-dir", str(sdist_dir), "."],
cwd=REPO_ROOT,
capture_output=True,
text=True,
timeout=600,
)
assert build.returncode == 0, f"uv build --sdist failed:\n{build.stderr}"
tarballs = glob.glob(str(sdist_dir / "*.tar.gz"))
assert tarballs, "no sdist produced"
with tarfile.open(tarballs[0]) as tf:
# Members are prefixed with the sdist root dir, e.g.
# hermes_agent-0.15.1/locales/en.yaml — match on the suffix.
catalogs = [m for m in tf.getnames() if "/locales/" in m and m.endswith(".yaml")]
# Compare against the canonical language list rather than a hardcoded floor
# so adding/removing a catalog updates the guard automatically and a dropped
# catalog (not just a fully-empty graft) trips it.
from agent.i18n import SUPPORTED_LANGUAGES
expected = len(SUPPORTED_LANGUAGES)
assert len(catalogs) == expected, (
f"sdist shipped {len(catalogs)} locale catalogs, expected {expected} "
f"({len(SUPPORTED_LANGUAGES)} supported languages) — check `graft "
"locales` in MANIFEST.in"
)
assert any(m.endswith("/locales/en.yaml") for m in catalogs), (
f"sdist missing locales/en.yaml; shipped: {catalogs[:5]}"
)