* fix(packaging): ship locales/ i18n catalogs in wheel, sdist, and Nix locales/ is a bare data dir (no __init__.py), invisible to packages.find and package-data. Sealed installs (pip wheel, Nix store venv) dropped it, so gateway/CLI commands rendered raw i18n keys like gateway.reset.header_default. - pyproject: [tool.setuptools.data-files] locales = ["locales/*.yaml"] (wheel) - MANIFEST.in: graft locales (sdist) - agent/i18n._locales_dir: env override -> source -> sysconfig data scheme - nix/hermes-agent.nix: copy locales into the store + set HERMES_BUNDLED_LOCALES as defense-in-depth. The wheel's data-files already materialize into the uv2nix venv, so resolution works with no env var; the override pins the store path against a future uv2nix change that could drop data-files. - tests: metadata regression, wheel + sdist build-install smoke tests, and a bundled-locales flake check that verifies BOTH the wrapper override and the env-var-less data-files path. Smoke test wired into CI. Closes #23943, #27632, #35374. Supersedes #23966, #27716, #30261, #33841, #35429, #35494, #35735, #36697. * test: cap locale e2e timeout, tighten catalog count guard The two wheel/sdist e2e tests inherit the global --timeout=30 from addopts; a cold-CI run (isolated build env + venv create + network pip install) can plausibly exceed it. Add @pytest.mark.timeout(300) so they don't ride the unit-test budget and flake intermittently. Also assert the shipped catalog count equals len(SUPPORTED_LANGUAGES) instead of a hardcoded >=16 floor, so the guard self-updates and trips on a single dropped catalog (not just a fully-empty graft).
230 lines
8.9 KiB
Python
230 lines
8.9 KiB
Python
"""Tests for agent.i18n -- catalog parity, fallback, language resolution."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from agent import i18n
|
|
|
|
|
|
LOCALES_DIR = Path(__file__).resolve().parents[2] / "locales"
|
|
|
|
|
|
def _load_raw(lang: str) -> dict:
|
|
with (LOCALES_DIR / f"{lang}.yaml").open("r", encoding="utf-8") as f:
|
|
return yaml.safe_load(f)
|
|
|
|
|
|
def _flatten(d, prefix="") -> dict:
|
|
flat = {}
|
|
for k, v in (d or {}).items():
|
|
key = f"{prefix}.{k}" if prefix else k
|
|
if isinstance(v, dict):
|
|
flat.update(_flatten(v, key))
|
|
else:
|
|
flat[key] = v
|
|
return flat
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Catalog completeness -- this is the key invariant test. If someone adds a
|
|
# new key to en.yaml they MUST add it to every other locale, else runtime
|
|
# falls back to English for those users and defeats the feature.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_all_locales_exist():
|
|
"""Every supported language must have a catalog file on disk."""
|
|
for lang in i18n.SUPPORTED_LANGUAGES:
|
|
assert (LOCALES_DIR / f"{lang}.yaml").is_file(), f"missing locales/{lang}.yaml"
|
|
|
|
|
|
@pytest.mark.parametrize("lang", [l for l in i18n.SUPPORTED_LANGUAGES if l != "en"])
|
|
def test_catalog_keys_match_english(lang: str):
|
|
"""Every non-English catalog must have exactly the same key set as English."""
|
|
en_keys = set(_flatten(_load_raw("en")).keys())
|
|
lang_keys = set(_flatten(_load_raw(lang)).keys())
|
|
missing = en_keys - lang_keys
|
|
extra = lang_keys - en_keys
|
|
assert not missing, f"{lang}.yaml missing keys: {sorted(missing)}"
|
|
assert not extra, f"{lang}.yaml has keys not in en.yaml: {sorted(extra)}"
|
|
|
|
|
|
@pytest.mark.parametrize("lang", list(i18n.SUPPORTED_LANGUAGES))
|
|
def test_catalog_placeholders_match_english(lang: str):
|
|
"""Every translated value must use the same {placeholder} tokens as English.
|
|
|
|
A mistranslated placeholder (e.g. ``{description}`` typoed as ``{descricao}``)
|
|
would either raise KeyError at runtime or silently drop the interpolated
|
|
value. Pin parity at the test layer.
|
|
"""
|
|
import re
|
|
placeholder_re = re.compile(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}")
|
|
en_flat = _flatten(_load_raw("en"))
|
|
lang_flat = _flatten(_load_raw(lang))
|
|
for key, en_value in en_flat.items():
|
|
en_placeholders = set(placeholder_re.findall(en_value))
|
|
lang_value = lang_flat.get(key, "")
|
|
lang_placeholders = set(placeholder_re.findall(lang_value))
|
|
assert en_placeholders == lang_placeholders, (
|
|
f"{lang}.yaml key={key!r}: placeholders {lang_placeholders} "
|
|
f"don't match English {en_placeholders}"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Language resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_normalize_lang_accepts_supported():
|
|
assert i18n._normalize_lang("zh") == "zh"
|
|
assert i18n._normalize_lang("EN") == "en"
|
|
|
|
|
|
def test_normalize_lang_accepts_aliases():
|
|
assert i18n._normalize_lang("chinese") == "zh"
|
|
assert i18n._normalize_lang("zh-CN") == "zh"
|
|
assert i18n._normalize_lang("Deutsch") == "de"
|
|
assert i18n._normalize_lang("español") == "es"
|
|
assert i18n._normalize_lang("jp") == "ja"
|
|
assert i18n._normalize_lang("Ukrainian") == "uk"
|
|
assert i18n._normalize_lang("uk-UA") == "uk"
|
|
assert i18n._normalize_lang("ua") == "uk"
|
|
assert i18n._normalize_lang("Turkish") == "tr"
|
|
assert i18n._normalize_lang("tr-TR") == "tr"
|
|
assert i18n._normalize_lang("türkçe") == "tr"
|
|
|
|
|
|
def test_normalize_lang_unknown_falls_back():
|
|
assert i18n._normalize_lang("klingon") == "en"
|
|
assert i18n._normalize_lang("") == "en"
|
|
assert i18n._normalize_lang(None) == "en"
|
|
|
|
|
|
def test_env_var_override(monkeypatch):
|
|
"""HERMES_LANGUAGE wins over config."""
|
|
i18n.reset_language_cache()
|
|
monkeypatch.setenv("HERMES_LANGUAGE", "ja")
|
|
assert i18n.get_language() == "ja"
|
|
|
|
|
|
def test_env_var_normalized(monkeypatch):
|
|
i18n.reset_language_cache()
|
|
monkeypatch.setenv("HERMES_LANGUAGE", "Chinese")
|
|
assert i18n.get_language() == "zh"
|
|
|
|
|
|
def test_default_when_nothing_set(monkeypatch):
|
|
"""With no env var and no config override, falls back to English."""
|
|
monkeypatch.delenv("HERMES_LANGUAGE", raising=False)
|
|
# Force config lookup to return None -- patch the cached reader.
|
|
i18n.reset_language_cache()
|
|
monkeypatch.setattr(i18n, "_config_language_cached", lambda: None)
|
|
assert i18n.get_language() == "en"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# t() semantics
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_t_explicit_lang():
|
|
assert i18n.t("approval.denied", lang="en").endswith("Denied")
|
|
assert i18n.t("approval.denied", lang="zh").endswith("已拒绝")
|
|
assert i18n.t("approval.denied", lang="uk").endswith("Відхилено")
|
|
assert i18n.t("approval.denied", lang="tr").endswith("Reddedildi")
|
|
|
|
|
|
def test_t_formats_placeholders():
|
|
msg = i18n.t("gateway.draining", lang="en", count=3)
|
|
assert "3" in msg
|
|
|
|
|
|
def test_t_missing_key_returns_key():
|
|
"""A missing key returns its own path -- ugly but never crashes."""
|
|
result = i18n.t("nonexistent.key.path", lang="en")
|
|
assert result == "nonexistent.key.path"
|
|
|
|
|
|
def test_t_missing_key_in_non_english_falls_back_to_english(tmp_path, monkeypatch):
|
|
"""If a key exists in English but not in the target locale, fall back."""
|
|
# Stand up a fake incomplete locale under a temp locales dir.
|
|
fake_locales = tmp_path / "locales"
|
|
fake_locales.mkdir()
|
|
(fake_locales / "en.yaml").write_text("foo: English Foo\n", encoding="utf-8")
|
|
(fake_locales / "zh.yaml").write_text("# intentionally empty\n", encoding="utf-8")
|
|
monkeypatch.setattr(i18n, "_locales_dir", lambda: fake_locales)
|
|
i18n.reset_language_cache()
|
|
try:
|
|
assert i18n.t("foo", lang="zh") == "English Foo"
|
|
finally:
|
|
# Clear the cache on teardown so subsequent tests don't see the
|
|
# fake "foo: English Foo" catalog instead of the real locales/*.yaml.
|
|
i18n.reset_language_cache()
|
|
|
|
|
|
def test_t_unknown_language_uses_english():
|
|
"""Unknown lang codes normalize to English, not to a key-path fallback."""
|
|
assert i18n.t("approval.denied", lang="klingon") == i18n.t("approval.denied", lang="en")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _locales_dir resolution ladder -- regression for #23943 / #27632 / #35374.
|
|
# Sealed installs (Nix store venv, pip wheel) have no source tree next to
|
|
# agent/, so _locales_dir must resolve via env override or the data scheme.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_locales_dir_env_override_used_when_dir_exists(tmp_path, monkeypatch):
|
|
"""HERMES_BUNDLED_LOCALES wins when it points at a real directory."""
|
|
bundled = tmp_path / "bundled-locales"
|
|
bundled.mkdir()
|
|
monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(bundled))
|
|
assert i18n._locales_dir() == bundled
|
|
|
|
|
|
def test_locales_dir_env_override_ignored_when_missing(tmp_path, monkeypatch):
|
|
"""A bogus HERMES_BUNDLED_LOCALES falls through to source/wheel resolution
|
|
instead of returning a path that doesn't exist."""
|
|
monkeypatch.setenv("HERMES_BUNDLED_LOCALES", str(tmp_path / "does-not-exist"))
|
|
result = i18n._locales_dir()
|
|
assert result != tmp_path / "does-not-exist"
|
|
# In a source checkout this is the repo-root locales dir.
|
|
assert result.name == "locales"
|
|
|
|
|
|
def test_locales_dir_falls_back_to_data_scheme(tmp_path, monkeypatch):
|
|
"""When neither the env override nor a source-adjacent locales/ exists,
|
|
_locales_dir uses sysconfig's data scheme (the pip-wheel layout)."""
|
|
import sysconfig
|
|
|
|
# No env override.
|
|
monkeypatch.delenv("HERMES_BUNDLED_LOCALES", raising=False)
|
|
|
|
# Force the source-adjacent path to a location with no locales/ dir.
|
|
fake_pkg = tmp_path / "site-packages" / "agent"
|
|
fake_pkg.mkdir(parents=True)
|
|
monkeypatch.setattr(i18n, "__file__", str(fake_pkg / "i18n.py"))
|
|
|
|
# Stand up a fake data scheme containing locales/.
|
|
data_root = tmp_path / "data-scheme"
|
|
(data_root / "locales").mkdir(parents=True)
|
|
real_get_path = sysconfig.get_path
|
|
|
|
def fake_get_path(name, *args, **kwargs):
|
|
if name == "data":
|
|
return str(data_root)
|
|
return real_get_path(name, *args, **kwargs)
|
|
|
|
monkeypatch.setattr(i18n.sysconfig, "get_path", fake_get_path)
|
|
|
|
assert i18n._locales_dir() == data_root / "locales"
|
|
|
|
|
|
def test_t_resolves_real_string_in_source_checkout():
|
|
"""Sanity: in the test environment (a source checkout) t() must return a
|
|
human string, never the bare key path. Guards against catalog-load
|
|
regressions independent of packaging."""
|
|
assert i18n.t("gateway.reset.header_default", lang="en") != "gateway.reset.header_default"
|
|
assert i18n.t("gateway.status.header", lang="en") != "gateway.status.header"
|