fix: batch of small robustness/correctness fixes from @kyssta-exe
Salvages 8 distinct fixes from a batch of PRs by @kyssta-exe, reapplied
onto current main (original branches were stale) with a few refinements.
- cron(jobs.py): load_jobs() validates top-level JSON shape — a bare
list auto-repairs into the {"jobs": [...]} dict; scalars/null raise a
clear RuntimeError instead of an uncaught AttributeError that took
down the whole cron subsystem (#37065, closes #36867).
- web(web_server.py): close the per-action log file handle after Popen
so the parent stops leaking one fd per spawned action (#36843).
- web(web_server.py): DELETE /api/env returns 400 for invalid key names
instead of a misleading 500, mirroring PUT /api/env (#36840).
- gateway(gateway.py): read /proc/<pid>/cmdline inside a with-block so
the fd is released immediately instead of relying on GC (#36804).
- web-tools(web_tools.py): include "xai" in check_web_api_key() so a
configured X.AI web backend reports as available (#36802).
- compression(conversation_compression.py): mark the feasibility check
done only after it completes, and default the gate to "not checked"
if the attribute is missing (#36803).
- completion(completion.py): replace `ls` with directory globbing in the
generated bash/zsh/fish profile listers — handles names with spaces
and skips non-directory entries (#36806).
- terminal-tool(terminal_tool.py): drop a duplicate `import threading`
(#36808).
- claw(claw.py): the migrate recommendation now points at the real
`hermes gateway stop` command instead of the non-existent
`hermes stop` (#36795, #36796, closes #36771).
- tests: guard against a leaked HERMES_CRON_SESSION breaking gateway
approval tests — add it to the hermetic conftest unset list (root
cause, protects every test) and pop it in the affected test's
setup_method (#36796).
Co-authored-by: kyssta-exe <kyssta-exe@users.noreply.github.com>
This commit is contained in:
@ -308,11 +308,14 @@ def compress_context(
|
||||
# The check itself sets ``agent._compression_warning`` so the
|
||||
# status-callback replay machinery still emits the warning to the user
|
||||
# the first time it would matter.
|
||||
if not getattr(agent, "_compression_feasibility_checked", True):
|
||||
try:
|
||||
check_compression_model_feasibility(agent)
|
||||
finally:
|
||||
agent._compression_feasibility_checked = True
|
||||
if not getattr(agent, "_compression_feasibility_checked", False):
|
||||
# Mark as checked only after the probe completes. If the check
|
||||
# raises (e.g. a fatal aux-context ValueError that aborts the
|
||||
# session), leaving the flag unset is harmless; a non-fatal
|
||||
# transient failure is swallowed inside the function so the flag
|
||||
# is set normally on the next successful pass.
|
||||
check_compression_model_feasibility(agent)
|
||||
agent._compression_feasibility_checked = True
|
||||
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
|
||||
35
cron/jobs.py
35
cron/jobs.py
@ -428,22 +428,18 @@ def load_jobs() -> List[Dict[str, Any]]:
|
||||
ensure_dirs()
|
||||
if not JOBS_FILE.exists():
|
||||
return []
|
||||
|
||||
|
||||
_strict_retry = False # track whether we used the strict=False fallback
|
||||
|
||||
try:
|
||||
with open(JOBS_FILE, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
return data.get("jobs", [])
|
||||
except json.JSONDecodeError:
|
||||
# Retry with strict=False to handle bare control chars in string values
|
||||
_strict_retry = True
|
||||
try:
|
||||
with open(JOBS_FILE, 'r', encoding='utf-8') as f:
|
||||
data = json.loads(f.read(), strict=False)
|
||||
jobs = data.get("jobs", [])
|
||||
if jobs:
|
||||
# Auto-repair: rewrite with proper escaping
|
||||
save_jobs(jobs)
|
||||
logger.warning("Auto-repaired jobs.json (had invalid control characters)")
|
||||
return jobs
|
||||
except Exception as e:
|
||||
logger.error("Failed to auto-repair jobs.json: %s", e)
|
||||
raise RuntimeError(f"Cron database corrupted and unrepairable: {e}") from e
|
||||
@ -451,6 +447,29 @@ def load_jobs() -> List[Dict[str, Any]]:
|
||||
logger.error("IOError reading jobs.json: %s", e)
|
||||
raise RuntimeError(f"Failed to read cron database: {e}") from e
|
||||
|
||||
# Validate the top-level JSON shape: accept a dict (expected) or a bare
|
||||
# list (auto-repair). Anything else (str/number/null) is corruption that
|
||||
# would otherwise raise an uncaught AttributeError on ``.get()`` and take
|
||||
# down the whole cron subsystem.
|
||||
if isinstance(data, dict):
|
||||
jobs = data.get("jobs", [])
|
||||
if _strict_retry and jobs:
|
||||
# Hit control-character corruption — rewrite with proper escaping.
|
||||
save_jobs(jobs)
|
||||
logger.warning("Auto-repaired jobs.json (had invalid control characters)")
|
||||
return jobs
|
||||
if isinstance(data, list):
|
||||
# Bare array — likely saved/edited outside save_jobs(). Wrap it back
|
||||
# into the expected {"jobs": [...]} structure.
|
||||
if data:
|
||||
save_jobs(data)
|
||||
logger.warning("Auto-repaired jobs.json (bare list wrapped as dict)")
|
||||
return data
|
||||
|
||||
raise RuntimeError(
|
||||
f"Cron database corrupted: expected {{'jobs': [...]}}, got {type(data).__name__}"
|
||||
)
|
||||
|
||||
|
||||
def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
"""Save all jobs to storage."""
|
||||
|
||||
@ -177,7 +177,7 @@ def _warn_if_gateway_running(auto_yes: bool) -> None:
|
||||
"conflicts (Telegram, Discord, and Slack only allow one active "
|
||||
"session per token)."
|
||||
)
|
||||
print_info("Recommendation: stop the gateway first with 'hermes stop'.")
|
||||
print_info("Recommendation: stop the gateway first with 'hermes gateway stop'.")
|
||||
print()
|
||||
if not auto_yes and not prompt_yes_no("Continue anyway?", default=False):
|
||||
print_info("Migration cancelled. Stop the gateway and try again.")
|
||||
|
||||
@ -105,7 +105,9 @@ _hermes_profiles() {{
|
||||
local profiles_dir="$HOME/.hermes/profiles"
|
||||
local profiles="default"
|
||||
if [ -d "$profiles_dir" ]; then
|
||||
profiles="$profiles $(ls "$profiles_dir" 2>/dev/null)"
|
||||
for f in "$profiles_dir"/*/; do
|
||||
[ -d "$f" ] && profiles="$profiles $(basename "$f")"
|
||||
done
|
||||
fi
|
||||
echo "$profiles"
|
||||
}}
|
||||
@ -206,7 +208,7 @@ _hermes_profiles() {{
|
||||
local -a profiles
|
||||
profiles=(default)
|
||||
if [[ -d "$HOME/.hermes/profiles" ]]; then
|
||||
profiles+=("${{(@f)$(ls $HOME/.hermes/profiles 2>/dev/null)}}")
|
||||
profiles+=($HOME/.hermes/profiles/*(N/:t))
|
||||
fi
|
||||
_describe 'profile' profiles
|
||||
}}
|
||||
@ -260,7 +262,9 @@ def generate_fish(parser: argparse.ArgumentParser) -> str:
|
||||
"function __hermes_profiles",
|
||||
" echo default",
|
||||
" if test -d $HOME/.hermes/profiles",
|
||||
" ls $HOME/.hermes/profiles 2>/dev/null",
|
||||
" for d in $HOME/.hermes/profiles/*/",
|
||||
" basename $d",
|
||||
" end",
|
||||
" end",
|
||||
"end",
|
||||
"",
|
||||
|
||||
@ -453,11 +453,8 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
|
||||
if pid == my_pid or pid in exclude_pids:
|
||||
continue
|
||||
try:
|
||||
cmdline = (
|
||||
open(f"/proc/{pid}/cmdline", "rb")
|
||||
.read()
|
||||
.decode("utf-8", errors="replace")
|
||||
)
|
||||
with open(f"/proc/{pid}/cmdline", "rb") as _f:
|
||||
cmdline = _f.read().decode("utf-8", errors="replace")
|
||||
cmdline = cmdline.replace("\x00", " ")
|
||||
cmdline_lc = cmdline.lower()
|
||||
if any(p in cmdline_lc for p in patterns) and (
|
||||
|
||||
@ -838,6 +838,10 @@ def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
|
||||
popen_kwargs["start_new_session"] = True
|
||||
|
||||
proc = subprocess.Popen(cmd, **popen_kwargs)
|
||||
# The child inherits its own duplicated fd for stdout/stderr, so the
|
||||
# parent's handle can be released immediately — otherwise we leak one
|
||||
# fd per spawned action.
|
||||
log_file.close()
|
||||
_ACTION_RESULTS.pop(name, None)
|
||||
_ACTION_PROCS[name] = proc
|
||||
return proc
|
||||
@ -1808,6 +1812,11 @@ async def remove_env_var(body: EnvVarDelete):
|
||||
return {"ok": True, "key": body.key}
|
||||
except HTTPException:
|
||||
raise
|
||||
except ValueError as exc:
|
||||
# remove_env_value raises ValueError for invalid key names. Surface
|
||||
# the message to the SPA so the user understands why the delete was
|
||||
# refused instead of seeing an opaque 500. Mirrors PUT /api/env.
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
except Exception:
|
||||
_log.exception("DELETE /api/env failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@ -64,6 +64,7 @@ AUTHOR_MAP = {
|
||||
"524706+Twanislas@users.noreply.github.com": "Twanislas",
|
||||
"9592417+adam91holt@users.noreply.github.com": "adam91holt",
|
||||
"kchuang1015@users.noreply.github.com": "kchuang1015",
|
||||
"kyssta-exe@users.noreply.github.com": "kyssta-exe",
|
||||
"45688690+fujinice@users.noreply.github.com": "fujinice",
|
||||
"276689385+carltonawong@users.noreply.github.com": "carltonawong",
|
||||
"195255660+EvilHumphrey@users.noreply.github.com": "EvilHumphrey",
|
||||
|
||||
@ -182,6 +182,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||
"HERMES_SESSION_SOURCE",
|
||||
"HERMES_SESSION_KEY",
|
||||
"HERMES_GATEWAY_SESSION",
|
||||
"HERMES_CRON_SESSION",
|
||||
"_HERMES_GATEWAY",
|
||||
"HERMES_PLATFORM",
|
||||
"HERMES_MODEL",
|
||||
|
||||
@ -1395,11 +1395,16 @@ class TestApprovalTimeoutIsNotConsent:
|
||||
|
||||
self._saved_env = {
|
||||
k: os.environ.get(k)
|
||||
for k in ("HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE",
|
||||
for k in ("HERMES_GATEWAY_SESSION", "HERMES_CRON_SESSION",
|
||||
"HERMES_YOLO_MODE",
|
||||
"HERMES_SESSION_KEY", "HERMES_INTERACTIVE")
|
||||
}
|
||||
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||
os.environ.pop("HERMES_INTERACTIVE", None)
|
||||
# HERMES_CRON_SESSION takes priority over HERMES_GATEWAY_SESSION in
|
||||
# _is_gateway_approval_context(); a leaked value from a parent cron
|
||||
# process would force the cron path and break these gateway tests.
|
||||
os.environ.pop("HERMES_CRON_SESSION", None)
|
||||
os.environ["HERMES_GATEWAY_SESSION"] = "1"
|
||||
os.environ["HERMES_SESSION_KEY"] = self.SESSION_KEY
|
||||
|
||||
|
||||
@ -170,7 +170,6 @@ _sudo_password_cache_lock = threading.Lock()
|
||||
# own callback exactly like before. Gateway mode resolves approvals via
|
||||
# the per-session queue in tools.approval, not through these callbacks,
|
||||
# so it's unaffected.
|
||||
import threading
|
||||
_callback_tls = threading.local()
|
||||
|
||||
|
||||
|
||||
@ -1155,11 +1155,11 @@ async def web_extract_tool(
|
||||
def check_web_api_key() -> bool:
|
||||
"""Check whether the configured web backend is available."""
|
||||
configured = _load_web_config().get("backend", "").lower().strip()
|
||||
if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs"}:
|
||||
if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}:
|
||||
return _is_backend_available(configured)
|
||||
return any(
|
||||
_is_backend_available(backend)
|
||||
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs")
|
||||
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user