"""nemo_relay — optional Hermes plugin for NeMo Relay observability.""" from __future__ import annotations import asyncio import inspect import json import logging import os import threading import tomllib from dataclasses import dataclass, field from pathlib import Path from typing import Any, Optional logger = logging.getLogger(__name__) _INIT_FAILED = object() _LOCK = threading.RLock() _RUNTIME: "_Runtime | object | None" = None @dataclass class _SessionState: session_id: str handle: Any = None atif_exporter: Any = None atif_subscriber_name: str = "" is_embedded_subagent: bool = False parent_session_id: str = "" llm_spans: dict[str, Any] = field(default_factory=dict) tool_spans: dict[str, Any] = field(default_factory=dict) @dataclass class _SubagentParent: parent_session_id: str parent_handle: Any metadata: dict[str, Any] @dataclass class _Settings: plugins_toml_path: str = "" atof_enabled: bool = False atof_output_directory: str = "" atof_filename: str = "hermes-atof.jsonl" atof_mode: str = "append" atif_enabled: bool = False atif_output_directory: str = "" atif_filename_template: str = "hermes-atif-{session_id}.json" atif_subagent_export_mode: str = "embedded" atif_agent_name: str = "Hermes Agent" atif_agent_version: str = "unknown" atif_model_name: str = "unknown" class _Runtime: def __init__(self, nemo_relay: Any, settings: _Settings) -> None: self.nemo_relay = nemo_relay self.settings = settings self.sessions: dict[str, _SessionState] = {} self.subagent_parents: dict[str, _SubagentParent] = {} self.atof_exporter: Any = None self._plugin_config_initialized = self._configure_plugins_toml() if not self._plugin_config_initialized: self._configure_atof() def _configure_plugins_toml(self) -> bool: if not self.settings.plugins_toml_path: return False plugin_mod = getattr(self.nemo_relay, "plugin", None) initialize = getattr(plugin_mod, "initialize", None) if not callable(initialize): return False config_path = Path(self.settings.plugins_toml_path) try: config = tomllib.loads(config_path.read_text(encoding="utf-8")) self._ensure_plugin_config_output_dirs(config) result = initialize(config) if inspect.isawaitable(result): asyncio.run(result) return True except RuntimeError: logger.debug("NeMo Relay plugins.toml init skipped inside a running event loop") return False except Exception as exc: logger.debug("NeMo Relay plugins.toml init failed: %s", exc, exc_info=True) return False def _ensure_plugin_config_output_dirs(self, config: dict[str, Any]) -> None: for component in config.get("components", []): if not isinstance(component, dict): continue if component.get("kind") != "observability": continue if component.get("enabled") is False: continue component_config = component.get("config") if not isinstance(component_config, dict): continue for exporter_name in ("atof", "atif"): exporter_config = component_config.get(exporter_name) if not isinstance(exporter_config, dict): continue output_directory = exporter_config.get("output_directory") if isinstance(output_directory, str) and output_directory.strip(): Path(output_directory).mkdir(parents=True, exist_ok=True) def _configure_atof(self) -> None: if not self.settings.atof_enabled: return config = self.nemo_relay.AtofExporterConfig() if self.settings.atof_output_directory: Path(self.settings.atof_output_directory).mkdir(parents=True, exist_ok=True) config.output_directory = self.settings.atof_output_directory config.filename = self.settings.atof_filename if self.settings.atof_mode.lower() == "overwrite": config.mode = self.nemo_relay.AtofExporterMode.Overwrite else: config.mode = self.nemo_relay.AtofExporterMode.Append self.atof_exporter = self.nemo_relay.AtofExporter(config) self.atof_exporter.register("hermes.nemo_relay.atof") def ensure_session(self, kwargs: dict[str, Any]) -> _SessionState: session_id = _session_id(kwargs) state = self.sessions.get(session_id) if state is not None: return state state = _SessionState(session_id=session_id) if self.settings.atif_enabled: state.atif_exporter = self.nemo_relay.AtifExporter( session_id, self.settings.atif_agent_name, self.settings.atif_agent_version, model_name=str(kwargs.get("model") or self.settings.atif_model_name), extra={"source": "hermes-agent", "plugin": "observability/nemo_relay"}, ) state.atif_subscriber_name = f"hermes.nemo_relay.atif.{session_id}" state.atif_exporter.register(state.atif_subscriber_name) subagent_parent = self.subagent_parents.get(session_id) metadata = _metadata(kwargs) parent_handle = None if subagent_parent is not None: parent_handle = subagent_parent.parent_handle metadata = {**metadata, **subagent_parent.metadata} state.is_embedded_subagent = True state.parent_session_id = subagent_parent.parent_session_id state.handle = self.nemo_relay.scope.push( f"hermes-session-{session_id}", self.nemo_relay.ScopeType.Agent, handle=parent_handle, data={"session_id": session_id}, metadata=metadata, ) self.sessions[session_id] = state return state def export_atif(self, state: _SessionState) -> None: if not self.settings.atif_enabled or state.atif_exporter is None: return if state.is_embedded_subagent and self.settings.atif_subagent_export_mode != "all": return output_dir = self.settings.atif_output_directory if not output_dir: return Path(output_dir).mkdir(parents=True, exist_ok=True) filename = self.settings.atif_filename_template.format(session_id=state.session_id) Path(output_dir, filename).write_text(state.atif_exporter.export_json(), encoding="utf-8") def close_session(self, kwargs: dict[str, Any]) -> None: session_id = _session_id(kwargs) self.subagent_parents.pop(session_id, None) state = self.sessions.pop(session_id, None) if state is None: return if state.handle is not None: try: self.nemo_relay.scope.pop(state.handle, output=_jsonable(kwargs)) except Exception: logger.debug("NeMo Relay session pop failed", exc_info=True) self.export_atif(state) if state.atif_exporter is not None and state.atif_subscriber_name: try: state.atif_exporter.deregister(state.atif_subscriber_name) except Exception: logger.debug("NeMo Relay ATIF deregister failed", exc_info=True) def mark(self, name: str, kwargs: dict[str, Any]) -> None: state = self.ensure_session(kwargs) self.nemo_relay.scope.event( name, handle=state.handle, data=_jsonable(kwargs), metadata=_metadata(kwargs), ) def mark_subagent_start(self, kwargs: dict[str, Any]) -> None: parent_state = self.ensure_session(kwargs) metadata = _metadata(kwargs) child_session_id = _child_session_id(kwargs) if child_session_id: self.subagent_parents[child_session_id] = _SubagentParent( parent_session_id=parent_state.session_id, parent_handle=parent_state.handle, metadata=_subagent_child_metadata(kwargs, metadata), ) self.nemo_relay.scope.event( "hermes.subagent.start", handle=parent_state.handle, data=_jsonable(kwargs), metadata=metadata, ) def mark_subagent_stop(self, kwargs: dict[str, Any]) -> None: child_session_id = _child_session_id(kwargs) if child_session_id: self.subagent_parents.pop(child_session_id, None) self.mark("hermes.subagent.stop", kwargs) def register(ctx) -> None: ctx.register_hook("on_session_start", on_session_start) ctx.register_hook("on_session_end", on_session_end) ctx.register_hook("on_session_finalize", on_session_finalize) ctx.register_hook("on_session_reset", on_session_reset) ctx.register_hook("pre_llm_call", on_pre_llm_call) ctx.register_hook("post_llm_call", on_post_llm_call) ctx.register_hook("pre_api_request", on_pre_api_request) ctx.register_hook("post_api_request", on_post_api_request) ctx.register_hook("api_request_error", on_api_request_error) ctx.register_hook("pre_tool_call", on_pre_tool_call) ctx.register_hook("post_tool_call", on_post_tool_call) ctx.register_hook("pre_approval_request", on_pre_approval_request) ctx.register_hook("post_approval_response", on_post_approval_response) ctx.register_hook("subagent_start", on_subagent_start) ctx.register_hook("subagent_stop", on_subagent_stop) def on_session_start(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.ensure_session(kwargs)) def on_session_end(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: (runtime.mark("hermes.session.end", kwargs), runtime.export_atif(runtime.ensure_session(kwargs)))) def on_session_finalize(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.close_session(kwargs)) def on_session_reset(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.close_session(kwargs)) def on_pre_llm_call(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark("hermes.turn.start", kwargs)) def on_post_llm_call(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark("hermes.turn.end", kwargs)) def on_pre_api_request(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is None: return def _record() -> None: state = runtime.ensure_session(kwargs) request_payload = kwargs.get("request") request_body = request_payload.get("body") if isinstance(request_payload, dict) else {} request = runtime.nemo_relay.LLMRequest({}, _jsonable(request_body)) span = runtime.nemo_relay.llm.call( str(kwargs.get("provider") or "llm"), request, handle=state.handle, data=_jsonable({"turn_id": kwargs.get("turn_id"), "api_request_id": kwargs.get("api_request_id")}), metadata=_metadata(kwargs), model_name=str(kwargs.get("model") or ""), ) state.llm_spans[_api_key(kwargs)] = span _safe(_record) def on_post_api_request(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is None: return def _record() -> None: state = runtime.ensure_session(kwargs) span = state.llm_spans.pop(_api_key(kwargs), None) if span is None: runtime.mark("hermes.api.response.unmatched", kwargs) return runtime.nemo_relay.llm.call_end( span, _jsonable(kwargs.get("response") or {}), data=_jsonable({"usage": kwargs.get("usage"), "finish_reason": kwargs.get("finish_reason")}), metadata=_metadata(kwargs), ) _safe(_record) def on_api_request_error(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is None: return def _record() -> None: state = runtime.ensure_session(kwargs) span = state.llm_spans.pop(_api_key(kwargs), None) if span is None: runtime.mark("hermes.api.error", kwargs) return runtime.nemo_relay.llm.call_end( span, {"error": _jsonable(kwargs.get("error") or {})}, data=_jsonable(kwargs), metadata=_metadata(kwargs), ) _safe(_record) def on_pre_tool_call(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is None: return def _record() -> None: state = runtime.ensure_session(kwargs) span = runtime.nemo_relay.tools.call( str(kwargs.get("tool_name") or "tool"), _jsonable(kwargs.get("args") or {}), handle=state.handle, data=_jsonable({"turn_id": kwargs.get("turn_id"), "api_request_id": kwargs.get("api_request_id")}), metadata=_metadata(kwargs), tool_call_id=str(kwargs.get("tool_call_id") or ""), ) state.tool_spans[_tool_key(kwargs)] = span _safe(_record) def on_post_tool_call(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is None: return def _record() -> None: state = runtime.ensure_session(kwargs) span = state.tool_spans.pop(_tool_key(kwargs), None) if span is None: runtime.mark("hermes.tool.response.unmatched", kwargs) return runtime.nemo_relay.tools.call_end( span, _jsonable(kwargs.get("result")), data=_jsonable({"status": kwargs.get("status"), "duration_ms": kwargs.get("duration_ms")}), metadata=_metadata(kwargs), ) _safe(_record) def on_pre_approval_request(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark("hermes.approval.request", kwargs)) def on_post_approval_response(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark("hermes.approval.response", kwargs)) def on_subagent_start(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark_subagent_start(kwargs)) def on_subagent_stop(**kwargs: Any) -> None: runtime = _get_runtime() if runtime is not None: _safe(lambda: runtime.mark_subagent_stop(kwargs)) def _get_runtime() -> Optional[_Runtime]: global _RUNTIME with _LOCK: if _RUNTIME is _INIT_FAILED: return None if isinstance(_RUNTIME, _Runtime): return _RUNTIME try: import nemo_relay as nemo_runtime except Exception as exc: logger.debug("NeMo Relay plugin disabled: import failed: %s", exc) _RUNTIME = _INIT_FAILED return None try: _RUNTIME = _Runtime(nemo_relay=nemo_runtime, settings=_load_settings()) except Exception as exc: logger.debug("NeMo Relay plugin disabled: init failed: %s", exc, exc_info=True) _RUNTIME = _INIT_FAILED return None return _RUNTIME def _load_settings() -> _Settings: return _Settings( plugins_toml_path=_env("HERMES_NEMO_RELAY_PLUGINS_TOML"), atof_enabled=_env_bool("HERMES_NEMO_RELAY_ATOF_ENABLED"), atof_output_directory=_env("HERMES_NEMO_RELAY_ATOF_OUTPUT_DIRECTORY"), atof_filename=_env("HERMES_NEMO_RELAY_ATOF_FILENAME") or "hermes-atof.jsonl", atof_mode=_env("HERMES_NEMO_RELAY_ATOF_MODE") or "append", atif_enabled=_env_bool("HERMES_NEMO_RELAY_ATIF_ENABLED"), atif_output_directory=_env("HERMES_NEMO_RELAY_ATIF_OUTPUT_DIRECTORY"), atif_filename_template=_env("HERMES_NEMO_RELAY_ATIF_FILENAME_TEMPLATE") or "hermes-atif-{session_id}.json", atif_subagent_export_mode=_atif_subagent_export_mode(), atif_agent_name=_env("HERMES_NEMO_RELAY_ATIF_AGENT_NAME") or "Hermes Agent", atif_agent_version=_env("HERMES_NEMO_RELAY_ATIF_AGENT_VERSION") or "unknown", atif_model_name=_env("HERMES_NEMO_RELAY_ATIF_MODEL_NAME") or "unknown", ) def _env(name: str) -> str: return os.environ.get(name, "").strip() def _atif_subagent_export_mode() -> str: mode = _env("HERMES_NEMO_RELAY_ATIF_SUBAGENT_EXPORT_MODE").lower() return "all" if mode == "all" else "embedded" def _env_bool(name: str) -> bool: return _env(name).lower() in {"1", "true", "yes", "on"} def _session_id(kwargs: dict[str, Any]) -> str: return str(kwargs.get("session_id") or kwargs.get("parent_session_id") or "default") def _child_session_id(kwargs: dict[str, Any]) -> str: return str(kwargs.get("child_session_id") or "") def _subagent_child_metadata(kwargs: dict[str, Any], parent_metadata: dict[str, Any]) -> dict[str, Any]: child_session_id = _child_session_id(kwargs) metadata = { "session_id": child_session_id, "trajectory_id": child_session_id, "nemo_relay_scope_role": "subagent", } for target, source in ( ("subagent_id", "child_subagent_id"), ("child_session_id", "child_session_id"), ("child_subagent_id", "child_subagent_id"), ("child_role", "child_role"), ("parent_session_id", "parent_session_id"), ("parent_turn_id", "parent_turn_id"), ("parent_subagent_id", "parent_subagent_id"), ("parent_trajectory_id", "parent_trajectory_id"), ("telemetry_schema_version", "telemetry_schema_version"), ): value = parent_metadata.get(source) if value is not None: metadata[target] = value return metadata def _api_key(kwargs: dict[str, Any]) -> str: return str(kwargs.get("api_request_id") or f"{_session_id(kwargs)}:{kwargs.get('api_call_count') or 'api'}") def _tool_key(kwargs: dict[str, Any]) -> str: return str( kwargs.get("tool_call_id") or f"{_session_id(kwargs)}:{kwargs.get('turn_id') or ''}:{kwargs.get('tool_name') or 'tool'}" ) def _metadata(kwargs: dict[str, Any]) -> dict[str, Any]: keys = ( "telemetry_schema_version", "session_id", "platform", "task_id", "turn_id", "api_request_id", "tool_call_id", "parent_session_id", "parent_turn_id", "parent_subagent_id", "child_session_id", "child_subagent_id", "child_role", "child_status", "provider", "model", "api_mode", "status", "reason", ) metadata = { key: _jsonable(kwargs[key]) for key in keys if key in kwargs and kwargs[key] is not None } if "session_id" in metadata: metadata.setdefault("trajectory_id", metadata["session_id"]) if "parent_session_id" in metadata: metadata.setdefault("parent_trajectory_id", metadata["parent_session_id"]) if "child_session_id" in metadata: metadata.setdefault("child_trajectory_id", metadata["child_session_id"]) return metadata def _jsonable(value: Any) -> Any: if value is None or isinstance(value, (str, int, float, bool)): return value if isinstance(value, dict): return {str(k): _jsonable(v) for k, v in value.items()} if isinstance(value, (list, tuple, set)): return [_jsonable(v) for v in value] try: if hasattr(value, "model_dump"): return _jsonable(value.model_dump(mode="json")) except Exception: pass try: return json.loads(json.dumps(value, default=str)) except Exception: return str(value) def _safe(fn) -> None: try: fn() except Exception as exc: logger.debug("NeMo Relay hook handling failed: %s", exc, exc_info=True) def reset_for_tests() -> None: global _RUNTIME with _LOCK: _RUNTIME = None