Files
hermes-agent/plugins/observability/nemo_relay/__init__.py
Bryan Bednarski 0d9b7132ff feat(observability): observer-grade telemetry hooks + NeMo-Relay plugin
Adds backend-neutral observer hooks for plugins: session, turn, API
request, tool, approval, and subagent lifecycle events with stable
correlation IDs (session_id, task_id, turn_id, api_request_id,
tool_call_id, parent/child subagent ids). Extends VALID_HOOKS with
api_request_error and subagent_start.

Hot path is zero-cost when no plugin subscribes: has_hook()/presence
checks gate all payload construction, request payloads are returned
by reference when no middleware rewrites, and the sanitized response
payload no longer embeds raw response objects.

Bundles the optional NeMo-Relay observability plugin
(plugins/observability/nemo_relay) as an in-repo consumer of the new
hooks, peer to the existing langfuse plugin. Fails open when the
optional nemo-relay package is not installed.

Authored-by: Bryan Bednarski <bbednarski@nvidia.com>
Salvaged from #29722 onto current main.
2026-06-03 06:36:46 -07:00

569 lines
20 KiB
Python

"""nemo_relay — optional Hermes plugin for NeMo Relay observability."""
from __future__ import annotations
import asyncio
import inspect
import json
import logging
import os
import threading
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger(__name__)
_INIT_FAILED = object()
_LOCK = threading.RLock()
_RUNTIME: "_Runtime | object | None" = None
@dataclass
class _SessionState:
session_id: str
handle: Any = None
atif_exporter: Any = None
atif_subscriber_name: str = ""
is_embedded_subagent: bool = False
parent_session_id: str = ""
llm_spans: dict[str, Any] = field(default_factory=dict)
tool_spans: dict[str, Any] = field(default_factory=dict)
@dataclass
class _SubagentParent:
parent_session_id: str
parent_handle: Any
metadata: dict[str, Any]
@dataclass
class _Settings:
plugins_toml_path: str = ""
atof_enabled: bool = False
atof_output_directory: str = ""
atof_filename: str = "hermes-atof.jsonl"
atof_mode: str = "append"
atif_enabled: bool = False
atif_output_directory: str = ""
atif_filename_template: str = "hermes-atif-{session_id}.json"
atif_subagent_export_mode: str = "embedded"
atif_agent_name: str = "Hermes Agent"
atif_agent_version: str = "unknown"
atif_model_name: str = "unknown"
class _Runtime:
def __init__(self, nemo_relay: Any, settings: _Settings) -> None:
self.nemo_relay = nemo_relay
self.settings = settings
self.sessions: dict[str, _SessionState] = {}
self.subagent_parents: dict[str, _SubagentParent] = {}
self.atof_exporter: Any = None
self._plugin_config_initialized = self._configure_plugins_toml()
if not self._plugin_config_initialized:
self._configure_atof()
def _configure_plugins_toml(self) -> bool:
if not self.settings.plugins_toml_path:
return False
plugin_mod = getattr(self.nemo_relay, "plugin", None)
initialize = getattr(plugin_mod, "initialize", None)
if not callable(initialize):
return False
config_path = Path(self.settings.plugins_toml_path)
try:
config = tomllib.loads(config_path.read_text(encoding="utf-8"))
self._ensure_plugin_config_output_dirs(config)
result = initialize(config)
if inspect.isawaitable(result):
asyncio.run(result)
return True
except RuntimeError:
logger.debug("NeMo Relay plugins.toml init skipped inside a running event loop")
return False
except Exception as exc:
logger.debug("NeMo Relay plugins.toml init failed: %s", exc, exc_info=True)
return False
def _ensure_plugin_config_output_dirs(self, config: dict[str, Any]) -> None:
for component in config.get("components", []):
if not isinstance(component, dict):
continue
if component.get("kind") != "observability":
continue
if component.get("enabled") is False:
continue
component_config = component.get("config")
if not isinstance(component_config, dict):
continue
for exporter_name in ("atof", "atif"):
exporter_config = component_config.get(exporter_name)
if not isinstance(exporter_config, dict):
continue
output_directory = exporter_config.get("output_directory")
if isinstance(output_directory, str) and output_directory.strip():
Path(output_directory).mkdir(parents=True, exist_ok=True)
def _configure_atof(self) -> None:
if not self.settings.atof_enabled:
return
config = self.nemo_relay.AtofExporterConfig()
if self.settings.atof_output_directory:
Path(self.settings.atof_output_directory).mkdir(parents=True, exist_ok=True)
config.output_directory = self.settings.atof_output_directory
config.filename = self.settings.atof_filename
if self.settings.atof_mode.lower() == "overwrite":
config.mode = self.nemo_relay.AtofExporterMode.Overwrite
else:
config.mode = self.nemo_relay.AtofExporterMode.Append
self.atof_exporter = self.nemo_relay.AtofExporter(config)
self.atof_exporter.register("hermes.nemo_relay.atof")
def ensure_session(self, kwargs: dict[str, Any]) -> _SessionState:
session_id = _session_id(kwargs)
state = self.sessions.get(session_id)
if state is not None:
return state
state = _SessionState(session_id=session_id)
if self.settings.atif_enabled:
state.atif_exporter = self.nemo_relay.AtifExporter(
session_id,
self.settings.atif_agent_name,
self.settings.atif_agent_version,
model_name=str(kwargs.get("model") or self.settings.atif_model_name),
extra={"source": "hermes-agent", "plugin": "observability/nemo_relay"},
)
state.atif_subscriber_name = f"hermes.nemo_relay.atif.{session_id}"
state.atif_exporter.register(state.atif_subscriber_name)
subagent_parent = self.subagent_parents.get(session_id)
metadata = _metadata(kwargs)
parent_handle = None
if subagent_parent is not None:
parent_handle = subagent_parent.parent_handle
metadata = {**metadata, **subagent_parent.metadata}
state.is_embedded_subagent = True
state.parent_session_id = subagent_parent.parent_session_id
state.handle = self.nemo_relay.scope.push(
f"hermes-session-{session_id}",
self.nemo_relay.ScopeType.Agent,
handle=parent_handle,
data={"session_id": session_id},
metadata=metadata,
)
self.sessions[session_id] = state
return state
def export_atif(self, state: _SessionState) -> None:
if not self.settings.atif_enabled or state.atif_exporter is None:
return
if state.is_embedded_subagent and self.settings.atif_subagent_export_mode != "all":
return
output_dir = self.settings.atif_output_directory
if not output_dir:
return
Path(output_dir).mkdir(parents=True, exist_ok=True)
filename = self.settings.atif_filename_template.format(session_id=state.session_id)
Path(output_dir, filename).write_text(state.atif_exporter.export_json(), encoding="utf-8")
def close_session(self, kwargs: dict[str, Any]) -> None:
session_id = _session_id(kwargs)
self.subagent_parents.pop(session_id, None)
state = self.sessions.pop(session_id, None)
if state is None:
return
if state.handle is not None:
try:
self.nemo_relay.scope.pop(state.handle, output=_jsonable(kwargs))
except Exception:
logger.debug("NeMo Relay session pop failed", exc_info=True)
self.export_atif(state)
if state.atif_exporter is not None and state.atif_subscriber_name:
try:
state.atif_exporter.deregister(state.atif_subscriber_name)
except Exception:
logger.debug("NeMo Relay ATIF deregister failed", exc_info=True)
def mark(self, name: str, kwargs: dict[str, Any]) -> None:
state = self.ensure_session(kwargs)
self.nemo_relay.scope.event(
name,
handle=state.handle,
data=_jsonable(kwargs),
metadata=_metadata(kwargs),
)
def mark_subagent_start(self, kwargs: dict[str, Any]) -> None:
parent_state = self.ensure_session(kwargs)
metadata = _metadata(kwargs)
child_session_id = _child_session_id(kwargs)
if child_session_id:
self.subagent_parents[child_session_id] = _SubagentParent(
parent_session_id=parent_state.session_id,
parent_handle=parent_state.handle,
metadata=_subagent_child_metadata(kwargs, metadata),
)
self.nemo_relay.scope.event(
"hermes.subagent.start",
handle=parent_state.handle,
data=_jsonable(kwargs),
metadata=metadata,
)
def mark_subagent_stop(self, kwargs: dict[str, Any]) -> None:
child_session_id = _child_session_id(kwargs)
if child_session_id:
self.subagent_parents.pop(child_session_id, None)
self.mark("hermes.subagent.stop", kwargs)
def register(ctx) -> None:
ctx.register_hook("on_session_start", on_session_start)
ctx.register_hook("on_session_end", on_session_end)
ctx.register_hook("on_session_finalize", on_session_finalize)
ctx.register_hook("on_session_reset", on_session_reset)
ctx.register_hook("pre_llm_call", on_pre_llm_call)
ctx.register_hook("post_llm_call", on_post_llm_call)
ctx.register_hook("pre_api_request", on_pre_api_request)
ctx.register_hook("post_api_request", on_post_api_request)
ctx.register_hook("api_request_error", on_api_request_error)
ctx.register_hook("pre_tool_call", on_pre_tool_call)
ctx.register_hook("post_tool_call", on_post_tool_call)
ctx.register_hook("pre_approval_request", on_pre_approval_request)
ctx.register_hook("post_approval_response", on_post_approval_response)
ctx.register_hook("subagent_start", on_subagent_start)
ctx.register_hook("subagent_stop", on_subagent_stop)
def on_session_start(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.ensure_session(kwargs))
def on_session_end(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: (runtime.mark("hermes.session.end", kwargs), runtime.export_atif(runtime.ensure_session(kwargs))))
def on_session_finalize(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.close_session(kwargs))
def on_session_reset(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.close_session(kwargs))
def on_pre_llm_call(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark("hermes.turn.start", kwargs))
def on_post_llm_call(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark("hermes.turn.end", kwargs))
def on_pre_api_request(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is None:
return
def _record() -> None:
state = runtime.ensure_session(kwargs)
request_payload = kwargs.get("request")
request_body = request_payload.get("body") if isinstance(request_payload, dict) else {}
request = runtime.nemo_relay.LLMRequest({}, _jsonable(request_body))
span = runtime.nemo_relay.llm.call(
str(kwargs.get("provider") or "llm"),
request,
handle=state.handle,
data=_jsonable({"turn_id": kwargs.get("turn_id"), "api_request_id": kwargs.get("api_request_id")}),
metadata=_metadata(kwargs),
model_name=str(kwargs.get("model") or ""),
)
state.llm_spans[_api_key(kwargs)] = span
_safe(_record)
def on_post_api_request(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is None:
return
def _record() -> None:
state = runtime.ensure_session(kwargs)
span = state.llm_spans.pop(_api_key(kwargs), None)
if span is None:
runtime.mark("hermes.api.response.unmatched", kwargs)
return
runtime.nemo_relay.llm.call_end(
span,
_jsonable(kwargs.get("response") or {}),
data=_jsonable({"usage": kwargs.get("usage"), "finish_reason": kwargs.get("finish_reason")}),
metadata=_metadata(kwargs),
)
_safe(_record)
def on_api_request_error(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is None:
return
def _record() -> None:
state = runtime.ensure_session(kwargs)
span = state.llm_spans.pop(_api_key(kwargs), None)
if span is None:
runtime.mark("hermes.api.error", kwargs)
return
runtime.nemo_relay.llm.call_end(
span,
{"error": _jsonable(kwargs.get("error") or {})},
data=_jsonable(kwargs),
metadata=_metadata(kwargs),
)
_safe(_record)
def on_pre_tool_call(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is None:
return
def _record() -> None:
state = runtime.ensure_session(kwargs)
span = runtime.nemo_relay.tools.call(
str(kwargs.get("tool_name") or "tool"),
_jsonable(kwargs.get("args") or {}),
handle=state.handle,
data=_jsonable({"turn_id": kwargs.get("turn_id"), "api_request_id": kwargs.get("api_request_id")}),
metadata=_metadata(kwargs),
tool_call_id=str(kwargs.get("tool_call_id") or ""),
)
state.tool_spans[_tool_key(kwargs)] = span
_safe(_record)
def on_post_tool_call(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is None:
return
def _record() -> None:
state = runtime.ensure_session(kwargs)
span = state.tool_spans.pop(_tool_key(kwargs), None)
if span is None:
runtime.mark("hermes.tool.response.unmatched", kwargs)
return
runtime.nemo_relay.tools.call_end(
span,
_jsonable(kwargs.get("result")),
data=_jsonable({"status": kwargs.get("status"), "duration_ms": kwargs.get("duration_ms")}),
metadata=_metadata(kwargs),
)
_safe(_record)
def on_pre_approval_request(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark("hermes.approval.request", kwargs))
def on_post_approval_response(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark("hermes.approval.response", kwargs))
def on_subagent_start(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark_subagent_start(kwargs))
def on_subagent_stop(**kwargs: Any) -> None:
runtime = _get_runtime()
if runtime is not None:
_safe(lambda: runtime.mark_subagent_stop(kwargs))
def _get_runtime() -> Optional[_Runtime]:
global _RUNTIME
with _LOCK:
if _RUNTIME is _INIT_FAILED:
return None
if isinstance(_RUNTIME, _Runtime):
return _RUNTIME
try:
import nemo_relay as nemo_runtime
except Exception as exc:
logger.debug("NeMo Relay plugin disabled: import failed: %s", exc)
_RUNTIME = _INIT_FAILED
return None
try:
_RUNTIME = _Runtime(nemo_relay=nemo_runtime, settings=_load_settings())
except Exception as exc:
logger.debug("NeMo Relay plugin disabled: init failed: %s", exc, exc_info=True)
_RUNTIME = _INIT_FAILED
return None
return _RUNTIME
def _load_settings() -> _Settings:
return _Settings(
plugins_toml_path=_env("HERMES_NEMO_RELAY_PLUGINS_TOML"),
atof_enabled=_env_bool("HERMES_NEMO_RELAY_ATOF_ENABLED"),
atof_output_directory=_env("HERMES_NEMO_RELAY_ATOF_OUTPUT_DIRECTORY"),
atof_filename=_env("HERMES_NEMO_RELAY_ATOF_FILENAME") or "hermes-atof.jsonl",
atof_mode=_env("HERMES_NEMO_RELAY_ATOF_MODE") or "append",
atif_enabled=_env_bool("HERMES_NEMO_RELAY_ATIF_ENABLED"),
atif_output_directory=_env("HERMES_NEMO_RELAY_ATIF_OUTPUT_DIRECTORY"),
atif_filename_template=_env("HERMES_NEMO_RELAY_ATIF_FILENAME_TEMPLATE") or "hermes-atif-{session_id}.json",
atif_subagent_export_mode=_atif_subagent_export_mode(),
atif_agent_name=_env("HERMES_NEMO_RELAY_ATIF_AGENT_NAME") or "Hermes Agent",
atif_agent_version=_env("HERMES_NEMO_RELAY_ATIF_AGENT_VERSION") or "unknown",
atif_model_name=_env("HERMES_NEMO_RELAY_ATIF_MODEL_NAME") or "unknown",
)
def _env(name: str) -> str:
return os.environ.get(name, "").strip()
def _atif_subagent_export_mode() -> str:
mode = _env("HERMES_NEMO_RELAY_ATIF_SUBAGENT_EXPORT_MODE").lower()
return "all" if mode == "all" else "embedded"
def _env_bool(name: str) -> bool:
return _env(name).lower() in {"1", "true", "yes", "on"}
def _session_id(kwargs: dict[str, Any]) -> str:
return str(kwargs.get("session_id") or kwargs.get("parent_session_id") or "default")
def _child_session_id(kwargs: dict[str, Any]) -> str:
return str(kwargs.get("child_session_id") or "")
def _subagent_child_metadata(kwargs: dict[str, Any], parent_metadata: dict[str, Any]) -> dict[str, Any]:
child_session_id = _child_session_id(kwargs)
metadata = {
"session_id": child_session_id,
"trajectory_id": child_session_id,
"nemo_relay_scope_role": "subagent",
}
for target, source in (
("subagent_id", "child_subagent_id"),
("child_session_id", "child_session_id"),
("child_subagent_id", "child_subagent_id"),
("child_role", "child_role"),
("parent_session_id", "parent_session_id"),
("parent_turn_id", "parent_turn_id"),
("parent_subagent_id", "parent_subagent_id"),
("parent_trajectory_id", "parent_trajectory_id"),
("telemetry_schema_version", "telemetry_schema_version"),
):
value = parent_metadata.get(source)
if value is not None:
metadata[target] = value
return metadata
def _api_key(kwargs: dict[str, Any]) -> str:
return str(kwargs.get("api_request_id") or f"{_session_id(kwargs)}:{kwargs.get('api_call_count') or 'api'}")
def _tool_key(kwargs: dict[str, Any]) -> str:
return str(
kwargs.get("tool_call_id")
or f"{_session_id(kwargs)}:{kwargs.get('turn_id') or ''}:{kwargs.get('tool_name') or 'tool'}"
)
def _metadata(kwargs: dict[str, Any]) -> dict[str, Any]:
keys = (
"telemetry_schema_version",
"session_id",
"platform",
"task_id",
"turn_id",
"api_request_id",
"tool_call_id",
"parent_session_id",
"parent_turn_id",
"parent_subagent_id",
"child_session_id",
"child_subagent_id",
"child_role",
"child_status",
"provider",
"model",
"api_mode",
"status",
"reason",
)
metadata = {
key: _jsonable(kwargs[key])
for key in keys
if key in kwargs and kwargs[key] is not None
}
if "session_id" in metadata:
metadata.setdefault("trajectory_id", metadata["session_id"])
if "parent_session_id" in metadata:
metadata.setdefault("parent_trajectory_id", metadata["parent_session_id"])
if "child_session_id" in metadata:
metadata.setdefault("child_trajectory_id", metadata["child_session_id"])
return metadata
def _jsonable(value: Any) -> Any:
if value is None or isinstance(value, (str, int, float, bool)):
return value
if isinstance(value, dict):
return {str(k): _jsonable(v) for k, v in value.items()}
if isinstance(value, (list, tuple, set)):
return [_jsonable(v) for v in value]
try:
if hasattr(value, "model_dump"):
return _jsonable(value.model_dump(mode="json"))
except Exception:
pass
try:
return json.loads(json.dumps(value, default=str))
except Exception:
return str(value)
def _safe(fn) -> None:
try:
fn()
except Exception as exc:
logger.debug("NeMo Relay hook handling failed: %s", exc, exc_info=True)
def reset_for_tests() -> None:
global _RUNTIME
with _LOCK:
_RUNTIME = None