fix(browser): recover from CDP DOM-node serialization crash in browser_console (#35385)

browser_console(expression="document.body") returned the cryptic CDP error
"Object reference chain is too long" instead of a usable result.

With returnByValue=true, Chrome deep-serializes the eval result; for a live
DOM Node/NodeList/Window that serialization overruns CDP's recursion guard
and fails the whole call with a protocol-level error (not a JS exception),
which _browser_eval surfaced raw.

- browser_supervisor.evaluate_runtime: on that specific error, retry once
  with returnByValue=false so Chrome returns the node's description string —
  the same graceful path already used for document.querySelector() results.
- browser_tool._browser_eval (CLI subprocess fallback): the subprocess can't
  retry, so convert the reference-chain error into actionable guidance
  (extract a primitive / use JSON.stringify) instead of leaking it raw.

No expression rewriting — normal evals (1+41 -> 42) are untouched.
This commit is contained in:
Teknium
2026-05-30 07:31:25 -07:00
committed by GitHub
parent 42bbd221e8
commit 92ad7cc62c
3 changed files with 156 additions and 8 deletions

View File

@ -189,6 +189,32 @@ class TestBrowserEvalSupervisorPath:
json.loads(bt._browser_eval("1+1"))
assert called["subprocess"] is True
def test_subprocess_reference_chain_error_becomes_guidance(self, monkeypatch):
"""The CLI subprocess can't retry with returnByValue=False, so the
cryptic 'Object reference chain is too long' CDP error must be turned
into actionable guidance instead of surfaced raw."""
import tools.browser_tool as bt
# No supervisor → subprocess path runs.
_patch_supervisor(monkeypatch, None)
def _fake_subprocess(task_id, cmd, args):
assert cmd == "eval"
return {
"success": False,
"error": "Runtime.evaluate failed: Object reference chain is too long",
}
monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
out = json.loads(bt._browser_eval("document.body"))
assert out["success"] is False
# Raw protocol error must NOT leak through.
assert "reference chain" not in out["error"].lower()
# Actionable guidance instead.
assert "primitive" in out["error"].lower()
assert "DOM node" in out["error"] or "dom node" in out["error"].lower()
# ---------------------------------------------------------------------------
# Response shaping: CDPSupervisor.evaluate_runtime
@ -361,3 +387,91 @@ class TestEvaluateRuntimeResponseShaping:
finally:
loop.call_soon_threadsafe(loop.stop)
thread.join(timeout=2)
def _make_supervisor_with_cdp_fn(cdp_fn):
"""Like ``_make_supervisor_with_cdp`` but lets the test supply a coroutine
function as ``_cdp`` so behaviour can vary by params (e.g. returnByValue).
"""
import asyncio
import threading
from tools.browser_supervisor import CDPSupervisor
sup = object.__new__(CDPSupervisor)
sup._state_lock = threading.Lock()
sup._active = True
sup._page_session_id = "test-session-id"
loop = asyncio.new_event_loop()
def _runner():
asyncio.set_event_loop(loop)
loop.run_forever()
thread = threading.Thread(target=_runner, daemon=True)
thread.start()
sup._cdp = cdp_fn # type: ignore[method-assign]
sup._loop = loop
sup._thread = thread
return sup
class TestEvaluateRuntimeDomNodeCrashRetry:
"""returnByValue=True on a DOM node fails CDP serialization with 'Object
reference chain is too long'. evaluate_runtime must retry with
returnByValue=False and return the node's description instead of crashing.
"""
def test_reference_chain_crash_retries_without_by_value(self):
calls = []
async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
by_value = (params or {}).get("returnByValue")
calls.append(by_value)
if by_value:
# Mirror _read_loop turning a top-level CDP error into a RuntimeError.
raise RuntimeError(
"CDP error on id=7: {'code': -32000, "
"'message': 'Object reference chain is too long'}"
)
# returnByValue=False: Chrome returns the node's description, no value.
return {
"id": 8,
"result": {
"result": {
"type": "object",
"subtype": "node",
"description": "body",
}
},
}
sup = _make_supervisor_with_cdp_fn(_fake_cdp)
try:
out = sup.evaluate_runtime("document.body")
assert out["ok"] is True
assert out["result"] == "body"
assert out["result_type"] == "object"
# First call by_value=True (crashed), retried with by_value=False.
assert calls == [True, False]
finally:
_stop_supervisor(sup)
def test_unrelated_error_does_not_retry(self):
calls = []
async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
calls.append((params or {}).get("returnByValue"))
raise RuntimeError("CDP error on id=3: {'message': 'Target closed'}")
sup = _make_supervisor_with_cdp_fn(_fake_cdp)
try:
out = sup.evaluate_runtime("document.body")
assert out["ok"] is False
assert "Target closed" in out["error"]
# No retry for unrelated failures — exactly one call.
assert calls == [True]
finally:
_stop_supervisor(sup)

View File

@ -496,12 +496,12 @@ class CDPSupervisor:
if not session_id:
return {"ok": False, "error": "supervisor has no attached page session"}
async def _do_eval() -> Dict[str, Any]:
async def _do_eval(by_value: bool) -> Dict[str, Any]:
return await self._cdp(
"Runtime.evaluate",
{
"expression": expression,
"returnByValue": return_by_value,
"returnByValue": by_value,
"awaitPromise": await_promise,
# userGesture matters for things like clipboard / fullscreen
# APIs that require a user-activation context.
@ -511,14 +511,32 @@ class CDPSupervisor:
timeout=timeout,
)
try:
from agent.async_utils import safe_schedule_threadsafe
fut = safe_schedule_threadsafe(_do_eval(), loop)
from agent.async_utils import safe_schedule_threadsafe
def _run_eval(by_value: bool) -> Dict[str, Any]:
fut = safe_schedule_threadsafe(_do_eval(by_value), loop)
if fut is None:
return {"ok": False, "error": "Browser supervisor loop unavailable"}
response = fut.result(timeout=timeout + 1)
raise RuntimeError("Browser supervisor loop unavailable")
return fut.result(timeout=timeout + 1)
try:
response = _run_eval(return_by_value)
except Exception as exc:
return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
# ``returnByValue=True`` asks Chrome to deep-serialize the result.
# For live DOM nodes / NodeLists / Window that serialization can
# blow past CDP's recursion guard and fail the whole call with
# ``Object reference chain is too long`` (a protocol-level error,
# not a JS exception). Retry once with ``returnByValue=False`` so
# Chrome returns the object's description string instead — the same
# graceful degradation path used for ``document.querySelector(...)``
# results — rather than crashing the eval.
if return_by_value and "reference chain is too long" in str(exc).lower():
try:
response = _run_eval(False)
except Exception as exc2:
return {"ok": False, "error": f"{type(exc2).__name__}: {exc2}"}
else:
return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
# Runtime.evaluate response shape:
# {"id": N, "result": {"result": {"type": "...", "value": ..., ...},

View File

@ -2874,6 +2874,22 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
"error": f"JavaScript evaluation is not supported by this browser backend. {err}",
}
return json.dumps(_copy_fallback_warning(response, result))
# A live DOM node / NodeList / Window can't be JSON-serialized by CDP
# and fails the eval with "Object reference chain is too long". The
# supervisor fast path retries with returnByValue=false, but the CLI
# subprocess can't, so turn the cryptic protocol error into actionable
# guidance instead of surfacing it raw.
if "reference chain is too long" in err.lower():
response = {
"success": False,
"error": (
"Expression returned a live DOM node / NodeList / Window, "
"which can't be serialized. Extract a primitive value "
"(e.g. .innerText, .href, .src, .value) or use "
"JSON.stringify() / a snapshot tool instead."
),
}
return json.dumps(_copy_fallback_warning(response, result))
response = {
"success": False,
"error": err,