fix(browser): recover from CDP DOM-node serialization crash in browser_console (#35385)

browser_console(expression="document.body") returned the cryptic CDP error "Object reference chain is too long" instead of a usable result. With returnByValue=true, Chrome deep-serializes the eval result; for a live DOM Node/NodeList/Window that serialization overruns CDP's recursion guard and fails the whole call with a protocol-level error (not a JS exception), which _browser_eval surfaced raw. - browser_supervisor.evaluate_runtime: on that specific error, retry once with returnByValue=false so Chrome returns the node's description string — the same graceful path already used for document.querySelector() results. - browser_tool._browser_eval (CLI subprocess fallback): the subprocess can't retry, so convert the reference-chain error into actionable guidance (extract a primitive / use JSON.stringify) instead of leaking it raw. No expression rewriting — normal evals (1+41 -> 42) are untouched.
2026-05-30 07:31:25 -07:00
parent 42bbd221e8
commit 92ad7cc62c
3 changed files with 156 additions and 8 deletions
--- a/tests/tools/test_browser_eval_supervisor_path.py
+++ b/tests/tools/test_browser_eval_supervisor_path.py
@ -189,6 +189,32 @@ class TestBrowserEvalSupervisorPath:
        json.loads(bt._browser_eval("1+1"))
        assert called["subprocess"] is True

+    def test_subprocess_reference_chain_error_becomes_guidance(self, monkeypatch):
+        """The CLI subprocess can't retry with returnByValue=False, so the
+        cryptic 'Object reference chain is too long' CDP error must be turned
+        into actionable guidance instead of surfaced raw."""
+        import tools.browser_tool as bt
+
+        # No supervisor → subprocess path runs.
+        _patch_supervisor(monkeypatch, None)
+
+        def _fake_subprocess(task_id, cmd, args):
+            assert cmd == "eval"
+            return {
+                "success": False,
+                "error": "Runtime.evaluate failed: Object reference chain is too long",
+            }
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+
+        out = json.loads(bt._browser_eval("document.body"))
+        assert out["success"] is False
+        # Raw protocol error must NOT leak through.
+        assert "reference chain" not in out["error"].lower()
+        # Actionable guidance instead.
+        assert "primitive" in out["error"].lower()
+        assert "DOM node" in out["error"] or "dom node" in out["error"].lower()
+

 # ---------------------------------------------------------------------------
 # Response shaping: CDPSupervisor.evaluate_runtime
@ -361,3 +387,91 @@ class TestEvaluateRuntimeResponseShaping:
        finally:
            loop.call_soon_threadsafe(loop.stop)
            thread.join(timeout=2)
+
+
+def _make_supervisor_with_cdp_fn(cdp_fn):
+    """Like ``_make_supervisor_with_cdp`` but lets the test supply a coroutine
+    function as ``_cdp`` so behaviour can vary by params (e.g. returnByValue).
+    """
+    import asyncio
+    import threading
+
+    from tools.browser_supervisor import CDPSupervisor
+
+    sup = object.__new__(CDPSupervisor)
+    sup._state_lock = threading.Lock()
+    sup._active = True
+    sup._page_session_id = "test-session-id"
+
+    loop = asyncio.new_event_loop()
+
+    def _runner():
+        asyncio.set_event_loop(loop)
+        loop.run_forever()
+
+    thread = threading.Thread(target=_runner, daemon=True)
+    thread.start()
+
+    sup._cdp = cdp_fn  # type: ignore[method-assign]
+    sup._loop = loop
+    sup._thread = thread
+    return sup
+
+
+class TestEvaluateRuntimeDomNodeCrashRetry:
+    """returnByValue=True on a DOM node fails CDP serialization with 'Object
+    reference chain is too long'.  evaluate_runtime must retry with
+    returnByValue=False and return the node's description instead of crashing.
+    """
+
+    def test_reference_chain_crash_retries_without_by_value(self):
+        calls = []
+
+        async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
+            by_value = (params or {}).get("returnByValue")
+            calls.append(by_value)
+            if by_value:
+                # Mirror _read_loop turning a top-level CDP error into a RuntimeError.
+                raise RuntimeError(
+                    "CDP error on id=7: {'code': -32000, "
+                    "'message': 'Object reference chain is too long'}"
+                )
+            # returnByValue=False: Chrome returns the node's description, no value.
+            return {
+                "id": 8,
+                "result": {
+                    "result": {
+                        "type": "object",
+                        "subtype": "node",
+                        "description": "body",
+                    }
+                },
+            }
+
+        sup = _make_supervisor_with_cdp_fn(_fake_cdp)
+        try:
+            out = sup.evaluate_runtime("document.body")
+            assert out["ok"] is True
+            assert out["result"] == "body"
+            assert out["result_type"] == "object"
+            # First call by_value=True (crashed), retried with by_value=False.
+            assert calls == [True, False]
+        finally:
+            _stop_supervisor(sup)
+
+    def test_unrelated_error_does_not_retry(self):
+        calls = []
+
+        async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
+            calls.append((params or {}).get("returnByValue"))
+            raise RuntimeError("CDP error on id=3: {'message': 'Target closed'}")
+
+        sup = _make_supervisor_with_cdp_fn(_fake_cdp)
+        try:
+            out = sup.evaluate_runtime("document.body")
+            assert out["ok"] is False
+            assert "Target closed" in out["error"]
+            # No retry for unrelated failures — exactly one call.
+            assert calls == [True]
+        finally:
+            _stop_supervisor(sup)
--- a/tools/browser_supervisor.py
+++ b/tools/browser_supervisor.py
@ -496,12 +496,12 @@ class CDPSupervisor:
        if not session_id:
            return {"ok": False, "error": "supervisor has no attached page session"}

-        async def _do_eval() -> Dict[str, Any]:
+        async def _do_eval(by_value: bool) -> Dict[str, Any]:
            return await self._cdp(
                "Runtime.evaluate",
                {
                    "expression": expression,
-                    "returnByValue": return_by_value,
+                    "returnByValue": by_value,
                    "awaitPromise": await_promise,
                    # userGesture matters for things like clipboard / fullscreen
                    # APIs that require a user-activation context.
@ -511,14 +511,32 @@ class CDPSupervisor:
                timeout=timeout,
            )

-        try:
-            from agent.async_utils import safe_schedule_threadsafe
-            fut = safe_schedule_threadsafe(_do_eval(), loop)
+        from agent.async_utils import safe_schedule_threadsafe
+
+        def _run_eval(by_value: bool) -> Dict[str, Any]:
+            fut = safe_schedule_threadsafe(_do_eval(by_value), loop)
            if fut is None:
-                return {"ok": False, "error": "Browser supervisor loop unavailable"}
-            response = fut.result(timeout=timeout + 1)
+                raise RuntimeError("Browser supervisor loop unavailable")
+            return fut.result(timeout=timeout + 1)
+
+        try:
+            response = _run_eval(return_by_value)
        except Exception as exc:
-            return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
+            # ``returnByValue=True`` asks Chrome to deep-serialize the result.
+            # For live DOM nodes / NodeLists / Window that serialization can
+            # blow past CDP's recursion guard and fail the whole call with
+            # ``Object reference chain is too long`` (a protocol-level error,
+            # not a JS exception).  Retry once with ``returnByValue=False`` so
+            # Chrome returns the object's description string instead — the same
+            # graceful degradation path used for ``document.querySelector(...)``
+            # results — rather than crashing the eval.
+            if return_by_value and "reference chain is too long" in str(exc).lower():
+                try:
+                    response = _run_eval(False)
+                except Exception as exc2:
+                    return {"ok": False, "error": f"{type(exc2).__name__}: {exc2}"}
+            else:
+                return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

        # Runtime.evaluate response shape:
        #   {"id": N, "result": {"result": {"type": "...", "value": ..., ...},
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@ -2874,6 +2874,22 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
                "error": f"JavaScript evaluation is not supported by this browser backend. {err}",
            }
            return json.dumps(_copy_fallback_warning(response, result))
+        # A live DOM node / NodeList / Window can't be JSON-serialized by CDP
+        # and fails the eval with "Object reference chain is too long".  The
+        # supervisor fast path retries with returnByValue=false, but the CLI
+        # subprocess can't, so turn the cryptic protocol error into actionable
+        # guidance instead of surfacing it raw.
+        if "reference chain is too long" in err.lower():
+            response = {
+                "success": False,
+                "error": (
+                    "Expression returned a live DOM node / NodeList / Window, "
+                    "which can't be serialized. Extract a primitive value "
+                    "(e.g. .innerText, .href, .src, .value) or use "
+                    "JSON.stringify() / a snapshot tool instead."
+                ),
+            }
+            return json.dumps(_copy_fallback_warning(response, result))
        response = {
            "success": False,
            "error": err,