diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index df6adbc41..6c6bb5e62 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -133,6 +133,20 @@ def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch): assert "one" not in capped +def test_tui_verbose_default_cap_stays_small(monkeypatch): + # Regression guard for #34095: the verbose tool text shipped to the TUI is + # rendered into a persisted, expanded-by-default trail block for the whole + # session. Raising this cap back toward the old 16KB re-introduces the Ink + # render-tree blowup that silently OOM-killed the TUI. Keep it small. + assert server._TUI_VERBOSE_TEXT_MAX_CHARS <= 2_000 + + huge = "x" * 40_000 + capped = server._cap_tui_verbose_text(huge) + + assert len(capped) < 2_000 + assert capped.startswith("[showing verbose tail; omitted ") + + def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch): redact_module = types.ModuleType("agent.redact") diff --git a/tui_gateway/server.py b/tui_gateway/server.py index a70dd3efe..651c15d6f 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1657,8 +1657,15 @@ def _tool_ctx(name: str, args: dict) -> str: return "" -_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000 -_TUI_VERBOSE_TEXT_MAX_LINES = 240 +# Tool Args/Result text shipped to the TUI for the verbose trail line. The TUI +# renders only a small persisted preview (ui-tui VERBOSE_TRAIL_MAX_CHARS), kept +# all session and expanded by default — so shipping more than that is pure pipe +# waste AND feeds the Ink render-tree blowup that silently OOM-killed the TUI +# parent (#34095). Cap here to match the render budget (a hair more, so the +# "[omitted …]" label is still informative when output is genuinely large). +# Full output stays in the agent context and the SQLite session, untouched. +_TUI_VERBOSE_TEXT_MAX_CHARS = 1_000 +_TUI_VERBOSE_TEXT_MAX_LINES = 16 def _cap_tui_verbose_text(text: str) -> str: diff --git a/ui-tui/src/__tests__/memoryMonitor.test.ts b/ui-tui/src/__tests__/memoryMonitor.test.ts new file mode 100644 index 000000000..f79d7aa9d --- /dev/null +++ b/ui-tui/src/__tests__/memoryMonitor.test.ts @@ -0,0 +1,102 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +// memory.js performs real heap dumps / fs work — stub it so the monitor's +// dump path is a no-op in tests. +vi.mock('../lib/memory.js', () => ({ + performHeapDump: vi.fn(async () => null) +})) + +// @hermes/ink is dynamically imported only on the dump path; stub the eviction. +vi.mock('@hermes/ink', () => ({ evictInkCaches: vi.fn() })) + +import { startMemoryMonitor } from '../lib/memoryMonitor.js' + +const GB = 1024 ** 3 +const MB = 1024 ** 2 + +describe('startMemoryMonitor thresholds (#34095)', () => { + let stop: (() => void) | undefined + + beforeEach(() => { + vi.useFakeTimers() + }) + + afterEach(() => { + stop?.() + stop = undefined + vi.restoreAllMocks() + vi.useRealTimers() + }) + + const withHeap = (heapUsed: number, rss = heapUsed) => + vi.spyOn(process, 'memoryUsage').mockReturnValue({ + arrayBuffers: 0, + external: 0, + heapTotal: heapUsed, + heapUsed, + rss + } as NodeJS.MemoryUsage) + + it('does NOT fire onCritical at 2.5GB when the heap ceiling is 8GB', async () => { + // The old hardcoded 2.5GB constant killed the process at ~31% of the real + // ceiling. With relative thresholds (~88%), 2.5GB is well within normal. + const onCritical = vi.fn() + withHeap(2.5 * GB) + stop = startMemoryMonitor({ intervalMs: 1, onCritical }) + + await vi.advanceTimersByTimeAsync(5) + + expect(onCritical).not.toHaveBeenCalled() + }) + + it('fires onCritical only near the configured ceiling', async () => { + const onCritical = vi.fn() + // Explicit small ceiling-derived thresholds via override to keep the test + // independent of the host V8 heap_size_limit. + withHeap(7.5 * GB) + stop = startMemoryMonitor({ criticalBytes: 7 * GB, highBytes: 5 * GB, intervalMs: 1, onCritical }) + + await vi.advanceTimersByTimeAsync(5) + + expect(onCritical).toHaveBeenCalledTimes(1) + }) + + it('fires onWarn once on fast sub-threshold heap growth, then re-arms', async () => { + const onWarn = vi.fn() + // Start low, then jump >150MB across a tick while above the 600MB floor and + // below `high` — the silent-death regime. + const spy = withHeap(100 * MB) + stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB }) + + await vi.advanceTimersByTimeAsync(2) // seed lastHeap at 100MB, below floor + expect(onWarn).not.toHaveBeenCalled() + + spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 800 * MB, heapUsed: 800 * MB, rss: 800 * MB } as NodeJS.MemoryUsage) + await vi.advanceTimersByTimeAsync(2) // jumped 700MB → above floor + steep + expect(onWarn).toHaveBeenCalledTimes(1) + + // Stays elevated but not re-firing. + await vi.advanceTimersByTimeAsync(2) + expect(onWarn).toHaveBeenCalledTimes(1) + + // Falls back below the floor → re-armed, then climbs again → fires again. + spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 100 * MB, heapUsed: 100 * MB, rss: 100 * MB } as NodeJS.MemoryUsage) + await vi.advanceTimersByTimeAsync(2) + spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 800 * MB, heapUsed: 800 * MB, rss: 800 * MB } as NodeJS.MemoryUsage) + await vi.advanceTimersByTimeAsync(2) + expect(onWarn).toHaveBeenCalledTimes(2) + }) + + it('does not warn on slow growth below the steep-growth step', async () => { + const onWarn = vi.fn() + const spy = withHeap(650 * MB) + stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB }) + + await vi.advanceTimersByTimeAsync(2) + // +50MB per tick — above the floor but gentle, not a render-tree blowup. + spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 700 * MB, heapUsed: 700 * MB, rss: 700 * MB } as NodeJS.MemoryUsage) + await vi.advanceTimersByTimeAsync(2) + + expect(onWarn).not.toHaveBeenCalled() + }) +}) diff --git a/ui-tui/src/__tests__/text.test.ts b/ui-tui/src/__tests__/text.test.ts index 6fd250b5b..2202b2199 100644 --- a/ui-tui/src/__tests__/text.test.ts +++ b/ui-tui/src/__tests__/text.test.ts @@ -69,6 +69,29 @@ describe('buildVerboseToolTrailLine', () => { mark: '✗' }) }) + + it('caps a large result to a small persisted preview (#34095)', () => { + // A 40KB browser-snapshot-sized result must NOT be embedded whole — the + // persisted, expanded-by-default trail block is what blew up the Ink + // render tree and silently OOM-killed the TUI. The block stays small. + const huge = 'A'.repeat(40_000) + const line = buildVerboseToolTrailLine('browser_snapshot', 'https://x.example', false, 2, undefined, huge) + + expect(line).toContain('Result:\n') + // Far below the old 16KB live-render budget; the whole line (call + label + + // omitted marker + preview) must stay on the order of ~1KB, not ~40KB. + expect(line.length).toBeLessThan(2_000) + expect(line).toContain('omitted') + expect(line.endsWith(' ✓')).toBe(true) + }) + + it('does not truncate a result that already fits the preview budget', () => { + const small = 'ok: 3 files changed' + const line = buildVerboseToolTrailLine('patch', 'index.html', false, 0.1, undefined, small) + + expect(line).toContain(`Result:\n${small}`) + expect(line).not.toContain('omitted') + }) }) describe('lastCotTrailIndex', () => { diff --git a/ui-tui/src/config/limits.ts b/ui-tui/src/config/limits.ts index 31b062b9c..e66e37040 100644 --- a/ui-tui/src/config/limits.ts +++ b/ui-tui/src/config/limits.ts @@ -3,6 +3,19 @@ export const LARGE_PASTE = { lines: 5 } export const LIVE_RENDER_MAX_CHARS = 16_000 export const LIVE_RENDER_MAX_LINES = 240 +// Persisted verbose tool-trail blocks (Args/Result embedded in a completed +// tool line) are kept for the WHOLE session in transcript Msg.tools[] and +// rendered expanded by default, so a render-node tree is built for every one +// of up to MAX_HISTORY messages at once. Capping these to the live-render +// budget (16KB) let a heavy browser/large-output session retain ~12MB of +// strings that exploded into a few hundred MB of Ink nodes and silently OOM- +// killed the Node parent (→ stdin EOF, gateway death; issue #34095). The live +// streaming tail still uses the larger LIVE_RENDER budget — only the persisted +// per-call block shrinks to a readable preview here. Full output remains in the +// agent context and the SQLite session; the trail is a glance, not a log. +export const VERBOSE_TRAIL_MAX_CHARS = 800 +export const VERBOSE_TRAIL_MAX_LINES = 12 + export const LONG_MSG = 300 export const MAX_HISTORY = 800 export const THINKING_COT_MAX = 160 diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx index aa6cd9b9f..d1b7b9b49 100644 --- a/ui-tui/src/entry.tsx +++ b/ui-tui/src/entry.tsx @@ -75,7 +75,17 @@ const stopMemoryMonitor = startMemoryMonitor({ process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n') process.exit(137) }, - onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)) + onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)), + // Sub-threshold abnormal heap growth (#34095). The TUI used to die silently + // here — Node OOMs from a render-tree blowup well below the exit threshold, + // so the only trace was a bare gateway `stdin EOF`. Persist a breadcrumb + + // stderr line so the next such death is attributable instead of silent. + onWarn: snap => { + recordParentLifecycle(`memory-warning fast heap growth heap=${formatBytes(snap.heapUsed)} rss=${formatBytes(snap.rss)}`) + process.stderr.write( + `hermes-tui: heap climbing fast (${formatBytes(snap.heapUsed)}) — a large tool output or long session may be straining memory\n` + ) + } }) if (process.env.HERMES_HEAPDUMP_ON_START === '1') { diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index eaf11574a..512421f94 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -1,3 +1,5 @@ +import { getHeapStatistics } from 'node:v8' + import { type HeapDumpResult, performHeapDump } from './memory.js' export type MemoryLevel = 'critical' | 'high' | 'normal' @@ -14,9 +16,41 @@ export interface MemoryMonitorOptions { intervalMs?: number onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void + // Fired ONCE when heap growth looks abnormal while still far below the + // critical exit threshold — the regime where the TUI used to die silently + // (#34095: Node OOMs from an Ink render-tree blowup at a few hundred MB, + // well under criticalBytes, so onCritical never fired and the gateway death + // showed up only as a bare `stdin EOF`). A visible warning here makes that + // class of death diagnosable instead of silent. + onWarn?: (snap: MemorySnapshot) => void + warnBytes?: number } const GB = 1024 ** 3 +const MB = 1024 ** 2 + +// Resolve the exit / dump thresholds RELATIVE to the actual V8 heap ceiling +// (--max-old-space-size, 8GB for the TUI) instead of hardcoding 2.5GB. The old +// constant killed the process — and silently closed the gateway's stdin — at +// ~31% of an 8GB ceiling, treating a normal long-session heap as an OOM. We now +// exit only when genuinely near the ceiling (critical ~88%, high ~70%), and +// clamp to sane floors/ceilings so a tiny --max-old-space-size can't drive the +// thresholds below the warn watermark. Callers may still override explicitly. +function resolveThresholds(criticalBytes?: number, highBytes?: number) { + let limit = 0 + try { + limit = getHeapStatistics().heap_size_limit || 0 + } catch { + limit = 0 + } + + // Fall back to the historical 8GB ceiling if V8 doesn't report one. + const ceiling = limit > 0 ? limit : 8 * GB + const critical = criticalBytes ?? Math.max(2 * GB, Math.round(ceiling * 0.88)) + const high = highBytes ?? Math.max(1 * GB, Math.min(critical - 256 * MB, Math.round(ceiling * 0.7))) + + return { critical, high } +} // Deferred @hermes/ink import: loading `@hermes/ink` at module top-level // pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto @@ -53,18 +87,46 @@ async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unkno } export function startMemoryMonitor({ - criticalBytes = 2.5 * GB, - highBytes = 1.5 * GB, + criticalBytes, + highBytes, intervalMs = 10_000, onCritical, - onHigh + onHigh, + onWarn, + warnBytes = 600 * MB }: MemoryMonitorOptions = {}): () => void { + const { critical, high } = resolveThresholds(criticalBytes, highBytes) const dumped = new Set>() const inFlight = new Set>() + // Early-warning state (#34095): the silent-death regime is BELOW `high`, so + // the level machine above never sees it. Track the previous sample and fire + // onWarn at most once when heap both crosses a modest absolute floor AND is + // climbing steeply (≥150MB between 10s ticks) — the signature of a render- + // tree blowup — so the user gets a visible heads-up before Node OOMs under + // the exit threshold. Re-armed only after heap falls back below the floor. + // `lastHeap < 0` marks the un-seeded first sample so a cold start that opens + // already-high can't be mistaken for sudden growth (growth = current - last). + let lastHeap = -1 + let warned = false + const WARN_GROWTH_STEP = 150 * MB + const tick = async () => { const { heapUsed, rss } = process.memoryUsage() - const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal' + + // Sub-threshold abnormal-growth warning. Skip on the first (un-seeded) + // sample — we need a prior reading to measure a delta against. + if (heapUsed < high && lastHeap >= 0) { + if (!warned && heapUsed >= warnBytes && heapUsed - lastHeap >= WARN_GROWTH_STEP) { + warned = true + onWarn?.({ heapUsed, level: 'normal', rss }) + } else if (heapUsed < warnBytes) { + warned = false + } + } + lastHeap = heapUsed + + const level: MemoryLevel = heapUsed >= critical ? 'critical' : heapUsed >= high ? 'high' : 'normal' if (level === 'normal') { dumped.clear() diff --git a/ui-tui/src/lib/text.ts b/ui-tui/src/lib/text.ts index 2b1ae33c5..feb3547a3 100644 --- a/ui-tui/src/lib/text.ts +++ b/ui-tui/src/lib/text.ts @@ -1,7 +1,9 @@ import { LIVE_RENDER_MAX_CHARS, LIVE_RENDER_MAX_LINES, - THINKING_COT_MAX + THINKING_COT_MAX, + VERBOSE_TRAIL_MAX_CHARS, + VERBOSE_TRAIL_MAX_LINES } from '../config/limits.js' import { VERBS } from '../content/verbs.js' import type { ThinkingMode } from '../types.js' @@ -215,7 +217,16 @@ export const buildToolTrailLine = ( const verboseToolBlock = (label: string, text?: string) => { const body = (text ?? '').trim() - return body ? `${label}:\n${boundedLiveRenderText(body)}` : '' + // Persisted trail blocks are kept all session and rendered expanded by + // default — cap to a small readable preview (NOT the 16KB live-render + // budget) so a large tool output can't balloon the Ink render tree and + // silently OOM-kill the TUI. See VERBOSE_TRAIL_MAX_CHARS (#34095). + return body + ? `${label}:\n${boundedLiveRenderText(body, { + maxChars: VERBOSE_TRAIL_MAX_CHARS, + maxLines: VERBOSE_TRAIL_MAX_LINES + })}` + : '' } export const buildVerboseToolTrailLine = (