fix(tui): stop persisting full tool output in trail lines (silent OOM death)
A heavy --tui session (browser snapshots, large tool outputs) silently OOM-killed the Node parent within minutes — closing the gateway child's stdin, which the user saw only as a bare "gateway exited" / stdin EOF. CLI was immune. Root cause: each completed tool's verbose trail line embedded up to 16KB of result_text, persisted in transcript Msg.tools[] for the whole session and rendered EXPANDED by default, so an Ink render-node tree was built for every one of up to 800 messages at once. That tree blew past Node's heap at a few hundred MB — far below the 2.5GB memory-monitor exit threshold, so the death was never even attributed. - text.ts: persisted verbose tool-trail blocks now cap to a small preview (VERBOSE_TRAIL_MAX_CHARS=800/12 lines), not the 16KB live-render budget. Retained trail strings drop ~17x (12.2MB -> 0.7MB at 800 msgs); the live streaming tail still uses the larger LIVE_RENDER budget. - tui_gateway/server.py: lower the gateway-side verbose text cap to match (1KB/16 lines) so we stop shipping output the TUI no longer renders. - memoryMonitor.ts: derive critical/high thresholds from the real V8 heap ceiling (~88%/70%) instead of the hardcoded 2.5GB that killed the process at 31% of an 8GB ceiling; add a one-shot onWarn early-warning on fast sub-threshold heap growth so the next such death is diagnosable, not silent. - entry.tsx: wire onWarn to a crash-log breadcrumb + stderr line. Full tool output is unchanged in the agent context and SQLite session — this is display/transport only, no behavior or context change. Fixes #34095. Related #27282. Tests: ui-tui text + new memoryMonitor suites (33 pass), python verbose-cap guard (5 pass); full ui-tui suite shows no new failures vs pristine main. E2E repro confirms the retention drop.
This commit is contained in:
@ -133,6 +133,20 @@ def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch):
|
||||
assert "one" not in capped
|
||||
|
||||
|
||||
def test_tui_verbose_default_cap_stays_small(monkeypatch):
|
||||
# Regression guard for #34095: the verbose tool text shipped to the TUI is
|
||||
# rendered into a persisted, expanded-by-default trail block for the whole
|
||||
# session. Raising this cap back toward the old 16KB re-introduces the Ink
|
||||
# render-tree blowup that silently OOM-killed the TUI. Keep it small.
|
||||
assert server._TUI_VERBOSE_TEXT_MAX_CHARS <= 2_000
|
||||
|
||||
huge = "x" * 40_000
|
||||
capped = server._cap_tui_verbose_text(huge)
|
||||
|
||||
assert len(capped) < 2_000
|
||||
assert capped.startswith("[showing verbose tail; omitted ")
|
||||
|
||||
|
||||
def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch):
|
||||
redact_module = types.ModuleType("agent.redact")
|
||||
|
||||
|
||||
@ -1657,8 +1657,15 @@ def _tool_ctx(name: str, args: dict) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000
|
||||
_TUI_VERBOSE_TEXT_MAX_LINES = 240
|
||||
# Tool Args/Result text shipped to the TUI for the verbose trail line. The TUI
|
||||
# renders only a small persisted preview (ui-tui VERBOSE_TRAIL_MAX_CHARS), kept
|
||||
# all session and expanded by default — so shipping more than that is pure pipe
|
||||
# waste AND feeds the Ink render-tree blowup that silently OOM-killed the TUI
|
||||
# parent (#34095). Cap here to match the render budget (a hair more, so the
|
||||
# "[omitted …]" label is still informative when output is genuinely large).
|
||||
# Full output stays in the agent context and the SQLite session, untouched.
|
||||
_TUI_VERBOSE_TEXT_MAX_CHARS = 1_000
|
||||
_TUI_VERBOSE_TEXT_MAX_LINES = 16
|
||||
|
||||
|
||||
def _cap_tui_verbose_text(text: str) -> str:
|
||||
|
||||
102
ui-tui/src/__tests__/memoryMonitor.test.ts
Normal file
102
ui-tui/src/__tests__/memoryMonitor.test.ts
Normal file
@ -0,0 +1,102 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
// memory.js performs real heap dumps / fs work — stub it so the monitor's
|
||||
// dump path is a no-op in tests.
|
||||
vi.mock('../lib/memory.js', () => ({
|
||||
performHeapDump: vi.fn(async () => null)
|
||||
}))
|
||||
|
||||
// @hermes/ink is dynamically imported only on the dump path; stub the eviction.
|
||||
vi.mock('@hermes/ink', () => ({ evictInkCaches: vi.fn() }))
|
||||
|
||||
import { startMemoryMonitor } from '../lib/memoryMonitor.js'
|
||||
|
||||
const GB = 1024 ** 3
|
||||
const MB = 1024 ** 2
|
||||
|
||||
describe('startMemoryMonitor thresholds (#34095)', () => {
|
||||
let stop: (() => void) | undefined
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
stop?.()
|
||||
stop = undefined
|
||||
vi.restoreAllMocks()
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
const withHeap = (heapUsed: number, rss = heapUsed) =>
|
||||
vi.spyOn(process, 'memoryUsage').mockReturnValue({
|
||||
arrayBuffers: 0,
|
||||
external: 0,
|
||||
heapTotal: heapUsed,
|
||||
heapUsed,
|
||||
rss
|
||||
} as NodeJS.MemoryUsage)
|
||||
|
||||
it('does NOT fire onCritical at 2.5GB when the heap ceiling is 8GB', async () => {
|
||||
// The old hardcoded 2.5GB constant killed the process at ~31% of the real
|
||||
// ceiling. With relative thresholds (~88%), 2.5GB is well within normal.
|
||||
const onCritical = vi.fn()
|
||||
withHeap(2.5 * GB)
|
||||
stop = startMemoryMonitor({ intervalMs: 1, onCritical })
|
||||
|
||||
await vi.advanceTimersByTimeAsync(5)
|
||||
|
||||
expect(onCritical).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('fires onCritical only near the configured ceiling', async () => {
|
||||
const onCritical = vi.fn()
|
||||
// Explicit small ceiling-derived thresholds via override to keep the test
|
||||
// independent of the host V8 heap_size_limit.
|
||||
withHeap(7.5 * GB)
|
||||
stop = startMemoryMonitor({ criticalBytes: 7 * GB, highBytes: 5 * GB, intervalMs: 1, onCritical })
|
||||
|
||||
await vi.advanceTimersByTimeAsync(5)
|
||||
|
||||
expect(onCritical).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('fires onWarn once on fast sub-threshold heap growth, then re-arms', async () => {
|
||||
const onWarn = vi.fn()
|
||||
// Start low, then jump >150MB across a tick while above the 600MB floor and
|
||||
// below `high` — the silent-death regime.
|
||||
const spy = withHeap(100 * MB)
|
||||
stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB })
|
||||
|
||||
await vi.advanceTimersByTimeAsync(2) // seed lastHeap at 100MB, below floor
|
||||
expect(onWarn).not.toHaveBeenCalled()
|
||||
|
||||
spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 800 * MB, heapUsed: 800 * MB, rss: 800 * MB } as NodeJS.MemoryUsage)
|
||||
await vi.advanceTimersByTimeAsync(2) // jumped 700MB → above floor + steep
|
||||
expect(onWarn).toHaveBeenCalledTimes(1)
|
||||
|
||||
// Stays elevated but not re-firing.
|
||||
await vi.advanceTimersByTimeAsync(2)
|
||||
expect(onWarn).toHaveBeenCalledTimes(1)
|
||||
|
||||
// Falls back below the floor → re-armed, then climbs again → fires again.
|
||||
spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 100 * MB, heapUsed: 100 * MB, rss: 100 * MB } as NodeJS.MemoryUsage)
|
||||
await vi.advanceTimersByTimeAsync(2)
|
||||
spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 800 * MB, heapUsed: 800 * MB, rss: 800 * MB } as NodeJS.MemoryUsage)
|
||||
await vi.advanceTimersByTimeAsync(2)
|
||||
expect(onWarn).toHaveBeenCalledTimes(2)
|
||||
})
|
||||
|
||||
it('does not warn on slow growth below the steep-growth step', async () => {
|
||||
const onWarn = vi.fn()
|
||||
const spy = withHeap(650 * MB)
|
||||
stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB })
|
||||
|
||||
await vi.advanceTimersByTimeAsync(2)
|
||||
// +50MB per tick — above the floor but gentle, not a render-tree blowup.
|
||||
spy.mockReturnValue({ arrayBuffers: 0, external: 0, heapTotal: 700 * MB, heapUsed: 700 * MB, rss: 700 * MB } as NodeJS.MemoryUsage)
|
||||
await vi.advanceTimersByTimeAsync(2)
|
||||
|
||||
expect(onWarn).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
@ -69,6 +69,29 @@ describe('buildVerboseToolTrailLine', () => {
|
||||
mark: '✗'
|
||||
})
|
||||
})
|
||||
|
||||
it('caps a large result to a small persisted preview (#34095)', () => {
|
||||
// A 40KB browser-snapshot-sized result must NOT be embedded whole — the
|
||||
// persisted, expanded-by-default trail block is what blew up the Ink
|
||||
// render tree and silently OOM-killed the TUI. The block stays small.
|
||||
const huge = 'A'.repeat(40_000)
|
||||
const line = buildVerboseToolTrailLine('browser_snapshot', 'https://x.example', false, 2, undefined, huge)
|
||||
|
||||
expect(line).toContain('Result:\n')
|
||||
// Far below the old 16KB live-render budget; the whole line (call + label +
|
||||
// omitted marker + preview) must stay on the order of ~1KB, not ~40KB.
|
||||
expect(line.length).toBeLessThan(2_000)
|
||||
expect(line).toContain('omitted')
|
||||
expect(line.endsWith(' ✓')).toBe(true)
|
||||
})
|
||||
|
||||
it('does not truncate a result that already fits the preview budget', () => {
|
||||
const small = 'ok: 3 files changed'
|
||||
const line = buildVerboseToolTrailLine('patch', 'index.html', false, 0.1, undefined, small)
|
||||
|
||||
expect(line).toContain(`Result:\n${small}`)
|
||||
expect(line).not.toContain('omitted')
|
||||
})
|
||||
})
|
||||
|
||||
describe('lastCotTrailIndex', () => {
|
||||
|
||||
@ -3,6 +3,19 @@ export const LARGE_PASTE = { lines: 5 }
|
||||
export const LIVE_RENDER_MAX_CHARS = 16_000
|
||||
export const LIVE_RENDER_MAX_LINES = 240
|
||||
|
||||
// Persisted verbose tool-trail blocks (Args/Result embedded in a completed
|
||||
// tool line) are kept for the WHOLE session in transcript Msg.tools[] and
|
||||
// rendered expanded by default, so a render-node tree is built for every one
|
||||
// of up to MAX_HISTORY messages at once. Capping these to the live-render
|
||||
// budget (16KB) let a heavy browser/large-output session retain ~12MB of
|
||||
// strings that exploded into a few hundred MB of Ink nodes and silently OOM-
|
||||
// killed the Node parent (→ stdin EOF, gateway death; issue #34095). The live
|
||||
// streaming tail still uses the larger LIVE_RENDER budget — only the persisted
|
||||
// per-call block shrinks to a readable preview here. Full output remains in the
|
||||
// agent context and the SQLite session; the trail is a glance, not a log.
|
||||
export const VERBOSE_TRAIL_MAX_CHARS = 800
|
||||
export const VERBOSE_TRAIL_MAX_LINES = 12
|
||||
|
||||
export const LONG_MSG = 300
|
||||
export const MAX_HISTORY = 800
|
||||
export const THINKING_COT_MAX = 160
|
||||
|
||||
@ -75,7 +75,17 @@ const stopMemoryMonitor = startMemoryMonitor({
|
||||
process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n')
|
||||
process.exit(137)
|
||||
},
|
||||
onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump))
|
||||
onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)),
|
||||
// Sub-threshold abnormal heap growth (#34095). The TUI used to die silently
|
||||
// here — Node OOMs from a render-tree blowup well below the exit threshold,
|
||||
// so the only trace was a bare gateway `stdin EOF`. Persist a breadcrumb +
|
||||
// stderr line so the next such death is attributable instead of silent.
|
||||
onWarn: snap => {
|
||||
recordParentLifecycle(`memory-warning fast heap growth heap=${formatBytes(snap.heapUsed)} rss=${formatBytes(snap.rss)}`)
|
||||
process.stderr.write(
|
||||
`hermes-tui: heap climbing fast (${formatBytes(snap.heapUsed)}) — a large tool output or long session may be straining memory\n`
|
||||
)
|
||||
}
|
||||
})
|
||||
|
||||
if (process.env.HERMES_HEAPDUMP_ON_START === '1') {
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
import { getHeapStatistics } from 'node:v8'
|
||||
|
||||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||
|
||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||
@ -14,9 +16,41 @@ export interface MemoryMonitorOptions {
|
||||
intervalMs?: number
|
||||
onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
// Fired ONCE when heap growth looks abnormal while still far below the
|
||||
// critical exit threshold — the regime where the TUI used to die silently
|
||||
// (#34095: Node OOMs from an Ink render-tree blowup at a few hundred MB,
|
||||
// well under criticalBytes, so onCritical never fired and the gateway death
|
||||
// showed up only as a bare `stdin EOF`). A visible warning here makes that
|
||||
// class of death diagnosable instead of silent.
|
||||
onWarn?: (snap: MemorySnapshot) => void
|
||||
warnBytes?: number
|
||||
}
|
||||
|
||||
const GB = 1024 ** 3
|
||||
const MB = 1024 ** 2
|
||||
|
||||
// Resolve the exit / dump thresholds RELATIVE to the actual V8 heap ceiling
|
||||
// (--max-old-space-size, 8GB for the TUI) instead of hardcoding 2.5GB. The old
|
||||
// constant killed the process — and silently closed the gateway's stdin — at
|
||||
// ~31% of an 8GB ceiling, treating a normal long-session heap as an OOM. We now
|
||||
// exit only when genuinely near the ceiling (critical ~88%, high ~70%), and
|
||||
// clamp to sane floors/ceilings so a tiny --max-old-space-size can't drive the
|
||||
// thresholds below the warn watermark. Callers may still override explicitly.
|
||||
function resolveThresholds(criticalBytes?: number, highBytes?: number) {
|
||||
let limit = 0
|
||||
try {
|
||||
limit = getHeapStatistics().heap_size_limit || 0
|
||||
} catch {
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Fall back to the historical 8GB ceiling if V8 doesn't report one.
|
||||
const ceiling = limit > 0 ? limit : 8 * GB
|
||||
const critical = criticalBytes ?? Math.max(2 * GB, Math.round(ceiling * 0.88))
|
||||
const high = highBytes ?? Math.max(1 * GB, Math.min(critical - 256 * MB, Math.round(ceiling * 0.7)))
|
||||
|
||||
return { critical, high }
|
||||
}
|
||||
|
||||
// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
|
||||
// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
|
||||
@ -53,18 +87,46 @@ async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unkno
|
||||
}
|
||||
|
||||
export function startMemoryMonitor({
|
||||
criticalBytes = 2.5 * GB,
|
||||
highBytes = 1.5 * GB,
|
||||
criticalBytes,
|
||||
highBytes,
|
||||
intervalMs = 10_000,
|
||||
onCritical,
|
||||
onHigh
|
||||
onHigh,
|
||||
onWarn,
|
||||
warnBytes = 600 * MB
|
||||
}: MemoryMonitorOptions = {}): () => void {
|
||||
const { critical, high } = resolveThresholds(criticalBytes, highBytes)
|
||||
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
|
||||
// Early-warning state (#34095): the silent-death regime is BELOW `high`, so
|
||||
// the level machine above never sees it. Track the previous sample and fire
|
||||
// onWarn at most once when heap both crosses a modest absolute floor AND is
|
||||
// climbing steeply (≥150MB between 10s ticks) — the signature of a render-
|
||||
// tree blowup — so the user gets a visible heads-up before Node OOMs under
|
||||
// the exit threshold. Re-armed only after heap falls back below the floor.
|
||||
// `lastHeap < 0` marks the un-seeded first sample so a cold start that opens
|
||||
// already-high can't be mistaken for sudden growth (growth = current - last).
|
||||
let lastHeap = -1
|
||||
let warned = false
|
||||
const WARN_GROWTH_STEP = 150 * MB
|
||||
|
||||
const tick = async () => {
|
||||
const { heapUsed, rss } = process.memoryUsage()
|
||||
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||
|
||||
// Sub-threshold abnormal-growth warning. Skip on the first (un-seeded)
|
||||
// sample — we need a prior reading to measure a delta against.
|
||||
if (heapUsed < high && lastHeap >= 0) {
|
||||
if (!warned && heapUsed >= warnBytes && heapUsed - lastHeap >= WARN_GROWTH_STEP) {
|
||||
warned = true
|
||||
onWarn?.({ heapUsed, level: 'normal', rss })
|
||||
} else if (heapUsed < warnBytes) {
|
||||
warned = false
|
||||
}
|
||||
}
|
||||
lastHeap = heapUsed
|
||||
|
||||
const level: MemoryLevel = heapUsed >= critical ? 'critical' : heapUsed >= high ? 'high' : 'normal'
|
||||
|
||||
if (level === 'normal') {
|
||||
dumped.clear()
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
import {
|
||||
LIVE_RENDER_MAX_CHARS,
|
||||
LIVE_RENDER_MAX_LINES,
|
||||
THINKING_COT_MAX
|
||||
THINKING_COT_MAX,
|
||||
VERBOSE_TRAIL_MAX_CHARS,
|
||||
VERBOSE_TRAIL_MAX_LINES
|
||||
} from '../config/limits.js'
|
||||
import { VERBS } from '../content/verbs.js'
|
||||
import type { ThinkingMode } from '../types.js'
|
||||
@ -215,7 +217,16 @@ export const buildToolTrailLine = (
|
||||
const verboseToolBlock = (label: string, text?: string) => {
|
||||
const body = (text ?? '').trim()
|
||||
|
||||
return body ? `${label}:\n${boundedLiveRenderText(body)}` : ''
|
||||
// Persisted trail blocks are kept all session and rendered expanded by
|
||||
// default — cap to a small readable preview (NOT the 16KB live-render
|
||||
// budget) so a large tool output can't balloon the Ink render tree and
|
||||
// silently OOM-kill the TUI. See VERBOSE_TRAIL_MAX_CHARS (#34095).
|
||||
return body
|
||||
? `${label}:\n${boundedLiveRenderText(body, {
|
||||
maxChars: VERBOSE_TRAIL_MAX_CHARS,
|
||||
maxLines: VERBOSE_TRAIL_MAX_LINES
|
||||
})}`
|
||||
: ''
|
||||
}
|
||||
|
||||
export const buildVerboseToolTrailLine = (
|
||||
|
||||
Reference in New Issue
Block a user