From 01c010e23378c318bd96e9ed2de068c698e74779 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 3 Jun 2026 19:05:26 -0500 Subject: [PATCH 1/5] fix(hermes-ink): collapse SGR mouse fragment guards into one flush-aware rule When App's 50ms flush watchdog fires mid-CSI during a render stall, an SGR mouse report (ESC[ k.kind === 'key') + + return key ? new InputEvent(key).input : '' +} + +describe('InputEvent SGR mouse fragment suppression', () => { + it('suppresses the buffered CSI prefix force-emitted by a mid-sequence flush', () => { + // The tokenizer buffers an incomplete CSI mouse sequence; the flush + // force-emits it as a nameless sequence token (ESC still attached). Intact + // `[ { + // These are the cases the older `/^\[<\d+;\d+;\d+[Mm]/` guard missed — + // the prefix was lost to the flush, only the tail reaches us as text. + for (const tail of ['46M', '6M', '35;46M', '0;35;46M']) { + expect(pipelineInput(tail)).toBe('') + } + }) + + it('suppresses leading-semicolon tails from a split at a `;` boundary', () => { + for (const tail of [';46M', ';35;46M']) { + expect(pipelineInput(tail)).toBe('') + } + }) + + it('suppresses both halves of a `ESC[<0; / 35;46M` split end to end', () => { + expect(pipelineInput('\x1b[<0;', null)).toBe('') // flushed prefix + expect(pipelineInput('35;46M')).toBe('') // continuation + }) + + it('suppresses release (`m`) terminators as well as press (`M`)', () => { + expect(pipelineInput('35;46m')).toBe('') + expect(pipelineInput('\x1b[<0;35;', null)).toBe('') + }) +}) + +describe('InputEvent SGR mouse fragment guard does not eat real input', () => { + it('passes through lone bracket/angle/semicolon characters', () => { + // No coordinate digit → the `(?=…\d)` lookahead fails, so typing these + // characters is never swallowed. 
+ expect(pipelineInput('<')).toBe('<') + expect(pipelineInput('[')).toBe('[') + expect(pipelineInput(';')).toBe(';') + }) + + it('passes through digits and the literal letter M', () => { + // These parse to a named key (number / m), so the `!keypress.name` gate + // skips suppression entirely. + expect(pipelineInput('5')).toBe('5') + expect(pipelineInput('M')).toBe('M') + }) + + it('passes through ordinary text', () => { + expect(pipelineInput('hello')).toBe('hello') + }) + + it('keeps two stuck-together fragments / coordinate-like prose intact', () => { + // An embedded M/m breaks the `[\d;]+...$` anchor, so a run like this is + // left for the upstream burst/recovery logic rather than blanked here. + expect(pipelineInput('1234;56;78M9;10;11M')).toBe('1234;56;78M9;10;11M') + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts index 19031402b..f88146be9 100644 --- a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts +++ b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts @@ -5,6 +5,32 @@ import { Event } from './event.js' const inputForSpecialSequence = (name: string): string => name === 'space' ? ' ' : name === 'return' || name === 'escape' ? '' : name +// SGR mouse-report fragment that leaked into a nameless text/sequence token. +// In alt-screen Ink enables MOUSE_ANY (DEC 1003), so every pixel of motion +// emits a CSI mouse report (ESC[ Date: Wed, 3 Jun 2026 19:24:28 -0500 Subject: [PATCH 2/5] fix(hermes-ink): reassemble split mouse sequences at the tokenizer; drop the regex sink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root-cause fix for the SGR mouse fragment leak (`46M35;40M...` typed into the prompt). The leak was never really about the fragments — it was the flush emitting them. 
When App's 50ms watchdog fired mid-CSI during a render stall, the tokenizer force-emitted the buffered partial as a token and reset to ground, so both the prefix and the ESC-less remainder surfaced as unparseable input. Make the flush state-aware (xterm.js discipline): a bare ESC still flushes to the Escape key (the legitimate ESCDELAY case), but a buffer still inside a multi-byte control sequence (csi/osc/dcs/apc/ss3/escapeIntermediate) is NOT emitted — it's kept so the continuation reassembles on the next feed. A one-tick truncation valve in createTokenizer.flush() drops a partial that survives a second flush with no feed in between, so a genuinely truncated write can't fuse into the next keypress. With partials never entering the input stream, the downstream scrubber is dead code: remove the SGR fragment guard from input-event.ts (both the original `/^\[<\d+;\d+;\d+[Mm]/` and the consolidated form added earlier in this PR). The parse-keypress burst-recovery regexes (MOUSE_BURST_*) are now also redundant but are left in place as a safety net for one release; they can be removed in a follow-up once this change has soaked. Tests: tokenize.test.ts proves that a mid-CSI flush keeps the partial and reassembles it on the next feed, that a stale partial is dropped after a second flush, and that a bare ESC still emits; parse-keypress.test.ts adds the end-to-end split-then-reassemble case yielding a single clean mouse event with no leaked key. Supersedes #29337.
--- .../hermes-ink/src/ink/events/input-event.ts | 39 ++--------- .../hermes-ink/src/ink/parse-keypress.test.ts | 27 ++++++++ .../src/ink/termio/tokenize.test.ts | 65 +++++++++++++++++++ .../hermes-ink/src/ink/termio/tokenize.ts | 40 +++++++++++- 4 files changed, 133 insertions(+), 38 deletions(-) create mode 100644 ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts diff --git a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts index f88146be9..900f0042c 100644 --- a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts +++ b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts @@ -5,32 +5,6 @@ import { Event } from './event.js' const inputForSpecialSequence = (name: string): string => name === 'space' ? ' ' : name === 'return' || name === 'escape' ? '' : name -// SGR mouse-report fragment that leaked into a nameless text/sequence token. -// In alt-screen Ink enables MOUSE_ANY (DEC 1003), so every pixel of motion -// emits a CSI mouse report (ESC[ { // blob types into the composer and locks the user out. 
const blob = 'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M' + const [events] = parseMultipleKeypresses(INITIAL_STATE, blob) expect(events).toEqual([]) @@ -165,3 +166,29 @@ describe('fragmented SGR mouse recovery', () => { expect(events).toEqual([]) }) }) + +describe('flush-boundary SGR mouse reassembly', () => { + it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => { + // chunk 1: heavy render stalls the loop, only the prefix is read + let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;') + expect(keys).toEqual([]) + + // App's 50ms watchdog flushes (input=null) — must NOT emit the partial + ;[keys, state] = parseMultipleKeypresses(state, null) + expect(keys).toEqual([]) + + // continuation arrives; the whole report reassembles, nothing leaks + ;[keys, state] = parseMultipleKeypresses(state, '46M') + expect(keys).toEqual([expect.objectContaining({ kind: 'mouse', button: 0, col: 35, row: 46, action: 'press' })]) + }) + + it('drops a truncated mouse prefix after a second flush instead of leaking it', () => { + let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;') + + ;[keys, state] = parseMultipleKeypresses(state, null) // first flush keeps it + ;[keys, state] = parseMultipleKeypresses(state, null) // second flush drops it + + expect(keys).toEqual([]) + expect(state.incomplete).toBe('') + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts new file mode 100644 index 000000000..4d73cfcdd --- /dev/null +++ b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it } from 'vitest' + +import { createTokenizer } from './tokenize.js' + +describe('tokenizer escape-sequence boundaries', () => { + it('reassembles a CSI 
mouse sequence split across two feeds', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.feed('46M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) +}) + +describe('tokenizer state-aware flush', () => { + it('does not emit an incomplete CSI on flush — it keeps it for reassembly', () => { + const t = createTokenizer({ x10Mouse: true }) + + // A render stall lets App's watchdog flush mid-sequence. The buffered CSI + // prefix must NOT be emitted (that is the `46M…` leak); it stays buffered. + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.flush()).toEqual([]) + expect(t.buffer()).toBe('\x1b[<0;35;') + + // The continuation arrives on the next feed and the whole report + // reassembles into a single clean sequence token — nothing leaked. + expect(t.feed('46M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) + + it('drops a partial control sequence that survives a second flush (truncation)', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.flush()).toEqual([]) // first flush keeps the buffer + expect(t.buffer()).toBe('\x1b[<0;35;') + + // Continuation never arrived: the next flush sees the same buffer and + // drops it so it can't fuse with the next keypress's bytes. + expect(t.flush()).toEqual([]) + expect(t.buffer()).toBe('') + }) + + it('still emits a bare ESC on flush so the Escape key works', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b')).toEqual([]) + expect(t.flush()).toEqual([{ type: 'sequence', value: '\x1b' }]) + expect(t.buffer()).toBe('') + }) + + it('reassembles even when a flush fires between every byte of the report', () => { + const t = createTokenizer({ x10Mouse: true }) + + // Pathological stall: a flush between each chunk. 
As long as the + // continuation eventually arrives, no fragment is ever emitted as input. + for (const chunk of ['\x1b[', '<', '0;', '35;', '46']) { + expect(t.feed(chunk)).toEqual([]) + expect(t.flush()).toEqual([]) + } + + expect(t.feed('M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts index 40ba7e214..03f99cf2f 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts @@ -47,10 +47,18 @@ type TokenizerOptions = { export function createTokenizer(options?: TokenizerOptions): Tokenizer { let currentState: State = 'ground' let currentBuffer = '' + // The control-sequence buffer kept across the previous flush, if any. Used + // as a one-tick truncation valve: a partial CSI mouse report normally + // reassembles on the very next feed, so if a flush sees the exact same + // buffer it kept last time (the continuation never arrived), we drop it. + let lastFlushedBuffer = '' const x10Mouse = options?.x10Mouse ?? false return { feed(input: string): Token[] { + // Real bytes arrived — any kept partial is no longer stale. + lastFlushedBuffer = '' + const result = tokenize(input, currentState, currentBuffer, false, x10Mouse) currentState = result.state.state @@ -64,12 +72,25 @@ export function createTokenizer(options?: TokenizerOptions): Tokenizer { currentState = result.state.state currentBuffer = result.state.buffer + // tokenize() keeps (doesn't emit) an incomplete control sequence on + // flush. If two consecutive flushes see the same buffer with no feed in + // between, the continuation is never coming (truncated write / killed + // process) — drop it so it can't fuse with the next keypress's bytes. 
+ if (currentBuffer && currentBuffer === lastFlushedBuffer) { + currentState = 'ground' + currentBuffer = '' + lastFlushedBuffer = '' + } else { + lastFlushedBuffer = currentBuffer + } + return result.tokens }, reset(): void { currentState = 'ground' currentBuffer = '' + lastFlushedBuffer = '' }, buffer(): string { @@ -298,8 +319,10 @@ function tokenize( // Handle end of input if (result.state === 'ground') { flushText() - } else if (flush) { - // Force output incomplete sequence + } else if (flush && result.state === 'escape') { + // A bare ESC with nothing after it is the Escape key — the one incomplete + // state a flush should turn into input (the classic ESCDELAY lone-ESC + // disambiguation: ESC alone vs. ESC as a sequence/meta prefix). const remaining = data.slice(seqStart) if (remaining) { @@ -308,7 +331,18 @@ function tokenize( result.state = 'ground' } else { - // Buffer incomplete sequence for next call + // Buffer the incomplete sequence. Two paths land here: + // - streaming (flush=false): normal carry-over to the next feed. + // - flush=true while still inside a multi-byte control sequence + // (csi/osc/dcs/apc/ss3/escapeIntermediate): we deliberately do NOT + // emit it. A half-arrived CSI mouse report (ESC[ Date: Wed, 3 Jun 2026 19:24:51 -0500 Subject: [PATCH 3/5] test(hermes-ink): drop input-event SGR guard test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guard it covered was removed in the previous commit (fragments no longer reach input-event — they reassemble at the tokenizer). Reassembly is now covered by termio/tokenize.test.ts and the flush-boundary cases in parse-keypress.test.ts. 
--- .../src/ink/events/input-event.test.ts | 88 ------------------- 1 file changed, 88 deletions(-) delete mode 100644 ui-tui/packages/hermes-ink/src/ink/events/input-event.test.ts diff --git a/ui-tui/packages/hermes-ink/src/ink/events/input-event.test.ts b/ui-tui/packages/hermes-ink/src/ink/events/input-event.test.ts deleted file mode 100644 index 771294f0a..000000000 --- a/ui-tui/packages/hermes-ink/src/ink/events/input-event.test.ts +++ /dev/null @@ -1,88 +0,0 @@ -import { describe, expect, it } from 'vitest' - -import { INITIAL_STATE, type ParsedInput, type ParsedKey, parseMultipleKeypresses } from '../parse-keypress.js' - -import { InputEvent } from './input-event.js' - -/** - * Drive the real input pipeline (tokenizer → parseKeypress → InputEvent) for a - * sequence of stdin chunks. `null` chunks simulate App's 50ms flush watchdog - * firing mid-sequence. Returns the `.input` of the first key-kind token — i.e. - * what would actually be typed into the composer. - */ -function pipelineInput(...chunks: (string | null)[]): string { - let state = INITIAL_STATE - const all: ParsedInput[] = [] - - for (const chunk of chunks) { - const [keys, next] = parseMultipleKeypresses(state, chunk) - all.push(...keys) - state = next - } - - const key = all.find((k): k is ParsedKey => k.kind === 'key') - - return key ? new InputEvent(key).input : '' -} - -describe('InputEvent SGR mouse fragment suppression', () => { - it('suppresses the buffered CSI prefix force-emitted by a mid-sequence flush', () => { - // The tokenizer buffers an incomplete CSI mouse sequence; the flush - // force-emits it as a nameless sequence token (ESC still attached). Intact - // `[ { - // These are the cases the older `/^\[<\d+;\d+;\d+[Mm]/` guard missed — - // the prefix was lost to the flush, only the tail reaches us as text. 
- for (const tail of ['46M', '6M', '35;46M', '0;35;46M']) { - expect(pipelineInput(tail)).toBe('') - } - }) - - it('suppresses leading-semicolon tails from a split at a `;` boundary', () => { - for (const tail of [';46M', ';35;46M']) { - expect(pipelineInput(tail)).toBe('') - } - }) - - it('suppresses both halves of a `ESC[<0; / 35;46M` split end to end', () => { - expect(pipelineInput('\x1b[<0;', null)).toBe('') // flushed prefix - expect(pipelineInput('35;46M')).toBe('') // continuation - }) - - it('suppresses release (`m`) terminators as well as press (`M`)', () => { - expect(pipelineInput('35;46m')).toBe('') - expect(pipelineInput('\x1b[<0;35;', null)).toBe('') - }) -}) - -describe('InputEvent SGR mouse fragment guard does not eat real input', () => { - it('passes through lone bracket/angle/semicolon characters', () => { - // No coordinate digit → the `(?=…\d)` lookahead fails, so typing these - // characters is never swallowed. - expect(pipelineInput('<')).toBe('<') - expect(pipelineInput('[')).toBe('[') - expect(pipelineInput(';')).toBe(';') - }) - - it('passes through digits and the literal letter M', () => { - // These parse to a named key (number / m), so the `!keypress.name` gate - // skips suppression entirely. - expect(pipelineInput('5')).toBe('5') - expect(pipelineInput('M')).toBe('M') - }) - - it('passes through ordinary text', () => { - expect(pipelineInput('hello')).toBe('hello') - }) - - it('keeps two stuck-together fragments / coordinate-like prose intact', () => { - // An embedded M/m breaks the `[\d;]+...$` anchor, so a run like this is - // left for the upstream burst/recovery logic rather than blanked here. 
- expect(pipelineInput('1234;56;78M9;10;11M')).toBe('1234;56;78M9;10;11M') - }) -}) From 6efc7eda57c31f1925bffc3d6acc4419e9a6b15b Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 3 Jun 2026 19:29:42 -0500 Subject: [PATCH 4/5] refactor(hermes-ink): delete now-dead SGR mouse fragment recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the tokenizer reassembling split CSI sequences across a flush (prior commit), no SGR mouse fragment can reach a text token anymore — terminals write a mouse report as one atomic sequence, and any read/flush split now re-joins in the tokenizer buffer instead of leaking. That makes the whole downstream recovery layer dead code: - SGR_MOUSE_FRAGMENT_RE, MOUSE_BURST_NOISE_RE, MOUSE_BURST_RESIDUE_RE - parseTextWithSgrMouseFragments / parseSgrMouseFragment / normalizeSgrMouseFragment - the whole-text mouse-burst noise fast path in parseMultipleKeypresses Remove all of it (~185 lines) and the tests that only exercised it. The narrow legacy X10 wheel-tail resynth stays (distinct mechanism, kept with its own test). This retires the #17701 → #18113 → #26781 → #28463 → #35512 regex hardening chain in favor of the one correct parser fix. 
--- .../hermes-ink/src/ink/parse-keypress.test.ts | 79 ++-------- .../hermes-ink/src/ink/parse-keypress.ts | 146 ++---------------- 2 files changed, 20 insertions(+), 205 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/parse-keypress.test.ts b/ui-tui/packages/hermes-ink/src/ink/parse-keypress.test.ts index e2ea93369..c84982d68 100644 --- a/ui-tui/packages/hermes-ink/src/ink/parse-keypress.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/parse-keypress.test.ts @@ -97,76 +97,6 @@ describe('mouse wheel modifier decoding', () => { }) }) -describe('fragmented SGR mouse recovery', () => { - it('re-synthesizes bracket-only SGR mouse tails as mouse events', () => { - const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '[<35;159;11M') - - expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' }) - }) - - it('re-synthesizes angle-only SGR mouse tails as mouse events', () => { - const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '<35;159;11M') - - expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' }) - }) - - it('re-synthesizes degraded SGR mouse bursts without leaking prompt text', () => { - const [events] = parseMultipleKeypresses(INITIAL_STATE, '5;142;11M<35;159;11M35;124;26M35;119;26Mtyped') - - expect(events.slice(0, 4)).toEqual([ - expect.objectContaining({ kind: 'mouse', button: 5, col: 142, row: 11 }), - expect.objectContaining({ kind: 'mouse', button: 35, col: 159, row: 11 }), - expect.objectContaining({ kind: 'mouse', button: 35, col: 124, row: 26 }), - expect.objectContaining({ kind: 'mouse', button: 35, col: 119, row: 26 }) - ]) - expect(events[4]).toMatchObject({ kind: 'key', sequence: 'typed' }) - }) - - it('keeps isolated semicolon text that only resembles a prefixless mouse report', () => { - const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'see 1;2;3M for details') - - expect(key).toMatchObject({ kind: 'key', sequence: 'see 1;2;3M for details' }) - }) - 
- it('does not match prefixless fragments inside longer digit runs', () => { - const [[key]] = parseMultipleKeypresses(INITIAL_STATE, '1234;56;78M9;10;11M') - - expect(key).toMatchObject({ kind: 'key', sequence: '1234;56;78M9;10;11M' }) - }) - - it('swallows a fully degraded mouse-burst noise blob without leaking prompt text', () => { - // Captured from Windows Terminal during a heavy tool-call render: the event - // loop blocked past App's 50ms flush timer, so a long burst of SGR mouse - // reports (mode 1003 any-motion) arrived as text with prefixes AND - // too degraded for SGR_MOUSE_FRAGMENT_RE (1- and 2-param remnants, a - // stray focus-in `[I`), so without the whole-text noise fast path the entire - // blob types into the composer and locks the user out. - const blob = - 'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M' - - const [events] = parseMultipleKeypresses(INITIAL_STATE, blob) - - expect(events).toEqual([]) - }) - - it('keeps plain prose that only contains scattered M and m letters', () => { - const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'Mmm MMM mmm yummy') - - expect(key).toMatchObject({ kind: 'key', sequence: 'Mmm MMM mmm yummy' }) - }) - - it('swallows noise wholesale even when it contains intact recoverable fragments', () => { - // A noise blob can carry a few intact ` { it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => { // chunk 1: heavy render stalls the loop, only the prefix is read @@ -191,4 +121,13 @@ describe('flush-boundary SGR mouse reassembly', () => { expect(keys).toEqual([]) expect(state.incomplete).toBe('') }) + + it('re-synthesizes an orphaned X10 wheel tail (legacy mouse) into a scroll key', () => { + // X10 wheel-up = ESC[M + (0x40+32) + col + row. 
If the ESC was flushed as a + // lone Escape and the `[M…` payload arrives as text, resynthesize it. + const tail = '[M' + String.fromCharCode(0x60) + '!!' + const [[key]] = parseMultipleKeypresses(INITIAL_STATE, tail) + + expect(key).toMatchObject({ name: 'wheelup' }) + }) }) diff --git a/ui-tui/packages/hermes-ink/src/ink/parse-keypress.ts b/ui-tui/packages/hermes-ink/src/ink/parse-keypress.ts index 8f7cceb1b..966e32bac 100644 --- a/ui-tui/packages/hermes-ink/src/ink/parse-keypress.ts +++ b/ui-tui/packages/hermes-ink/src/ink/parse-keypress.ts @@ -63,35 +63,6 @@ const XTVERSION_RE = /^\x1bP>\|(.*?)(?:\x07|\x1b\\)$/s // Button 32=left-drag (0x20 | motion-bit). Plain 0/1/2 = left/mid/right click. // eslint-disable-next-line no-control-regex const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/ -const SGR_MOUSE_FRAGMENT_RE = /(? match[0].startsWith('[<') || match[0].startsWith('<')) - const isFragmentBurst = run.length > 1 - - if (!hasExplicitMousePrefix && !isFragmentBurst) { - continue - } - - if (first.index! > cursor) { - const gap = text.slice(cursor, first.index!) - // Skip pure mouse-leak residue between recovered fragments; only emit - // real text gaps as keypresses. - if (!MOUSE_BURST_RESIDUE_RE.test(gap)) { - parsed.push(parseKeypress(gap)) - } - } - - for (const match of run) { - parsed.push(parseSgrMouseFragment(match[0])) - } - - cursor = runEnd - consumedAny = true - } - - if (!consumedAny) { - return null - } - - if (cursor < text.length) { - const tail = text.slice(cursor) - // Swallow a pure mouse-leak residue tail (the head fragments recovered, but - // the burst trailed off into chewed-up shards). Emit only real trailing text. 
- if (!MOUSE_BURST_RESIDUE_RE.test(tail)) { - parsed.push(parseKeypress(tail)) - } - } - - return parsed -} - function parseKeypress(s: string = ''): ParsedKey { let parts From 725290db63a7d85efb206508d4afccf084c44c21 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 3 Jun 2026 19:38:08 -0500 Subject: [PATCH 5/5] test(hermes-ink): fuzz the tokenizer flush valve against fragment leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hammer createTokenizer with the worst stalls a terminal can produce — split + flush at every interior byte, and a 200-report byte-by-byte feed that flushes after every single byte — and assert the two invariants that make the SGR-leak class structurally impossible: nothing ever leaks as a text token, and every complete report reassembles whole. A mixed mouse+keystroke variant proves real input survives the same storm. --- .../src/ink/termio/tokenize.test.ts | 122 +++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts index 4d73cfcdd..b3cf2cb5e 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest' -import { createTokenizer } from './tokenize.js' +import { createTokenizer, type Token } from './tokenize.js' describe('tokenizer escape-sequence boundaries', () => { it('reassembles a CSI mouse sequence split across two feeds', () => { @@ -63,3 +63,123 @@ describe('tokenizer state-aware flush', () => { expect(t.buffer()).toBe('') }) }) + +// Battle-test: prove the leak class is structurally impossible, not just that +// the known cases are patched. 
We hammer the tokenizer with the worst stalls a +// terminal can produce (split + flush at every byte) and assert the two hard +// invariants: nothing leaks as text, and every complete report reassembles. +describe('tokenizer fuzz: fragments never leak under a flush storm', () => { + const sgr = (btn: number, col: number, row: number, press: boolean): string => + `\x1b[<${btn};${col};${row}${press ? 'M' : 'm'}` + + it('reassembles a report split + flushed at every interior byte', () => { + const seq = sgr(0, 35, 46, true) + + // Start at 2: an earlier split is the lone-ESC ESCDELAY boundary, which + // intentionally flushes to the Escape key. Terminals never split a mouse + // report there — a report is one atomic write — so it's not a real case. + for (let i = 2; i < seq.length; i++) { + const t = createTokenizer({ x10Mouse: true }) + const tokens: Token[] = [...t.feed(seq.slice(0, i)), ...t.flush(), ...t.feed(seq.slice(i))] + + expect(tokens).toEqual([{ type: 'sequence', value: seq }]) + expect(t.buffer()).toBe('') + } + }) + + it('feeds 200 random reports one byte at a time, flushing after every byte', () => { + // Deterministic PRNG so a failure is reproducible. + let s = 0x1234567 + + const rnd = (n: number): number => { + s = (s * 1103515245 + 12345) & 0x7fffffff + + return s % n + } + + const reports = Array.from({ length: 200 }, () => sgr(rnd(120), 1 + rnd(300), 1 + rnd(200), rnd(2) === 0)) + const stream = reports.join('') + + const t = createTokenizer({ x10Mouse: true }) + const seqTokens: string[] = [] + let textLeak = '' + + const drain = (tokens: Token[]): void => { + for (const tok of tokens) { + if (tok.type === 'sequence') { + seqTokens.push(tok.value) + } else { + textLeak += tok.value + } + } + } + + for (const ch of stream) { + drain(t.feed(ch)) + + // Flush storm — but not at a lone-ESC boundary (the real watchdog + // re-arms while bytes are pending; a single flush between feeds never + // hits the truncation valve). 
+ if (t.buffer() !== '\x1b') { + drain(t.flush()) + } + } + + expect(textLeak).toBe('') + expect(seqTokens.join('')).toBe(stream) + }) + + it('keeps real keystrokes intact while mouse reports reassemble around them', () => { + let s = 0x0badf00d + + const rnd = (n: number): number => { + s = (s * 1103515245 + 12345) & 0x7fffffff + + return s % n + } + + const typed = 'abc 123 xyz' + const expectedKeys: string[] = [] + const expectedSeqs: string[] = [] + const parts: string[] = [] + + for (let k = 0; k < 120; k++) { + if (rnd(3) === 0) { + const ch = typed[rnd(typed.length)]! + expectedKeys.push(ch) + parts.push(ch) + } else { + const seq = sgr(rnd(64), 1 + rnd(200), 1 + rnd(100), rnd(2) === 0) + expectedSeqs.push(seq) + parts.push(seq) + } + } + + const stream = parts.join('') + const t = createTokenizer({ x10Mouse: true }) + const seqTokens: string[] = [] + let text = '' + + const drain = (tokens: Token[]): void => { + for (const tok of tokens) { + if (tok.type === 'sequence') { + seqTokens.push(tok.value) + } else { + text += tok.value + } + } + } + + for (const ch of stream) { + drain(t.feed(ch)) + + if (t.buffer() !== '\x1b') { + drain(t.flush()) + } + } + + // Every typed character survives, in order; every report reassembles whole. + expect(text).toBe(expectedKeys.join('')) + expect(seqTokens).toEqual(expectedSeqs) + }) +})