Merge pull request #38564 from NousResearch/bb/tui-sgr-mouse-fragment-leak

fix(hermes-ink): reassemble split SGR mouse sequences at the tokenizer (supersedes #29337)
This commit is contained in:
brooklyn!
2026-06-03 20:10:48 -05:00
committed by GitHub
5 changed files with 262 additions and 208 deletions

View File

@ -83,17 +83,10 @@ function parseKey(keypress: ParsedKey): [Key, string] {
input = ''
}
// Suppress ESC-less SGR mouse fragments. When a heavy React commit blocks
// the event loop past App's 50ms NORMAL_TIMEOUT flush, a CSI split across
// stdin chunks gets its buffered ESC flushed as a lone Escape key, and the
// continuation arrives as a text token with name='' — which falls through
// all of parseKeypress's ESC-anchored regexes and the nonAlphanumericKeys
// clear below (name is falsy). The fragment then leaks into the prompt as
// literal `[<64;74;16M`. This is the same defensive sink as the F13 guard
// above; the underlying tokenizer-flush race is upstream of this layer.
if (!keypress.name && /^\[<\d+;\d+;\d+[Mm]/.test(input)) {
input = ''
}
// (SGR mouse-report fragments used to be scrubbed here. They no longer reach
// this layer: the tokenizer keeps an incomplete CSI buffered across a
// watchdog flush and reassembles it on the next feed instead of force-
// emitting the partial as input. See termio/tokenize.ts.)
// Strip meta if it's still remaining after `parseKeypress`
// TODO(vadimdemedes): remove this in the next major version.

View File

@ -97,71 +97,37 @@ describe('mouse wheel modifier decoding', () => {
})
})
describe('fragmented SGR mouse recovery', () => {
it('re-synthesizes bracket-only SGR mouse tails as mouse events', () => {
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '[<35;159;11M')
describe('flush-boundary SGR mouse reassembly', () => {
it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => {
// chunk 1: heavy render stalls the loop, only the prefix is read
let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;')
expect(keys).toEqual([])
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
// App's 50ms watchdog flushes (input=null) — must NOT emit the partial
;[keys, state] = parseMultipleKeypresses(state, null)
expect(keys).toEqual([])
// continuation arrives; the whole report reassembles, nothing leaks
;[keys, state] = parseMultipleKeypresses(state, '46M')
expect(keys).toEqual([expect.objectContaining({ kind: 'mouse', button: 0, col: 35, row: 46, action: 'press' })])
})
it('re-synthesizes angle-only SGR mouse tails as mouse events', () => {
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '<35;159;11M')
it('drops a truncated mouse prefix after a second flush instead of leaking it', () => {
let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;')
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
;[keys, state] = parseMultipleKeypresses(state, null) // first flush keeps it
;[keys, state] = parseMultipleKeypresses(state, null) // second flush drops it
expect(keys).toEqual([])
expect(state.incomplete).toBe('')
})
it('re-synthesizes degraded SGR mouse bursts without leaking prompt text', () => {
const [events] = parseMultipleKeypresses(INITIAL_STATE, '5;142;11M<35;159;11M35;124;26M35;119;26Mtyped')
it('re-synthesizes an orphaned X10 wheel tail (legacy mouse) into a scroll key', () => {
// X10 wheel-up = ESC[M + (0x40+32) + col + row. If the ESC was flushed as a
// lone Escape and the `[M…` payload arrives as text, resynthesize it.
const tail = '[M' + String.fromCharCode(0x60) + '!!'
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, tail)
expect(events.slice(0, 4)).toEqual([
expect.objectContaining({ kind: 'mouse', button: 5, col: 142, row: 11 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 159, row: 11 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 124, row: 26 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 119, row: 26 })
])
expect(events[4]).toMatchObject({ kind: 'key', sequence: 'typed' })
})
it('keeps isolated semicolon text that only resembles a prefixless mouse report', () => {
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'see 1;2;3M for details')
expect(key).toMatchObject({ kind: 'key', sequence: 'see 1;2;3M for details' })
})
it('does not match prefixless fragments inside longer digit runs', () => {
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, '1234;56;78M9;10;11M')
expect(key).toMatchObject({ kind: 'key', sequence: '1234;56;78M9;10;11M' })
})
it('swallows a fully degraded mouse-burst noise blob without leaking prompt text', () => {
// Captured from Windows Terminal during a heavy tool-call render: the event
// loop blocked past App's 50ms flush timer, so a long burst of SGR mouse
// reports (mode 1003 any-motion) arrived as text with prefixes AND
// coordinate digits chewed off. The surviving shards are
// too degraded for SGR_MOUSE_FRAGMENT_RE (1- and 2-param remnants, a
// stray focus-in `[I`), so without the whole-text noise fast path the entire
// blob types into the composer and locks the user out.
const blob =
'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M'
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
expect(events).toEqual([])
})
it('keeps plain prose that only contains scattered M and m letters', () => {
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'Mmm MMM mmm yummy')
expect(key).toMatchObject({ kind: 'key', sequence: 'Mmm MMM mmm yummy' })
})
it('swallows noise wholesale even when it contains intact recoverable fragments', () => {
// A noise blob can carry a few intact `<b;c;r M` fragments amid the chewed
// shards. The whole-text noise check must run BEFORE fragment recovery —
// otherwise parseTextWithSgrMouseFragments returns non-null and emits a
// pile of recovered mouse events instead of dropping the blob wholesale.
const blob = '<35;159;11M;44M20;43M0M7M<35;124;26M;47;40M9;15;32M5M2M'
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
expect(events).toEqual([])
expect(key).toMatchObject({ name: 'wheelup' })
})
})

View File

@ -63,35 +63,6 @@ const XTVERSION_RE = /^\x1bP>\|(.*?)(?:\x07|\x1b\\)$/s
// Button 32=left-drag (0x20 | motion-bit). Plain 0/1/2 = left/mid/right click.
// eslint-disable-next-line no-control-regex
const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/
const SGR_MOUSE_FRAGMENT_RE = /(?<!\d)(?:\[<|<)?(?:[0-9]|[1-9][0-9]|1\d{2}|2[0-4]\d|25[0-5]);\d+;\d+[Mm]/g
// Whole-text mouse-burst noise fast path. When a heavy render blocks the event
// loop past App's 50ms flush watchdog, a long burst of SGR mouse reports (mode
// 1003 any-motion / 1006 SGR) can arrive as a single text token with prefixes
// AND coordinate digits chewed off across many partial reads. The surviving
// shards (1- and 2-param remnants, stray focus-in `[I`, lone `M`/`m`
// terminators) are too degraded for SGR_MOUSE_FRAGMENT_RE, so the leftover
// tail leaks into the composer and locks the user out (they can't type or exit).
//
// If the ENTIRE text token is drawn only from the mouse-leak alphabet
// (`[ ] < ; I M m`, digits, ESC, and the stray spaces a burst can carry) AND it
// carries the structural signature of mouse coordinates — ≥3 `M`/`m`
// terminators, at least one digit, and at least one `;` separator — swallow it
// wholesale. All three constraints together preserve real prose: `Mmm MMM mmm`
// has no digit and no `;`, `see 1;2;3M for details` contains disqualifying
// letters, and `1234;56;78M9;10;11M` has only two terminators.
// eslint-disable-next-line no-control-regex
const MOUSE_BURST_NOISE_RE = /^(?=[\s\S]*\d)(?=[\s\S]*;)(?=(?:[^Mm]*[Mm]){3})[\d;<\[\]IMm \x1b]+$/
// Residual-shard variant for the gaps BETWEEN / AFTER recovered fragments
// inside parseTextWithSgrMouseFragments. A real recovery run leaves degraded
// remnants (e.g. `M6M`, `7M;220;1MM0M`, lone `;157;47M`) that are pure
// mouse-leak alphabet but too short to satisfy the ≥3-terminator whole-text
// rule. Swallow such a residue only when it is pure alphabet AND carries a
// digit AND at least one `M`/`m` — a prose gap like ` for details ` contains
// disqualifying letters and never matches.
// eslint-disable-next-line no-control-regex
const MOUSE_BURST_RESIDUE_RE = /^(?=[^\d]*\d)(?=[^Mm]*[Mm])[\d;<\[\]IMm \x1b]+$/
function createPasteKey(content: string): ParsedKey {
return {
@ -296,32 +267,18 @@ export function parseMultipleKeypresses(
} else if (token.type === 'text') {
if (inPaste) {
pasteBuffer += token.value
} else if (MOUSE_BURST_NOISE_RE.test(token.value)) {
// Fully degraded mouse-burst noise — a heavy render (e.g. a sudo /
// secret prompt repaint) blocked the event loop past App's 50ms flush
// watchdog, so a long burst of SGR mouse reports arrived as text with
// prefixes AND coordinate digits chewed off. Checked BEFORE fragment
// recovery: a noise blob can still contain a few intact `<b;c;r M`
// fragments, and parseTextWithSgrMouseFragments would then return
// non-null and emit a pile of recovered mouse events instead of
// dropping the blob wholesale. Swallow it here so it never leaks into
// the composer (and we skip the extra fragment-recovery work mid-stall).
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
// Orphaned X10 wheel tail (legacy 1000/1002 terminals, fullscreen
// only). If the buffered ESC was flushed as a lone Escape and the X10
// payload (`[M` + 3 bytes) arrived as the next text token, re-synthesize
// with ESC so the scroll event still fires instead of leaking. SGR mouse
// reports no longer reach this branch — the tokenizer keeps an
// incomplete CSI buffered across a flush and reassembles it (see
// termio/tokenize.ts), so the old fragment/burst recovery is gone.
const resynthesized = '\x1b' + token.value
keys.push(parseKeypress(resynthesized))
} else {
const mouseFragments = parseTextWithSgrMouseFragments(token.value)
if (mouseFragments) {
keys.push(...mouseFragments)
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
// Orphaned X10 wheel tail (fullscreen only — mouse tracking is off
// otherwise). A heavy render blocked the event loop past App's 50ms
// flush timer, so the buffered ESC was flushed as a lone Escape and
// the continuation arrived as text. Re-synthesize with ESC so the
// scroll event still fires instead of leaking into the prompt.
const resynthesized = '\x1b' + token.value
keys.push(parseKeypress(resynthesized))
} else {
keys.push(parseKeypress(token.value))
}
keys.push(parseKeypress(token.value))
}
}
}
@ -663,87 +620,6 @@ function parseMouseEvent(s: string): ParsedMouse | null {
}
}
/**
 * Rebuild a full SGR mouse report from a fragment whose introducer was
 * partly or wholly lost.
 *
 * The fragment may still start with `[<`, with just `<`, or with neither;
 * whatever part of the `ESC [ <` introducer is missing is prepended.
 *
 * @param fragment - Mouse report text, possibly missing its leading bytes.
 * @returns The fragment prefixed so that it begins with `ESC [ <`.
 */
function normalizeSgrMouseFragment(fragment: string): string {
  // Assume the whole introducer is gone, then shrink the prefix for
  // fragments that kept part of it.
  let missingPrefix = '\x1b[<'

  if (fragment.startsWith('[<')) {
    missingPrefix = '\x1b'
  } else if (fragment.startsWith('<')) {
    missingPrefix = '\x1b['
  }

  return missingPrefix + fragment
}
/**
 * Parse a (possibly prefix-less) SGR mouse fragment.
 *
 * The fragment is first normalized into a full escape sequence, then handed
 * to the mouse parser; if that yields nothing, the same sequence is parsed
 * as an ordinary keypress instead.
 */
function parseSgrMouseFragment(fragment: string): ParsedInput {
  const restored = normalizeSgrMouseFragment(fragment)
  const mouseEvent = parseMouseEvent(restored)

  return mouseEvent ?? parseKeypress(restored)
}
/**
 * Scan a text token for SGR mouse-report fragments and split it into parsed
 * inputs.
 *
 * Matches of SGR_MOUSE_FRAGMENT_RE that sit back-to-back are grouped into a
 * "run". A run is recovered only if at least one of its matches starts with
 * `[<` or `<`, or if it contains more than one match; other runs are left in
 * place as ordinary text. Text between/after recovered runs is emitted as a
 * keypress unless it matches MOUSE_BURST_RESIDUE_RE, in which case it is
 * dropped.
 *
 * @param text - The raw text token to inspect.
 * @returns The parsed inputs in source order, or `null` when the text has no
 *   fragment match at all or no run qualified for recovery.
 */
function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
// The fragment regex is global; reset its cursor before scanning.
SGR_MOUSE_FRAGMENT_RE.lastIndex = 0
const matches = [...text.matchAll(SGR_MOUSE_FRAGMENT_RE)]
if (matches.length === 0) {
return null
}
const parsed: ParsedInput[] = []
// `cursor` is the offset in `text` up to which output has been produced.
let cursor = 0
let consumedAny = false
// `i` is advanced inside the body, one whole run per outer iteration.
for (let i = 0; i < matches.length;) {
const first = matches[i]!
const run: RegExpMatchArray[] = [first]
let runEnd = first.index! + first[0].length
i++
// Extend the run while the next match begins exactly where this one ended.
while (i < matches.length && matches[i]!.index === runEnd) {
run.push(matches[i]!)
runEnd = matches[i]!.index! + matches[i]![0].length
i++
}
const hasExplicitMousePrefix = run.some(match => match[0].startsWith('[<') || match[0].startsWith('<'))
const isFragmentBurst = run.length > 1
// A single prefix-less match is not recovered. `cursor` is left untouched,
// so its text is carried into a later gap or the tail.
if (!hasExplicitMousePrefix && !isFragmentBurst) {
continue
}
if (first.index! > cursor) {
const gap = text.slice(cursor, first.index!)
// Skip pure mouse-leak residue between recovered fragments; only emit
// real text gaps as keypresses.
if (!MOUSE_BURST_RESIDUE_RE.test(gap)) {
parsed.push(parseKeypress(gap))
}
}
// Every match of an accepted run is parsed as its own fragment.
for (const match of run) {
parsed.push(parseSgrMouseFragment(match[0]))
}
cursor = runEnd
consumedAny = true
}
// No run was accepted: report "nothing recovered" rather than a result list.
if (!consumedAny) {
return null
}
if (cursor < text.length) {
const tail = text.slice(cursor)
// Swallow a pure mouse-leak residue tail (the head fragments recovered, but
// the burst trailed off into chewed-up shards). Emit only real trailing text.
if (!MOUSE_BURST_RESIDUE_RE.test(tail)) {
parsed.push(parseKeypress(tail))
}
}
return parsed
}
function parseKeypress(s: string = ''): ParsedKey {
let parts

View File

@ -0,0 +1,185 @@
import { describe, expect, it } from 'vitest'
import { createTokenizer, type Token } from './tokenize.js'
describe('tokenizer escape-sequence boundaries', () => {
  it('reassembles a CSI mouse sequence split across two feeds', () => {
    const head = '\x1b[<0;35;'
    const tail = '46M'
    const tokenizer = createTokenizer({ x10Mouse: true })

    // The first chunk is an unfinished CSI, so nothing is emitted yet.
    const afterHead = tokenizer.feed(head)
    expect(afterHead).toEqual([])

    // The second chunk completes the report: one sequence token, empty buffer.
    const afterTail = tokenizer.feed(tail)
    expect(afterTail).toEqual([{ type: 'sequence', value: head + tail }])
    expect(tokenizer.buffer()).toBe('')
  })
})
describe('tokenizer state-aware flush', () => {
  // Fixtures shared by the cases below: a mouse report cut mid-sequence and
  // the full report it should reassemble into.
  const PARTIAL_REPORT = '\x1b[<0;35;'
  const REPORT_TAIL = '46M'
  const FULL_REPORT = '\x1b[<0;35;46M'
  const newTokenizer = () => createTokenizer({ x10Mouse: true })

  it('does not emit an incomplete CSI on flush — it keeps it for reassembly', () => {
    const tokenizer = newTokenizer()

    // A watchdog flush lands mid-sequence. The buffered CSI prefix must stay
    // buffered rather than being emitted as input.
    expect(tokenizer.feed(PARTIAL_REPORT)).toEqual([])
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe(PARTIAL_REPORT)

    // Once the rest arrives, a single clean sequence token comes out.
    expect(tokenizer.feed(REPORT_TAIL)).toEqual([{ type: 'sequence', value: FULL_REPORT }])
    expect(tokenizer.buffer()).toBe('')
  })

  it('drops a partial control sequence that survives a second flush (truncation)', () => {
    const tokenizer = newTokenizer()

    expect(tokenizer.feed(PARTIAL_REPORT)).toEqual([])

    // Flush #1: the partial is retained.
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe(PARTIAL_REPORT)

    // Flush #2 with no feed in between: the same buffer is seen again, so it
    // is discarded and cannot fuse with the next keypress's bytes.
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe('')
  })

  it('still emits a bare ESC on flush so the Escape key works', () => {
    const tokenizer = newTokenizer()

    expect(tokenizer.feed('\x1b')).toEqual([])
    expect(tokenizer.flush()).toEqual([{ type: 'sequence', value: '\x1b' }])
    expect(tokenizer.buffer()).toBe('')
  })

  it('reassembles even when a flush fires between every byte of the report', () => {
    const tokenizer = newTokenizer()
    const pieces = ['\x1b[', '<', '0;', '35;', '46']

    // Worst-case stall: every chunk is followed by a flush. Neither the feed
    // nor the flush may emit anything while the report is still incomplete.
    pieces.forEach(piece => {
      expect(tokenizer.feed(piece)).toEqual([])
      expect(tokenizer.flush()).toEqual([])
    })

    expect(tokenizer.feed('M')).toEqual([{ type: 'sequence', value: FULL_REPORT }])
    expect(tokenizer.buffer()).toBe('')
  })
})
// Battle-test: prove the leak class is structurally impossible, not just that
// the known cases are patched. We hammer the tokenizer with the worst stalls a
// terminal can produce (split + flush at every byte) and assert the two hard
// invariants: nothing leaks as text, and every complete report reassembles.
describe('tokenizer fuzz: fragments never leak under a flush storm', () => {
  /** Build a complete SGR mouse report (`ESC [ < btn ; col ; row M|m`). */
  const sgr = (btn: number, col: number, row: number, press: boolean): string =>
    `\x1b[<${btn};${col};${row}${press ? 'M' : 'm'}`

  /**
   * Deterministic PRNG (31-bit LCG) so a failure is reproducible.
   *
   * Two details matter for fuzz quality:
   * - `Math.imul` keeps the multiply exact. A plain `s * 1103515245` exceeds
   *   2^53, the double rounds away its low bits, and `s` ends up almost
   *   always even.
   * - Values are drawn from the HIGH bits. The low bits of a power-of-two LCG
   *   cycle with tiny periods, which would make `rnd(2)` nearly constant and
   *   starve the fuzz of release (`m`) reports.
   */
  const makeRnd = (seed: number): ((n: number) => number) => {
    let state = seed

    return (n: number): number => {
      state = (Math.imul(state, 1103515245) + 12345) & 0x7fffffff

      return (state >>> 16) % n
    }
  }

  /**
   * Feed `stream` one character at a time, flushing after every character,
   * and collect what the tokenizer emits: sequence tokens as a list, and
   * everything else concatenated as text.
   */
  const runFlushStorm = (stream: string): { seqTokens: string[]; text: string } => {
    const t = createTokenizer({ x10Mouse: true })
    const seqTokens: string[] = []
    let text = ''

    const drain = (tokens: Token[]): void => {
      for (const tok of tokens) {
        if (tok.type === 'sequence') {
          seqTokens.push(tok.value)
        } else {
          text += tok.value
        }
      }
    }

    for (const ch of stream) {
      drain(t.feed(ch))

      // Flush storm — but not at a lone-ESC boundary (the real watchdog
      // re-arms while bytes are pending; a single flush between feeds never
      // hits the truncation valve).
      if (t.buffer() !== '\x1b') {
        drain(t.flush())
      }
    }

    return { seqTokens, text }
  }

  it('reassembles a report split + flushed at every interior byte', () => {
    const seq = sgr(0, 35, 46, true)

    // Start at 2: an earlier split is the lone-ESC ESCDELAY boundary, which
    // intentionally flushes to the Escape key. Terminals never split a mouse
    // report there — a report is one atomic write — so it's not a real case.
    for (let i = 2; i < seq.length; i++) {
      const t = createTokenizer({ x10Mouse: true })
      const tokens: Token[] = [...t.feed(seq.slice(0, i)), ...t.flush(), ...t.feed(seq.slice(i))]

      expect(tokens).toEqual([{ type: 'sequence', value: seq }])
      expect(t.buffer()).toBe('')
    }
  })

  it('feeds 200 random reports one byte at a time, flushing after every byte', () => {
    const rnd = makeRnd(0x1234567)
    const reports = Array.from({ length: 200 }, () => sgr(rnd(120), 1 + rnd(300), 1 + rnd(200), rnd(2) === 0))

    // Guard the generator itself: the fuzz must cover both press and release.
    expect(reports.some(report => report.endsWith('M'))).toBe(true)
    expect(reports.some(report => report.endsWith('m'))).toBe(true)

    const { seqTokens, text: textLeak } = runFlushStorm(reports.join(''))

    expect(textLeak).toBe('')
    // Token-by-token comparison: each report must come back as exactly one
    // sequence token, not merely concatenate to the same bytes.
    expect(seqTokens).toEqual(reports)
  })

  it('keeps real keystrokes intact while mouse reports reassemble around them', () => {
    const rnd = makeRnd(0x0badf00d)
    const typed = 'abc 123 xyz'
    const expectedKeys: string[] = []
    const expectedSeqs: string[] = []
    const parts: string[] = []

    for (let k = 0; k < 120; k++) {
      if (rnd(3) === 0) {
        const ch = typed.charAt(rnd(typed.length))

        expectedKeys.push(ch)
        parts.push(ch)
      } else {
        const seq = sgr(rnd(64), 1 + rnd(200), 1 + rnd(100), rnd(2) === 0)

        expectedSeqs.push(seq)
        parts.push(seq)
      }
    }

    const { seqTokens, text } = runFlushStorm(parts.join(''))

    // Every typed character survives, in order; every report reassembles whole.
    expect(text).toBe(expectedKeys.join(''))
    expect(seqTokens).toEqual(expectedSeqs)
  })
})

View File

@ -47,10 +47,18 @@ type TokenizerOptions = {
export function createTokenizer(options?: TokenizerOptions): Tokenizer {
let currentState: State = 'ground'
let currentBuffer = ''
// The control-sequence buffer kept across the previous flush, if any. Used
// as a one-tick truncation valve: a partial CSI mouse report normally
// reassembles on the very next feed, so if a flush sees the exact same
// buffer it kept last time (the continuation never arrived), we drop it.
let lastFlushedBuffer = ''
const x10Mouse = options?.x10Mouse ?? false
return {
feed(input: string): Token[] {
// Real bytes arrived — any kept partial is no longer stale.
lastFlushedBuffer = ''
const result = tokenize(input, currentState, currentBuffer, false, x10Mouse)
currentState = result.state.state
@ -64,12 +72,25 @@ export function createTokenizer(options?: TokenizerOptions): Tokenizer {
currentState = result.state.state
currentBuffer = result.state.buffer
// tokenize() keeps (doesn't emit) an incomplete control sequence on
// flush. If two consecutive flushes see the same buffer with no feed in
// between, the continuation is never coming (truncated write / killed
// process) — drop it so it can't fuse with the next keypress's bytes.
if (currentBuffer && currentBuffer === lastFlushedBuffer) {
currentState = 'ground'
currentBuffer = ''
lastFlushedBuffer = ''
} else {
lastFlushedBuffer = currentBuffer
}
return result.tokens
},
reset(): void {
currentState = 'ground'
currentBuffer = ''
lastFlushedBuffer = ''
},
buffer(): string {
@ -298,8 +319,10 @@ function tokenize(
// Handle end of input
if (result.state === 'ground') {
flushText()
} else if (flush) {
// Force output incomplete sequence
} else if (flush && result.state === 'escape') {
// A bare ESC with nothing after it is the Escape key — the one incomplete
// state a flush should turn into input (the classic ESCDELAY lone-ESC
// disambiguation: ESC alone vs. ESC as a sequence/meta prefix).
const remaining = data.slice(seqStart)
if (remaining) {
@ -308,7 +331,18 @@ function tokenize(
result.state = 'ground'
} else {
// Buffer incomplete sequence for next call
// Buffer the incomplete sequence. Two paths land here:
// - streaming (flush=false): normal carry-over to the next feed.
// - flush=true while still inside a multi-byte control sequence
// (csi/osc/dcs/apc/ss3/escapeIntermediate): we deliberately do NOT
// emit it. A half-arrived CSI mouse report (ESC[<btn;col;row M) is an
// unfinished sequence, not user input — force-emitting it is what
// injects `46M`/`35;46M` shards into the prompt during a render stall.
// Keeping it buffered lets the continuation reassemble on the next
// feed (the xterm.js state-machine discipline — partial sequences
// never become text). createTokenizer.flush() drops the buffer if it
// survives a second flush with no progress (a genuine truncation), so
// a stuck partial can never merge into the next keypress's bytes.
result.buffer = data.slice(seqStart)
}