Merge pull request #38564 from NousResearch/bb/tui-sgr-mouse-fragment-leak
fix(hermes-ink): reassemble split SGR mouse sequences at the tokenizer (supersedes #29337)
This commit is contained in:
@ -83,17 +83,10 @@ function parseKey(keypress: ParsedKey): [Key, string] {
|
||||
input = ''
|
||||
}
|
||||
|
||||
// Suppress ESC-less SGR mouse fragments. When a heavy React commit blocks
|
||||
// the event loop past App's 50ms NORMAL_TIMEOUT flush, a CSI split across
|
||||
// stdin chunks gets its buffered ESC flushed as a lone Escape key, and the
|
||||
// continuation arrives as a text token with name='' — which falls through
|
||||
// all of parseKeypress's ESC-anchored regexes and the nonAlphanumericKeys
|
||||
// clear below (name is falsy). The fragment then leaks into the prompt as
|
||||
// literal `[<64;74;16M`. This is the same defensive sink as the F13 guard
|
||||
// above; the underlying tokenizer-flush race is upstream of this layer.
|
||||
if (!keypress.name && /^\[<\d+;\d+;\d+[Mm]/.test(input)) {
|
||||
input = ''
|
||||
}
|
||||
// (SGR mouse-report fragments used to be scrubbed here. They no longer reach
|
||||
// this layer: the tokenizer keeps an incomplete CSI buffered across a
|
||||
// watchdog flush and reassembles it on the next feed instead of force-
|
||||
// emitting the partial as input. See termio/tokenize.ts.)
|
||||
|
||||
// Strip meta if it's still remaining after `parseKeypress`
|
||||
// TODO(vadimdemedes): remove this in the next major version.
|
||||
|
||||
@ -97,71 +97,37 @@ describe('mouse wheel modifier decoding', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('fragmented SGR mouse recovery', () => {
|
||||
it('re-synthesizes bracket-only SGR mouse tails as mouse events', () => {
|
||||
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '[<35;159;11M')
|
||||
describe('flush-boundary SGR mouse reassembly', () => {
|
||||
it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => {
|
||||
// chunk 1: heavy render stalls the loop, only the prefix is read
|
||||
let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;')
|
||||
expect(keys).toEqual([])
|
||||
|
||||
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
|
||||
// App's 50ms watchdog flushes (input=null) — must NOT emit the partial
|
||||
;[keys, state] = parseMultipleKeypresses(state, null)
|
||||
expect(keys).toEqual([])
|
||||
|
||||
// continuation arrives; the whole report reassembles, nothing leaks
|
||||
;[keys, state] = parseMultipleKeypresses(state, '46M')
|
||||
expect(keys).toEqual([expect.objectContaining({ kind: 'mouse', button: 0, col: 35, row: 46, action: 'press' })])
|
||||
})
|
||||
|
||||
it('re-synthesizes angle-only SGR mouse tails as mouse events', () => {
|
||||
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '<35;159;11M')
|
||||
it('drops a truncated mouse prefix after a second flush instead of leaking it', () => {
|
||||
let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;')
|
||||
|
||||
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
|
||||
;[keys, state] = parseMultipleKeypresses(state, null) // first flush keeps it
|
||||
;[keys, state] = parseMultipleKeypresses(state, null) // second flush drops it
|
||||
|
||||
expect(keys).toEqual([])
|
||||
expect(state.incomplete).toBe('')
|
||||
})
|
||||
|
||||
it('re-synthesizes degraded SGR mouse bursts without leaking prompt text', () => {
|
||||
const [events] = parseMultipleKeypresses(INITIAL_STATE, '5;142;11M<35;159;11M35;124;26M35;119;26Mtyped')
|
||||
it('re-synthesizes an orphaned X10 wheel tail (legacy mouse) into a scroll key', () => {
|
||||
// X10 wheel-up = ESC[M + (0x40+32) + col + row. If the ESC was flushed as a
|
||||
// lone Escape and the `[M…` payload arrives as text, resynthesize it.
|
||||
const tail = '[M' + String.fromCharCode(0x60) + '!!'
|
||||
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, tail)
|
||||
|
||||
expect(events.slice(0, 4)).toEqual([
|
||||
expect.objectContaining({ kind: 'mouse', button: 5, col: 142, row: 11 }),
|
||||
expect.objectContaining({ kind: 'mouse', button: 35, col: 159, row: 11 }),
|
||||
expect.objectContaining({ kind: 'mouse', button: 35, col: 124, row: 26 }),
|
||||
expect.objectContaining({ kind: 'mouse', button: 35, col: 119, row: 26 })
|
||||
])
|
||||
expect(events[4]).toMatchObject({ kind: 'key', sequence: 'typed' })
|
||||
})
|
||||
|
||||
it('keeps isolated semicolon text that only resembles a prefixless mouse report', () => {
|
||||
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'see 1;2;3M for details')
|
||||
|
||||
expect(key).toMatchObject({ kind: 'key', sequence: 'see 1;2;3M for details' })
|
||||
})
|
||||
|
||||
it('does not match prefixless fragments inside longer digit runs', () => {
|
||||
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, '1234;56;78M9;10;11M')
|
||||
|
||||
expect(key).toMatchObject({ kind: 'key', sequence: '1234;56;78M9;10;11M' })
|
||||
})
|
||||
|
||||
it('swallows a fully degraded mouse-burst noise blob without leaking prompt text', () => {
|
||||
// Captured from Windows Terminal during a heavy tool-call render: the event
|
||||
// loop blocked past App's 50ms flush timer, so a long burst of SGR mouse
|
||||
// reports (mode 1003 any-motion) arrived as text with prefixes AND coordinate digits chewed off. The surviving shards are
|
||||
// too degraded for SGR_MOUSE_FRAGMENT_RE (1- and 2-param remnants, a
|
||||
// stray focus-in `[I`), so without the whole-text noise fast path the entire
|
||||
// blob types into the composer and locks the user out.
|
||||
const blob =
|
||||
'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M'
|
||||
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
|
||||
|
||||
expect(events).toEqual([])
|
||||
})
|
||||
|
||||
it('keeps plain prose that only contains scattered M and m letters', () => {
|
||||
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'Mmm MMM mmm yummy')
|
||||
|
||||
expect(key).toMatchObject({ kind: 'key', sequence: 'Mmm MMM mmm yummy' })
|
||||
})
|
||||
|
||||
it('swallows noise wholesale even when it contains intact recoverable fragments', () => {
|
||||
// A noise blob can carry a few intact `<b;c;r M` fragments amid the chewed
|
||||
// shards. The whole-text noise check must run BEFORE fragment recovery —
|
||||
// otherwise parseTextWithSgrMouseFragments returns non-null and emits a
|
||||
// pile of recovered mouse events instead of dropping the blob wholesale.
|
||||
const blob = '<35;159;11M;44M20;43M0M7M<35;124;26M;47;40M9;15;32M5M2M'
|
||||
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
|
||||
|
||||
expect(events).toEqual([])
|
||||
expect(key).toMatchObject({ name: 'wheelup' })
|
||||
})
|
||||
})
|
||||
|
||||
@ -63,35 +63,6 @@ const XTVERSION_RE = /^\x1bP>\|(.*?)(?:\x07|\x1b\\)$/s
|
||||
// Button 32=left-drag (0x20 | motion-bit). Plain 0/1/2 = left/mid/right click.
|
||||
// eslint-disable-next-line no-control-regex
|
||||
const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/
|
||||
const SGR_MOUSE_FRAGMENT_RE = /(?<!\d)(?:\[<|<)?(?:[0-9]|[1-9][0-9]|1\d{2}|2[0-4]\d|25[0-5]);\d+;\d+[Mm]/g
|
||||
|
||||
// Whole-text mouse-burst noise fast path. When a heavy render blocks the event
|
||||
// loop past App's 50ms flush watchdog, a long burst of SGR mouse reports (mode
|
||||
// 1003 any-motion / 1006 SGR) can arrive as a single text token with prefixes
|
||||
// AND coordinate digits chewed off across many partial reads. The surviving
|
||||
// shards (1- and 2-param remnants, stray focus-in `[I`, lone `M`/`m`
|
||||
// terminators) are too degraded for SGR_MOUSE_FRAGMENT_RE, so the leftover
|
||||
// tail leaks into the composer and locks the user out (they can't type or exit).
|
||||
//
|
||||
// If the ENTIRE text token is drawn only from the mouse-leak alphabet
|
||||
// (`[ ] < ; I M m`, ESC, digits, and the stray spaces a burst can carry) AND it
|
||||
// carries the structural signature of mouse coordinates — ≥3 `M`/`m`
|
||||
// terminators, at least one digit, and at least one `;` separator — swallow it
|
||||
// wholesale. All three constraints together preserve real prose: `Mmm MMM mmm`
|
||||
// has no digit and no `;`, `see 1;2;3M for details` contains disqualifying
|
||||
// letters, and `1234;56;78M9;10;11M` has only two terminators.
|
||||
// eslint-disable-next-line no-control-regex
|
||||
const MOUSE_BURST_NOISE_RE = /^(?=[\s\S]*\d)(?=[\s\S]*;)(?=(?:[^Mm]*[Mm]){3})[\d;<\[\]IMm \x1b]+$/
|
||||
|
||||
// Residual-shard variant for the gaps BETWEEN / AFTER recovered fragments
|
||||
// inside parseTextWithSgrMouseFragments. A real recovery run leaves degraded
|
||||
// remnants (e.g. `M6M`, `7M;220;1MM0M`, lone `;157;47M`) that are pure
|
||||
// mouse-leak alphabet but too short to satisfy the ≥3-terminator whole-text
|
||||
// rule. Swallow such a residue only when it is pure alphabet AND carries a
|
||||
// digit AND at least one `M`/`m` — a prose gap like ` for details ` contains
|
||||
// disqualifying letters and never matches.
|
||||
// eslint-disable-next-line no-control-regex
|
||||
const MOUSE_BURST_RESIDUE_RE = /^(?=[^\d]*\d)(?=[^Mm]*[Mm])[\d;<\[\]IMm \x1b]+$/
|
||||
|
||||
function createPasteKey(content: string): ParsedKey {
|
||||
return {
|
||||
@ -296,32 +267,18 @@ export function parseMultipleKeypresses(
|
||||
} else if (token.type === 'text') {
|
||||
if (inPaste) {
|
||||
pasteBuffer += token.value
|
||||
} else if (MOUSE_BURST_NOISE_RE.test(token.value)) {
|
||||
// Fully degraded mouse-burst noise — a heavy render (e.g. a sudo /
|
||||
// secret prompt repaint) blocked the event loop past App's 50ms flush
|
||||
// watchdog, so a long burst of SGR mouse reports arrived as text with
|
||||
// prefixes AND coordinate digits chewed off. Checked BEFORE fragment
|
||||
// recovery: a noise blob can still contain a few intact `<b;c;r M`
|
||||
// fragments, and parseTextWithSgrMouseFragments would then return
|
||||
// non-null and emit a pile of recovered mouse events instead of
|
||||
// dropping the blob wholesale. Swallow it here so it never leaks into
|
||||
// the composer (and we skip the extra fragment-recovery work mid-stall).
|
||||
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
|
||||
// Orphaned X10 wheel tail (legacy 1000/1002 terminals, fullscreen
|
||||
// only). If the buffered ESC was flushed as a lone Escape and the X10
|
||||
// payload (`[M` + 3 bytes) arrived as the next text token, re-synthesize
|
||||
// with ESC so the scroll event still fires instead of leaking. SGR mouse
|
||||
// reports no longer reach this branch — the tokenizer keeps an
|
||||
// incomplete CSI buffered across a flush and reassembles it (see
|
||||
// termio/tokenize.ts), so the old fragment/burst recovery is gone.
|
||||
const resynthesized = '\x1b' + token.value
|
||||
keys.push(parseKeypress(resynthesized))
|
||||
} else {
|
||||
const mouseFragments = parseTextWithSgrMouseFragments(token.value)
|
||||
|
||||
if (mouseFragments) {
|
||||
keys.push(...mouseFragments)
|
||||
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
|
||||
// Orphaned X10 wheel tail (fullscreen only — mouse tracking is off
|
||||
// otherwise). A heavy render blocked the event loop past App's 50ms
|
||||
// flush timer, so the buffered ESC was flushed as a lone Escape and
|
||||
// the continuation arrived as text. Re-synthesize with ESC so the
|
||||
// scroll event still fires instead of leaking into the prompt.
|
||||
const resynthesized = '\x1b' + token.value
|
||||
keys.push(parseKeypress(resynthesized))
|
||||
} else {
|
||||
keys.push(parseKeypress(token.value))
|
||||
}
|
||||
keys.push(parseKeypress(token.value))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -663,87 +620,6 @@ function parseMouseEvent(s: string): ParsedMouse | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Restores the leading bytes an SGR mouse fragment is missing so that it
 * begins with the full `ESC [ <` introducer.
 *
 * - `[<…` is only missing the ESC byte.
 * - `<…`  is missing `ESC [`.
 * - anything else is treated as a bare parameter tail and gets `ESC [ <`.
 *
 * @param fragment Text of a (possibly prefix-less) SGR mouse report.
 * @returns The fragment with the missing introducer bytes prepended.
 */
function normalizeSgrMouseFragment(fragment: string): string {
  const missingIntroducer = fragment.startsWith('[<') ? '\x1b' : fragment.startsWith('<') ? '\x1b[' : '\x1b[<'

  return missingIntroducer + fragment
}
|
||||
|
||||
/**
 * Parses one recovered SGR mouse fragment.
 *
 * The fragment is first given its full introducer via
 * `normalizeSgrMouseFragment`, then handed to `parseMouseEvent`. If that
 * yields nothing, the same normalized sequence is parsed with
 * `parseKeypress` instead.
 *
 * @param fragment A (possibly prefix-less) SGR mouse report fragment.
 * @returns The parsed mouse event, or the keypress fallback.
 */
function parseSgrMouseFragment(fragment: string): ParsedInput {
  const withIntroducer = normalizeSgrMouseFragment(fragment)
  const asMouse = parseMouseEvent(withIntroducer)

  return asMouse ?? parseKeypress(withIntroducer)
}
|
||||
|
||||
/**
 * Splits a text token into recovered SGR mouse events and the real text
 * surrounding them.
 *
 * Every match of `SGR_MOUSE_FRAGMENT_RE` is grouped into runs of directly
 * adjacent matches. A run is recovered only when at least one of its matches
 * carries an explicit `[<` / `<` prefix, or when the run contains more than
 * one match; a lone prefix-less match is left in place as ordinary text.
 * Text before a recovered run, and text after the last recovered run, is
 * emitted through `parseKeypress` unless it matches `MOUSE_BURST_RESIDUE_RE`,
 * in which case it is dropped.
 *
 * @param text Raw value of a text token.
 * @returns The parsed inputs in source order, or `null` when nothing in
 *   `text` was recovered as a mouse fragment (the caller then treats the
 *   token as plain text).
 */
function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
  // The regex is global: matchAll seeds its internal copy from lastIndex, so
  // start from a known position.
  SGR_MOUSE_FRAGMENT_RE.lastIndex = 0

  const hits = [...text.matchAll(SGR_MOUSE_FRAGMENT_RE)]

  if (hits.length === 0) {
    return null
  }

  const out: ParsedInput[] = []
  let consumedUpTo = 0
  let recoveredAny = false
  let next = 0

  while (next < hits.length) {
    // Collect the run of back-to-back matches beginning at `next`.
    const runStart = hits[next]!.index!
    const group: RegExpMatchArray[] = []
    let runStop = runStart

    do {
      const hit = hits[next]!

      group.push(hit)
      runStop = hit.index! + hit[0].length
      next++
    } while (next < hits.length && hits[next]!.index === runStop)

    const prefixed = group.some(hit => hit[0].startsWith('[<') || hit[0].startsWith('<'))
    const burst = group.length > 1

    // A single prefix-less match is not trusted as a mouse report; leave it
    // to be emitted as part of a later gap or the tail.
    if (!(prefixed || burst)) {
      continue
    }

    if (runStart > consumedUpTo) {
      const before = text.slice(consumedUpTo, runStart)

      // Only genuine text between fragments becomes a keypress; pure
      // mouse-leak residue is discarded.
      if (!MOUSE_BURST_RESIDUE_RE.test(before)) {
        out.push(parseKeypress(before))
      }
    }

    group.forEach(hit => {
      out.push(parseSgrMouseFragment(hit[0]))
    })

    consumedUpTo = runStop
    recoveredAny = true
  }

  if (!recoveredAny) {
    return null
  }

  if (consumedUpTo < text.length) {
    const rest = text.slice(consumedUpTo)

    // Same rule for what follows the last recovered run: drop residue shards,
    // keep real trailing text.
    if (!MOUSE_BURST_RESIDUE_RE.test(rest)) {
      out.push(parseKeypress(rest))
    }
  }

  return out
}
|
||||
|
||||
function parseKeypress(s: string = ''): ParsedKey {
|
||||
let parts
|
||||
|
||||
|
||||
185
ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts
Normal file
185
ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts
Normal file
@ -0,0 +1,185 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { createTokenizer, type Token } from './tokenize.js'
|
||||
|
||||
// A report split across two feeds (no flush in between) must be carried in
// the tokenizer's buffer and come out as a single sequence token.
describe('tokenizer escape-sequence boundaries', () => {
  it('reassembles a CSI mouse sequence split across two feeds', () => {
    const tokenizer = createTokenizer({ x10Mouse: true })

    const afterHead = tokenizer.feed('\x1b[<0;35;')
    expect(afterHead).toEqual([])

    const afterTail = tokenizer.feed('46M')
    expect(afterTail).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }])

    // Nothing may be left pending once the report is complete.
    expect(tokenizer.buffer()).toBe('')
  })
})
|
||||
|
||||
// flush() behaviour per tokenizer state: an unfinished control sequence is
// kept for reassembly (and dropped on a second consecutive flush), while a
// lone ESC is still emitted.
describe('tokenizer state-aware flush', () => {
  it('does not emit an incomplete CSI on flush — it keeps it for reassembly', () => {
    const tokenizer = createTokenizer({ x10Mouse: true })

    // Simulated render stall: the flush fires while only the CSI prefix has
    // been read. The prefix must stay buffered rather than being emitted.
    expect(tokenizer.feed('\x1b[<0;35;')).toEqual([])
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe('\x1b[<0;35;')

    // Once the rest arrives, exactly one complete sequence token comes out.
    const completed = tokenizer.feed('46M')

    expect(completed).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }])
    expect(tokenizer.buffer()).toBe('')
  })

  it('drops a partial control sequence that survives a second flush (truncation)', () => {
    const tokenizer = createTokenizer({ x10Mouse: true })

    expect(tokenizer.feed('\x1b[<0;35;')).toEqual([])

    // First flush: the partial is retained.
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe('\x1b[<0;35;')

    // Second flush with no feed in between: the same stale buffer is seen
    // again and discarded, so it cannot merge with later input.
    expect(tokenizer.flush()).toEqual([])
    expect(tokenizer.buffer()).toBe('')
  })

  it('still emits a bare ESC on flush so the Escape key works', () => {
    const tokenizer = createTokenizer({ x10Mouse: true })

    expect(tokenizer.feed('\x1b')).toEqual([])

    const flushed = tokenizer.flush()

    expect(flushed).toEqual([{ type: 'sequence', value: '\x1b' }])
    expect(tokenizer.buffer()).toBe('')
  })

  it('reassembles even when a flush fires between every byte of the report', () => {
    const tokenizer = createTokenizer({ x10Mouse: true })

    // Worst case: a flush lands after every chunk. Because a feed always
    // separates two flushes, no chunk may ever be emitted as input.
    ;['\x1b[', '<', '0;', '35;', '46'].forEach(chunk => {
      expect(tokenizer.feed(chunk)).toEqual([])
      expect(tokenizer.flush()).toEqual([])
    })

    expect(tokenizer.feed('M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }])
    expect(tokenizer.buffer()).toBe('')
  })
})
|
||||
|
||||
// Battle-test: prove the leak class is structurally impossible, not just that
|
||||
// the known cases are patched. We hammer the tokenizer with the worst stalls a
|
||||
// terminal can produce (split + flush at every byte) and assert the two hard
|
||||
// invariants: nothing leaks as text, and every complete report reassembles.
|
||||
describe('tokenizer fuzz: fragments never leak under a flush storm', () => {
  /** Builds a complete SGR mouse report: ESC [ < btn ; col ; row (M = press, m = release). */
  const sgr = (btn: number, col: number, row: number, press: boolean): string =>
    `\x1b[<${btn};${col};${row}${press ? 'M' : 'm'}`

  /**
   * Deterministic PRNG so a failure is reproducible: a 31-bit linear
   * congruential generator returning an integer in [0, n).
   *
   * The multiply goes through Math.imul: `s * 1103515245` in plain double
   * arithmetic exceeds 2^53, which rounds away the low-order bits of the
   * state. Sampling `s % n` from such a state makes `rnd(2)` almost always 0
   * (so release reports are practically never generated) and confines the
   * other draws to a few residues. Results are also taken from the high bits,
   * because the low bits of a power-of-two-modulus LCG have very short periods.
   */
  const makeRnd = (seed: number): ((n: number) => number) => {
    let s = seed

    return (n: number): number => {
      s = (Math.imul(s, 1103515245) + 12345) & 0x7fffffff

      return (s >>> 16) % n
    }
  }

  /**
   * Feeds `stream` to a fresh tokenizer one character at a time, flushing
   * after every character, and collects what comes out.
   *
   * The flush is skipped while the buffer is a lone ESC: that boundary
   * intentionally flushes to the Escape key, and the real watchdog re-arms
   * while bytes are pending, so a single flush between feeds never hits the
   * truncation valve.
   */
  const runFlushStorm = (stream: string): { seqTokens: string[]; text: string } => {
    const t = createTokenizer({ x10Mouse: true })
    const seqTokens: string[] = []
    let text = ''

    const drain = (tokens: Token[]): void => {
      for (const tok of tokens) {
        if (tok.type === 'sequence') {
          seqTokens.push(tok.value)
        } else {
          text += tok.value
        }
      }
    }

    for (const ch of stream) {
      drain(t.feed(ch))

      if (t.buffer() !== '\x1b') {
        drain(t.flush())
      }
    }

    return { seqTokens, text }
  }

  it('reassembles a report split + flushed at every interior byte', () => {
    const seq = sgr(0, 35, 46, true)

    // Start at 2: an earlier split is the lone-ESC ESCDELAY boundary, which
    // intentionally flushes to the Escape key. Terminals never split a mouse
    // report there — a report is one atomic write — so it's not a real case.
    for (let i = 2; i < seq.length; i++) {
      const t = createTokenizer({ x10Mouse: true })
      const tokens: Token[] = [...t.feed(seq.slice(0, i)), ...t.flush(), ...t.feed(seq.slice(i))]

      expect(tokens).toEqual([{ type: 'sequence', value: seq }])
      expect(t.buffer()).toBe('')
    }
  })

  it('feeds 200 random reports one byte at a time, flushing after every byte', () => {
    const rnd = makeRnd(0x1234567)

    const reports = Array.from({ length: 200 }, () => sgr(rnd(120), 1 + rnd(300), 1 + rnd(200), rnd(2) === 0))
    const stream = reports.join('')

    const { seqTokens, text: textLeak } = runFlushStorm(stream)

    // Nothing may surface as text, and the sequence tokens must add up to the
    // exact byte stream that was fed.
    expect(textLeak).toBe('')
    expect(seqTokens.join('')).toBe(stream)
  })

  it('keeps real keystrokes intact while mouse reports reassemble around them', () => {
    const rnd = makeRnd(0x0badf00d)

    const typed = 'abc 123 xyz'
    const expectedKeys: string[] = []
    const expectedSeqs: string[] = []
    const parts: string[] = []

    // Roughly one part in three is a typed character; the rest are reports.
    for (let k = 0; k < 120; k++) {
      if (rnd(3) === 0) {
        const ch = typed[rnd(typed.length)]!

        expectedKeys.push(ch)
        parts.push(ch)
      } else {
        const seq = sgr(rnd(64), 1 + rnd(200), 1 + rnd(100), rnd(2) === 0)

        expectedSeqs.push(seq)
        parts.push(seq)
      }
    }

    const { seqTokens, text } = runFlushStorm(parts.join(''))

    // Every typed character survives, in order; every report reassembles whole.
    expect(text).toBe(expectedKeys.join(''))
    expect(seqTokens).toEqual(expectedSeqs)
  })
})
|
||||
@ -47,10 +47,18 @@ type TokenizerOptions = {
|
||||
export function createTokenizer(options?: TokenizerOptions): Tokenizer {
|
||||
let currentState: State = 'ground'
|
||||
let currentBuffer = ''
|
||||
// The control-sequence buffer kept across the previous flush, if any. Used
|
||||
// as a one-tick truncation valve: a partial CSI mouse report normally
|
||||
// reassembles on the very next feed, so if a flush sees the exact same
|
||||
// buffer it kept last time (the continuation never arrived), we drop it.
|
||||
let lastFlushedBuffer = ''
|
||||
const x10Mouse = options?.x10Mouse ?? false
|
||||
|
||||
return {
|
||||
feed(input: string): Token[] {
|
||||
// Real bytes arrived — any kept partial is no longer stale.
|
||||
lastFlushedBuffer = ''
|
||||
|
||||
const result = tokenize(input, currentState, currentBuffer, false, x10Mouse)
|
||||
|
||||
currentState = result.state.state
|
||||
@ -64,12 +72,25 @@ export function createTokenizer(options?: TokenizerOptions): Tokenizer {
|
||||
currentState = result.state.state
|
||||
currentBuffer = result.state.buffer
|
||||
|
||||
// tokenize() keeps (doesn't emit) an incomplete control sequence on
|
||||
// flush. If two consecutive flushes see the same buffer with no feed in
|
||||
// between, the continuation is never coming (truncated write / killed
|
||||
// process) — drop it so it can't fuse with the next keypress's bytes.
|
||||
if (currentBuffer && currentBuffer === lastFlushedBuffer) {
|
||||
currentState = 'ground'
|
||||
currentBuffer = ''
|
||||
lastFlushedBuffer = ''
|
||||
} else {
|
||||
lastFlushedBuffer = currentBuffer
|
||||
}
|
||||
|
||||
return result.tokens
|
||||
},
|
||||
|
||||
reset(): void {
|
||||
currentState = 'ground'
|
||||
currentBuffer = ''
|
||||
lastFlushedBuffer = ''
|
||||
},
|
||||
|
||||
buffer(): string {
|
||||
@ -298,8 +319,10 @@ function tokenize(
|
||||
// Handle end of input
|
||||
if (result.state === 'ground') {
|
||||
flushText()
|
||||
} else if (flush) {
|
||||
// Force output incomplete sequence
|
||||
} else if (flush && result.state === 'escape') {
|
||||
// A bare ESC with nothing after it is the Escape key — the one incomplete
|
||||
// state a flush should turn into input (the classic ESCDELAY lone-ESC
|
||||
// disambiguation: ESC alone vs. ESC as a sequence/meta prefix).
|
||||
const remaining = data.slice(seqStart)
|
||||
|
||||
if (remaining) {
|
||||
@ -308,7 +331,18 @@ function tokenize(
|
||||
|
||||
result.state = 'ground'
|
||||
} else {
|
||||
// Buffer incomplete sequence for next call
|
||||
// Buffer the incomplete sequence. Two paths land here:
|
||||
// - streaming (flush=false): normal carry-over to the next feed.
|
||||
// - flush=true while still inside a multi-byte control sequence
|
||||
// (csi/osc/dcs/apc/ss3/escapeIntermediate): we deliberately do NOT
|
||||
// emit it. A half-arrived CSI mouse report (ESC[<btn;col;row M) is an
|
||||
// unfinished sequence, not user input — force-emitting it is what
|
||||
// injects `46M`/`35;46M` shards into the prompt during a render stall.
|
||||
// Keeping it buffered lets the continuation reassemble on the next
|
||||
// feed (the xterm.js state-machine discipline — partial sequences
|
||||
// never become text). createTokenizer.flush() drops the buffer if it
|
||||
// survives a second flush with no progress (a genuine truncation), so
|
||||
// a stuck partial can never merge into the next keypress's bytes.
|
||||
result.buffer = data.slice(seqStart)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user