fix(hermes-ink): collapse SGR mouse fragment guards into one flush-aware rule

When App's 50ms flush watchdog fires mid-CSI during a render stall, an
SGR mouse report (ESC[<btn;col;row M/m) is split across stdin chunks: the
tokenizer force-emits the buffered prefix and resets to ground, so both
the prefix and the ESC-less remainder reach InputEvent as nameless tokens.

The previous guard only matched a full `[<\d+;\d+;\d+[Mm]` fragment, so
the flushed prefixes (`ESC[<0;35;`) and the 1-/2-field and leading-`;`
tails (`46M`, `35;46M`, `;46M`) still leaked into the composer as
`46M35;40M...` during long sessions.

Replace the three would-be narrow regexes with one consolidated rule that
covers every split position. A `(?=...\d)` lookahead keeps typed `<`, `[`,
`;`, and `M` safe (no coordinate digit), and because the param class excludes
`M`/`m`, an embedded terminator leaves stuck-together fragments / prose intact. The
existing `!keypress.name` gate continues to protect real keystrokes, which
arrive one char per chunk with a name set.

Supersedes #29337 (covers the prefix-leak and leading-`;`/1-/2-field tail
cases that PR's two added guards missed).
This commit is contained in:
Brooklyn Nicholson
2026-06-03 19:05:26 -05:00
parent f99665f99a
commit 01c010e233
2 changed files with 121 additions and 9 deletions

View File

@ -0,0 +1,88 @@
import { describe, expect, it } from 'vitest'
import { INITIAL_STATE, type ParsedInput, type ParsedKey, parseMultipleKeypresses } from '../parse-keypress.js'
import { InputEvent } from './input-event.js'
/**
 * Test helper: runs stdin chunks through the real input pipeline
 * (tokenizer → parseKeypress → InputEvent) and reports what would be typed.
 *
 * Each chunk is handed to `parseMultipleKeypresses` together with the state
 * returned by the previous call, so partial sequences carry over between
 * chunks. A `null` chunk stands in for App's 50ms flush watchdog firing
 * mid-sequence.
 *
 * @param chunks stdin chunks in arrival order; `null` simulates a flush
 * @returns the `.input` of the first token whose `kind` is `'key'`, or `''`
 *          when no key-kind token was produced
 */
function pipelineInput(...chunks: (string | null)[]): string {
  const tokens: ParsedInput[] = []
  let tokenizerState = INITIAL_STATE

  chunks.forEach(chunk => {
    const [parsed, nextState] = parseMultipleKeypresses(tokenizerState, chunk)
    tokens.push(...parsed)
    tokenizerState = nextState
  })

  // Only the first key-kind token is inspected; any later tokens are ignored.
  const firstKey = tokens.find((token): token is ParsedKey => token.kind === 'key')
  if (!firstKey) {
    return ''
  }
  return new InputEvent(firstKey).input
}
// Suppression cases: every input below is a piece of an SGR mouse report
// (ESC[<btn;col;row M/m) and is expected to yield an empty `.input` once it
// has gone through `pipelineInput`.
describe('InputEvent SGR mouse fragment suppression', () => {
it('suppresses the buffered CSI prefix force-emitted by a mid-sequence flush', () => {
// The tokenizer buffers an incomplete CSI mouse sequence; the flush
// force-emits it as a nameless sequence token (ESC still attached). Intact
// `[<btn;col;row M` sequences are recovered as mouse/wheel events upstream,
// so only these terminatorless prefixes fall through to the guard.
// The trailing `null` chunk is what simulates the flush.
expect(pipelineInput('\x1b[<0;35;', null)).toBe('')
expect(pipelineInput('\x1b[<0;35;46', null)).toBe('')
})
it('suppresses 1-, 2-, and 3-field ESC-less continuation tails', () => {
// These are the cases the older `/^\[<\d+;\d+;\d+[Mm]/` guard missed —
// the prefix was lost to the flush, only the tail reaches us as text.
for (const tail of ['46M', '6M', '35;46M', '0;35;46M']) {
expect(pipelineInput(tail)).toBe('')
}
})
it('suppresses leading-semicolon tails from a split at a `;` boundary', () => {
// The split landed just before a `;`, so the tail begins with the separator.
for (const tail of [';46M', ';35;46M']) {
expect(pipelineInput(tail)).toBe('')
}
})
it('suppresses both halves of a `ESC[<0; / 35;46M` split end to end', () => {
// Each half is fed through its own fresh pipeline run (state is not shared
// between the two calls).
expect(pipelineInput('\x1b[<0;', null)).toBe('') // flushed prefix
expect(pipelineInput('35;46M')).toBe('') // continuation
})
it('suppresses release (`m`) terminators as well as press (`M`)', () => {
expect(pipelineInput('35;46m')).toBe('')
// NOTE(review): this input carries no `m` terminator and repeats an
// assertion from the first test in this suite — presumably another
// release-terminated tail was intended here; confirm.
expect(pipelineInput('\x1b[<0;35;', null)).toBe('')
})
})
// Pass-through cases: inputs that are NOT mouse-report fragments must come
// out of `pipelineInput` unchanged.
describe('InputEvent SGR mouse fragment guard does not eat real input', () => {
it('passes through lone bracket/angle/semicolon characters', () => {
// No coordinate digit → the `(?=…\d)` lookahead fails, so typing these
// characters is never swallowed.
expect(pipelineInput('<')).toBe('<')
expect(pipelineInput('[')).toBe('[')
expect(pipelineInput(';')).toBe(';')
})
it('passes through digits and the literal letter M', () => {
// These parse to a named key (number / m), so the `!keypress.name` gate
// skips suppression entirely.
// NOTE(review): only single characters are covered here; a multi-digit
// chunk such as `42` is not exercised by this suite.
expect(pipelineInput('5')).toBe('5')
expect(pipelineInput('M')).toBe('M')
})
it('passes through ordinary text', () => {
expect(pipelineInput('hello')).toBe('hello')
})
it('keeps two stuck-together fragments / coordinate-like prose intact', () => {
// An embedded M/m breaks the `[\d;]+...$` anchor, so a run like this is
// left for the upstream burst/recovery logic rather than blanked here.
expect(pipelineInput('1234;56;78M9;10;11M')).toBe('1234;56;78M9;10;11M')
})
})

View File

@ -5,6 +5,32 @@ import { Event } from './event.js'
/**
 * Maps a special key name to the text it should contribute as input.
 *
 * - `'space'` becomes a literal space character.
 * - `'return'` and `'escape'` contribute no text (empty string).
 * - Any other name is returned unchanged.
 */
const inputForSpecialSequence = (name: string): string => {
  switch (name) {
    case 'space':
      return ' '
    case 'return':
    case 'escape':
      return ''
    default:
      return name
  }
}
// SGR mouse-report fragment that leaked into a nameless text/sequence token.
// In alt-screen Ink enables MOUSE_ANY (DEC 1003), so every pixel of motion
// emits a CSI mouse report (ESC[<btn;col;row M/m). When a heavy React commit
// blocks the event loop past App's 50ms flush watchdog, that CSI can be split
// across stdin chunks at any byte boundary. The tokenizer flush force-emits
// the buffered prefix and resets to ground, so BOTH halves can surface as
// tokens that parseKeypress can't classify (name=''):
//
// - flushed prefix — ESC[< / [< / < + partial params, terminator optional
//                    (e.g. `ESC[<0;35;`, `[<0;`, `<0;35;46`)
// - ESC-less tail  — 1-, 2-, or 3-field digit run ending in M/m
//                    (e.g. `46M`, `;46M`, `35;46M`, `;35;46M`, `0;35;46M`)
//
// The two alternatives of the regex correspond to those two shapes. The
// leading-`;` and 1-/2-field tails are the cases the older
// `/^\[<\d+;\d+;\d+[Mm]/` guard missed, which is how `46M35;40M...` ends up
// typed into the prompt during long sessions.
//
// Safety:
// - Each alternative carries a `(?=[\d;]*\d)` lookahead, so at least one
//   digit is required; a typed `<`, `[`, `;`, or `M` on its own never matches.
// - A match must contain a mouse-specific marker: either the `[<` / `<` lead
//   or the `M`/`m` terminator. A bare digit/semicolon run (`42`, `2024`,
//   `12;34`) is therefore NOT matched. This matters because keystrokes typed
//   during the same render stall that causes the split can arrive batched in
//   one chunk as a single nameless multi-character token; matching plain
//   digits would silently swallow them.
// - The param class `[\d;]+` excludes `M`/`m`, so an embedded terminator
//   (`1;2;3M9;10M` — two stuck-together fragments / prose) cannot reach the
//   `$` anchor and is left intact.
// - The caller additionally gates on `!keypress.name`, so single keystrokes
//   that parseKeypress names are never tested against this pattern.
//
// Known limits: a terminator-less, lead-less middle piece (e.g. `35;`, which
// needs one report to be cut in three) is deliberately let through rather
// than risk eating digits, and a batched chunk that happens to look like a
// tail (e.g. `10M`) is still suppressed.
// eslint-disable-next-line no-control-regex
const SGR_MOUSE_FRAGMENT_LEAK_RE = /^\x1b?(?:(?:\[<|<)(?=[\d;]*\d)[\d;]+[Mm]?|(?=[\d;]*\d)[\d;]+[Mm])$/
export type Key = {
upArrow: boolean
downArrow: boolean
@ -83,15 +109,13 @@ function parseKey(keypress: ParsedKey): [Key, string] {
input = ''
}
// Suppress ESC-less SGR mouse fragments. When a heavy React commit blocks
// the event loop past App's 50ms NORMAL_TIMEOUT flush, a CSI split across
// stdin chunks gets its buffered ESC flushed as a lone Escape key, and the
// continuation arrives as a text token with name='' — which falls through
// all of parseKeypress's ESC-anchored regexes and the nonAlphanumericKeys
// clear below (name is falsy). The fragment then leaks into the prompt as
// literal `[<64;74;16M`. This is the same defensive sink as the F13 guard
// above; the underlying tokenizer-flush race is upstream of this layer.
if (!keypress.name && /^\[<\d+;\d+;\d+[Mm]/.test(input)) {
// Suppress SGR mouse-report fragments left over from a flush-boundary split
// (see SGR_MOUSE_FRAGMENT_LEAK_RE). Both the flushed CSI prefix and the
// ESC-less remainder reach here as nameless tokens that parseKeypress can't
// classify, so without this sink they leak into the prompt as `46M35;40M…`.
// This is the same defensive sink as the F13 guard above; the underlying
// tokenizer-flush race is upstream of this layer.
if (!keypress.name && SGR_MOUSE_FRAGMENT_LEAK_RE.test(input)) {
input = ''
}