test(hermes-ink): fuzz the tokenizer flush valve against fragment leaks
Hammer createTokenizer with the worst stalls a terminal can produce — split + flush at every interior byte, and a 200-report byte-by-byte feed that flushes after every single byte — and assert the two invariants that make the SGR-leak class structurally impossible: nothing ever leaks as a text token, and every complete report reassembles whole. A mixed mouse+keystroke variant proves real input survives the same storm.
This commit is contained in:
@ -1,6 +1,6 @@
|
|||||||
import { describe, expect, it } from 'vitest'
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
import { createTokenizer } from './tokenize.js'
|
import { createTokenizer, type Token } from './tokenize.js'
|
||||||
|
|
||||||
describe('tokenizer escape-sequence boundaries', () => {
|
describe('tokenizer escape-sequence boundaries', () => {
|
||||||
it('reassembles a CSI mouse sequence split across two feeds', () => {
|
it('reassembles a CSI mouse sequence split across two feeds', () => {
|
||||||
@ -63,3 +63,123 @@ describe('tokenizer state-aware flush', () => {
|
|||||||
expect(t.buffer()).toBe('')
|
expect(t.buffer()).toBe('')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Battle-test: prove the leak class is structurally impossible, not just that
|
||||||
|
// the known cases are patched. We hammer the tokenizer with the worst stalls a
|
||||||
|
// terminal can produce (split + flush at every byte) and assert the two hard
|
||||||
|
// invariants: nothing leaks as text, and every complete report reassembles.
|
||||||
|
describe('tokenizer fuzz: fragments never leak under a flush storm', () => {
|
||||||
|
const sgr = (btn: number, col: number, row: number, press: boolean): string =>
|
||||||
|
`\x1b[<${btn};${col};${row}${press ? 'M' : 'm'}`
|
||||||
|
|
||||||
|
it('reassembles a report split + flushed at every interior byte', () => {
|
||||||
|
const seq = sgr(0, 35, 46, true)
|
||||||
|
|
||||||
|
// Start at 2: an earlier split is the lone-ESC ESCDELAY boundary, which
|
||||||
|
// intentionally flushes to the Escape key. Terminals never split a mouse
|
||||||
|
// report there — a report is one atomic write — so it's not a real case.
|
||||||
|
for (let i = 2; i < seq.length; i++) {
|
||||||
|
const t = createTokenizer({ x10Mouse: true })
|
||||||
|
const tokens: Token[] = [...t.feed(seq.slice(0, i)), ...t.flush(), ...t.feed(seq.slice(i))]
|
||||||
|
|
||||||
|
expect(tokens).toEqual([{ type: 'sequence', value: seq }])
|
||||||
|
expect(t.buffer()).toBe('')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('feeds 200 random reports one byte at a time, flushing after every byte', () => {
|
||||||
|
// Deterministic PRNG so a failure is reproducible.
|
||||||
|
let s = 0x1234567
|
||||||
|
|
||||||
|
const rnd = (n: number): number => {
|
||||||
|
s = (s * 1103515245 + 12345) & 0x7fffffff
|
||||||
|
|
||||||
|
return s % n
|
||||||
|
}
|
||||||
|
|
||||||
|
const reports = Array.from({ length: 200 }, () => sgr(rnd(120), 1 + rnd(300), 1 + rnd(200), rnd(2) === 0))
|
||||||
|
const stream = reports.join('')
|
||||||
|
|
||||||
|
const t = createTokenizer({ x10Mouse: true })
|
||||||
|
const seqTokens: string[] = []
|
||||||
|
let textLeak = ''
|
||||||
|
|
||||||
|
const drain = (tokens: Token[]): void => {
|
||||||
|
for (const tok of tokens) {
|
||||||
|
if (tok.type === 'sequence') {
|
||||||
|
seqTokens.push(tok.value)
|
||||||
|
} else {
|
||||||
|
textLeak += tok.value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const ch of stream) {
|
||||||
|
drain(t.feed(ch))
|
||||||
|
|
||||||
|
// Flush storm — but not at a lone-ESC boundary (the real watchdog
|
||||||
|
// re-arms while bytes are pending; a single flush between feeds never
|
||||||
|
// hits the truncation valve).
|
||||||
|
if (t.buffer() !== '\x1b') {
|
||||||
|
drain(t.flush())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(textLeak).toBe('')
|
||||||
|
expect(seqTokens.join('')).toBe(stream)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('keeps real keystrokes intact while mouse reports reassemble around them', () => {
|
||||||
|
let s = 0x0badf00d
|
||||||
|
|
||||||
|
const rnd = (n: number): number => {
|
||||||
|
s = (s * 1103515245 + 12345) & 0x7fffffff
|
||||||
|
|
||||||
|
return s % n
|
||||||
|
}
|
||||||
|
|
||||||
|
const typed = 'abc 123 xyz'
|
||||||
|
const expectedKeys: string[] = []
|
||||||
|
const expectedSeqs: string[] = []
|
||||||
|
const parts: string[] = []
|
||||||
|
|
||||||
|
for (let k = 0; k < 120; k++) {
|
||||||
|
if (rnd(3) === 0) {
|
||||||
|
const ch = typed[rnd(typed.length)]!
|
||||||
|
expectedKeys.push(ch)
|
||||||
|
parts.push(ch)
|
||||||
|
} else {
|
||||||
|
const seq = sgr(rnd(64), 1 + rnd(200), 1 + rnd(100), rnd(2) === 0)
|
||||||
|
expectedSeqs.push(seq)
|
||||||
|
parts.push(seq)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = parts.join('')
|
||||||
|
const t = createTokenizer({ x10Mouse: true })
|
||||||
|
const seqTokens: string[] = []
|
||||||
|
let text = ''
|
||||||
|
|
||||||
|
const drain = (tokens: Token[]): void => {
|
||||||
|
for (const tok of tokens) {
|
||||||
|
if (tok.type === 'sequence') {
|
||||||
|
seqTokens.push(tok.value)
|
||||||
|
} else {
|
||||||
|
text += tok.value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const ch of stream) {
|
||||||
|
drain(t.feed(ch))
|
||||||
|
|
||||||
|
if (t.buffer() !== '\x1b') {
|
||||||
|
drain(t.flush())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Every typed character survives, in order; every report reassembles whole.
|
||||||
|
expect(text).toBe(expectedKeys.join(''))
|
||||||
|
expect(seqTokens).toEqual(expectedSeqs)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user