fix: UTF-16 surrogate handling.
The data pointer must advance before the second surrogate is decoded, not after. Previously the first surrogate was being decoded a second time, resulting in an invalid code point. This affected clipboard operations originating on Windows machines, where text is encoded in UTF-16: copying characters from a higher plane (such as emoji) was broken.
This commit is contained in:
committed by
Chris Rizzitello
parent
ddb443b550
commit
8aec0dd5bb
@@ -459,9 +459,9 @@ String Unicode::doUTF16ToUTF8(const UInt8 *data, UInt32 n, bool *errors)
|
||||
setError(errors);
|
||||
toUTF8(dst, s_replacement, NULL);
|
||||
} else if (c >= 0x0000d800 && c <= 0x0000dbff) {
|
||||
UInt32 c2 = decode16(data, byteSwapped);
|
||||
data += 2;
|
||||
--n;
|
||||
UInt32 c2 = decode16(data, byteSwapped);
|
||||
if (c2 < 0x0000dc00 || c2 > 0x0000dfff) {
|
||||
// error -- [d800,dbff] not followed by [dc00,dfff]
|
||||
setError(errors);
|
||||
|
||||
Reference in New Issue
Block a user