From 8aec0dd5bb8d66cabf782df6168f0d9fd711619c Mon Sep 17 00:00:00 2001 From: Pedro Navarro Date: Fri, 6 Dec 2024 19:42:29 -0800 Subject: [PATCH] fix: UTF-16 surrogate handling. The data pointer needs to move before decoding the second surrogate, and not after. The first surrogate was being decoded again, resulting in an invalid codepoint. This affected clipboard operations originating on Windows machines, where the text is encoded in UTF-16 and copying characters from a high plane (like emojis) was broken. --- src/lib/base/Unicode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/base/Unicode.cpp b/src/lib/base/Unicode.cpp index 585bcad30..15acd8530 100644 --- a/src/lib/base/Unicode.cpp +++ b/src/lib/base/Unicode.cpp @@ -459,9 +459,9 @@ String Unicode::doUTF16ToUTF8(const UInt8 *data, UInt32 n, bool *errors) setError(errors); toUTF8(dst, s_replacement, NULL); } else if (c >= 0x0000d800 && c <= 0x0000dbff) { - UInt32 c2 = decode16(data, byteSwapped); data += 2; --n; + UInt32 c2 = decode16(data, byteSwapped); if (c2 < 0x0000dc00 || c2 > 0x0000dfff) { // error -- [d800,dbff] not followed by [dc00,dfff] setError(errors);