From 8aec0dd5bb8d66cabf782df6168f0d9fd711619c Mon Sep 17 00:00:00 2001
From: Pedro Navarro <pnavarro@netflix.com>
Date: Fri, 6 Dec 2024 19:42:29 -0800
Subject: [PATCH] fix: UTF-16 surrogate handling.

The data pointer needs to move before decoding the second
surrogate, and not after. The first surrogate was being decoded
again, resulting in an invalid codepoint.

This affected clipboard operations originating on Windows machines,
where the text is encoded in UTF-16 and copying characters from a
high plane (like emojis) was broken.
---
 src/lib/base/Unicode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/base/Unicode.cpp b/src/lib/base/Unicode.cpp
index 585bcad30..15acd8530 100644
--- a/src/lib/base/Unicode.cpp
+++ b/src/lib/base/Unicode.cpp
@@ -459,9 +459,9 @@ String Unicode::doUTF16ToUTF8(const UInt8 *data, UInt32 n, bool *errors)
       setError(errors);
       toUTF8(dst, s_replacement, NULL);
     } else if (c >= 0x0000d800 && c <= 0x0000dbff) {
-      UInt32 c2 = decode16(data, byteSwapped);
       data += 2;
       --n;
+      UInt32 c2 = decode16(data, byteSwapped);
       if (c2 < 0x0000dc00 || c2 > 0x0000dfff) {
         // error -- [d800,dbff] not followed by [dc00,dfff]
         setError(errors);