From ccdc57dd966d80b09fafb93b05abce925b71e949 Mon Sep 17 00:00:00 2001
From: Chris Hopman
Date: Tue, 23 Aug 2016 17:06:18 -0700
Subject: [PATCH] Fix handling of bad utf16 passed out of JS

Summary:
JSC's utf16 -> utf8 conversion crashes on encountering bad utf16. Instead, use our own conversion (conveniently copied from fbjni).

Original fix thanks to rigdern (https://github.com/facebook/react-native/pull/9302)

Reviewed By: mhorowitz

Differential Revision: D3746947

fbshipit-source-id: 29887ca720f6a2b074f01f853bad28a083b273bc
---
 ReactCommon/cxxreact/Android.mk  |  1 +
 ReactCommon/cxxreact/BUCK        |  2 +
 ReactCommon/cxxreact/Unicode.cpp | 88 ++++++++++++++++++++++++++++++++
 ReactCommon/cxxreact/Unicode.h   | 14 +++++
 ReactCommon/cxxreact/Value.h     | 23 +++++++--
 5 files changed, 123 insertions(+), 5 deletions(-)
 create mode 100644 ReactCommon/cxxreact/Unicode.cpp
 create mode 100644 ReactCommon/cxxreact/Unicode.h

diff --git a/ReactCommon/cxxreact/Android.mk b/ReactCommon/cxxreact/Android.mk
index 83f99b152..3be689a2c 100644
--- a/ReactCommon/cxxreact/Android.mk
+++ b/ReactCommon/cxxreact/Android.mk
@@ -21,6 +21,7 @@ LOCAL_SRC_FILES := \
   NativeToJsBridge.cpp \
   Platform.cpp \
   Value.cpp \
+  Unicode.cpp \
 
 LOCAL_C_INCLUDES := $(LOCAL_PATH)/..
 LOCAL_EXPORT_C_INCLUDES := $(LOCAL_C_INCLUDES)
diff --git a/ReactCommon/cxxreact/BUCK b/ReactCommon/cxxreact/BUCK
index dc4733746..2639aa51b 100644
--- a/ReactCommon/cxxreact/BUCK
+++ b/ReactCommon/cxxreact/BUCK
@@ -126,6 +126,7 @@ react_library(
     'NativeToJsBridge.cpp',
     'Platform.cpp',
     'Value.cpp',
+    'Unicode.cpp',
   ],
   headers = [
     'JSCLegacyProfiler.h',
@@ -155,6 +156,7 @@ react_library(
     'Platform.h',
     'SystraceSection.h',
     'Value.h',
+    'Unicode.h',
   ],
   preprocessor_flags = [
     '-DLOG_TAG="ReactNative"',
diff --git a/ReactCommon/cxxreact/Unicode.cpp b/ReactCommon/cxxreact/Unicode.cpp
new file mode 100644
index 000000000..5c515b4a7
--- /dev/null
+++ b/ReactCommon/cxxreact/Unicode.cpp
@@ -0,0 +1,88 @@
+// Copyright 2004-present Facebook. All Rights Reserved.
+
+#include "Unicode.h"
+
+namespace facebook {
+namespace react {
+namespace unicode {
+namespace {
+
+// TODO(12827176): Don't duplicate this code here and fbjni.
+
+const uint16_t kUtf8OneByteBoundary = 0x80;
+const uint16_t kUtf8TwoBytesBoundary = 0x800;
+const uint16_t kUtf16HighSubLowBoundary = 0xD800;
+const uint16_t kUtf16HighSubHighBoundary = 0xDC00;
+const uint16_t kUtf16LowSubHighBoundary = 0xE000;
+
+// Calculate how many bytes are needed to convert a UTF-16 string into UTF-8.
+// Must classify code units exactly as utf16toUTF8 does when writing them.
+size_t utf16toUTF8Length(const uint16_t* utf16String, size_t utf16StringLen) {
+  if (!utf16String || utf16StringLen == 0) {
+    return 0;
+  }
+
+  size_t utf8StringLen = 0;
+  auto utf16StringEnd = utf16String + utf16StringLen;
+  auto idx16 = utf16String;
+  while (idx16 < utf16StringEnd) {
+    auto ch = *idx16++;
+    if (ch < kUtf8OneByteBoundary) {
+      utf8StringLen++;
+    } else if (ch < kUtf8TwoBytesBoundary) {
+      utf8StringLen += 2;
+    } else if (
+        (ch >= kUtf16HighSubLowBoundary) && (ch < kUtf16HighSubHighBoundary) &&
+        (idx16 < utf16StringEnd) &&
+        (*idx16 >= kUtf16HighSubHighBoundary) && (*idx16 < kUtf16LowSubHighBoundary)) {
+      utf8StringLen += 4;
+      idx16++;
+    } else {
+      utf8StringLen += 3;
+    }
+  }
+
+  return utf8StringLen;
+}
+
+} // namespace
+
+std::string utf16toUTF8(const uint16_t* utf16String, size_t utf16StringLen) noexcept {
+  if (!utf16String || utf16StringLen == 0) {
+    return "";
+  }
+
+  std::string utf8String(utf16toUTF8Length(utf16String, utf16StringLen), '\0');
+  auto idx8 = utf8String.begin();
+  auto idx16 = utf16String;
+  auto utf16StringEnd = utf16String + utf16StringLen;
+  while (idx16 < utf16StringEnd) {
+    auto ch = *idx16++;
+    if (ch < kUtf8OneByteBoundary) {
+      *idx8++ = (ch & 0x7F);
+    } else if (ch < kUtf8TwoBytesBoundary) {
+      *idx8++ = 0b11000000 | (ch >> 6);
+      *idx8++ = 0b10000000 | (ch & 0x3F);
+    } else if (
+        (ch >= kUtf16HighSubLowBoundary) && (ch < kUtf16HighSubHighBoundary) &&
+        (idx16 < utf16StringEnd) &&
+        (*idx16 >= kUtf16HighSubHighBoundary) && (*idx16 < kUtf16LowSubHighBoundary)) {
+      auto ch2 = *idx16++;
+      uint8_t trunc_byte = (((ch >> 6) & 0x0F) + 1);
+      *idx8++ = 0b11110000 | (trunc_byte >> 2);
+      *idx8++ = 0b10000000 | ((trunc_byte & 0x03) << 4) | ((ch >> 2) & 0x0F);
+      *idx8++ = 0b10000000 | ((ch & 0x03) << 4) | ((ch2 >> 6) & 0x0F);
+      *idx8++ = 0b10000000 | (ch2 & 0x3F);
+    } else {
+      *idx8++ = 0b11100000 | (ch >> 12);
+      *idx8++ = 0b10000000 | ((ch >> 6) & 0x3F);
+      *idx8++ = 0b10000000 | (ch & 0x3F);
+    }
+  }
+
+  return utf8String;
+}
+
+} // namespace unicode
+} // namespace react
+} // namespace facebook
diff --git a/ReactCommon/cxxreact/Unicode.h b/ReactCommon/cxxreact/Unicode.h
new file mode 100644
index 000000000..cc1e25836
--- /dev/null
+++ b/ReactCommon/cxxreact/Unicode.h
@@ -0,0 +1,14 @@
+// Copyright 2004-present Facebook. All Rights Reserved.
+
+#pragma once
+
+#include <string>
+#include <cstdint>
+
+namespace facebook {
+namespace react {
+namespace unicode {
+std::string utf16toUTF8(const uint16_t* utf16, size_t length) noexcept;
+}
+}
+}
diff --git a/ReactCommon/cxxreact/Value.h b/ReactCommon/cxxreact/Value.h
index 8d924df15..e2326b095 100644
--- a/ReactCommon/cxxreact/Value.h
+++ b/ReactCommon/cxxreact/Value.h
@@ -15,6 +15,7 @@
 #include
 
 #include "noncopyable.h"
+#include "Unicode.h"
 
 #if WITH_FBJSCEXTENSIONS
 #include
@@ -85,12 +86,24 @@ public:
     return JSStringGetMaximumUTF8CStringSize(m_string);
   }
 
+  /*
+   * JavaScriptCore is built with strict utf16 -> utf8 conversion.
+   * This means if JSC's built-in conversion function encounters a JavaScript
+   * string which contains an unpaired surrogate (half of a 32-bit UTF-16
+   * symbol), it produces an error rather than returning a string.
+   *
+   * Instead of relying on this, we use our own utf16 -> utf8 conversion function
+   * which is more lenient and always returns a string. When an invalid UTF-16
+   * string is provided, it'll likely manifest as a rendering glitch in the app for
+   * the invalid symbol.
+   *
+   * For details on JavaScript's unicode support see:
+   * https://mathiasbynens.be/notes/javascript-unicode
+   */
   std::string str() const {
-    size_t reserved = utf8Size();
-    char* bytes = new char[reserved];
-    size_t length = JSStringGetUTF8CString(m_string, bytes, reserved) - 1;
-    std::unique_ptr retainedBytes(bytes);
-    return std::string(bytes, length);
+    const JSChar* utf16 = JSStringGetCharactersPtr(m_string);
+    size_t stringLength = JSStringGetLength(m_string);
+    return unicode::utf16toUTF8(utf16, stringLength);
   }
 
   // Assumes that utf8 is null terminated