diff --git a/NativeScript/runtime/ArgConverter.mm b/NativeScript/runtime/ArgConverter.mm
index bd6956c6..2197c820 100644
--- a/NativeScript/runtime/ArgConverter.mm
+++ b/NativeScript/runtime/ArgConverter.mm
@@ -929,8 +929,8 @@
     }
 
     if ([obj isKindOfClass:[NSString class]]) {
-      const char* str = [obj UTF8String];
-      args.GetReturnValue().Set(tns::ToV8String(isolate, str));
+      NSString* nativeStr = (NSString*)obj;
+      args.GetReturnValue().Set(tns::ToV8String(isolate, nativeStr));
       return;
     }
 
diff --git a/NativeScript/runtime/DictionaryAdapter.mm b/NativeScript/runtime/DictionaryAdapter.mm
index f6e85ce6..1c548163 100644
--- a/NativeScript/runtime/DictionaryAdapter.mm
+++ b/NativeScript/runtime/DictionaryAdapter.mm
@@ -116,8 +116,7 @@ - (id)nextObject {
     bool success = properties->Get(context, (uint)self->index_).ToLocal(&value);
     tns::Assert(success, isolate);
     self->index_++;
-    std::string result = tns::ToString(isolate, value);
-    return [NSString stringWithUTF8String:result.c_str()];
+    return tns::ToNSString(isolate, value);
   }
 
   return nil;
@@ -139,8 +138,7 @@ - (NSArray*)allObjects {
     Local<Value> value;
     bool success = properties->Get(context, i).ToLocal(&value);
     tns::Assert(success, isolate);
-    std::string result = tns::ToString(isolate, value);
-    [array addObject:[NSString stringWithUTF8String:result.c_str()]];
+    [array addObject:tns::ToNSString(isolate, value)];
   }
 
   return array;
@@ -214,7 +212,7 @@ - (id)objectForKey:(id)aKey {
     bool success = obj->Get(context, key).ToLocal(&value);
     tns::Assert(success, isolate);
   } else if ([aKey isKindOfClass:[NSString class]]) {
-    const char* key = [aKey UTF8String];
+    NSString* key = (NSString*)aKey;
     Local<v8::String> keyV8Str = tns::ToV8String(isolate, key);
 
     if (obj->IsMap()) {
diff --git a/NativeScript/runtime/Helpers.h b/NativeScript/runtime/Helpers.h
index 41b725d6..359ec9cc 100644
--- a/NativeScript/runtime/Helpers.h
+++ b/NativeScript/runtime/Helpers.h
@@ -106,8 +106,8 @@ inline NSString* ToNSString(const std::string& v) {
                                   length:v.length()
                                 encoding:NSUTF8StringEncoding] S_AUTORELEASE];
 }
-// this method is a copy of ToString to avoid needless std::string<->NSString
-// conversions
+// Reads the V8 string's native UTF-16 buffer so lone surrogates survive the
+// bridge (a UTF-8 round-trip yields U+FFFD); the explicit length keeps NULs.
 inline NSString* ToNSString(v8::Isolate* isolate,
                             const v8::Local<v8::Value>& value) {
   if (value.IsEmpty()) {
@@ -119,16 +119,15 @@ inline NSString* ToNSString(v8::Isolate* isolate,
     return ToNSString(isolate, obj);
   }
 
-  v8::String::Utf8Value result(isolate, value);
+  v8::String::Value result(isolate, value);
 
-  const char* val = *result;
+  const uint16_t* val = *result;
   if (val == nullptr) {
     return @"";
   }
 
-  return [[[NSString alloc] initWithBytes:*result
-                                   length:result.length()
-                                 encoding:NSUTF8StringEncoding] S_AUTORELEASE];
+  return [NSString stringWithCharacters:(const unichar*)val
+                                 length:result.length()];
 }
 #endif
 std::u16string ToUtf16String(v8::Isolate* isolate,
diff --git a/NativeScript/runtime/Helpers.mm b/NativeScript/runtime/Helpers.mm
index d3174360..6d3cab9d 100644
--- a/NativeScript/runtime/Helpers.mm
+++ b/NativeScript/runtime/Helpers.mm
@@ -24,13 +24,26 @@
 }  // namespace
 
 std::u16string tns::ToUtf16String(Isolate* isolate, const Local<Value>& value) {
-  std::string valueStr = tns::ToString(isolate, value);
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  // FIXME: std::codecvt_utf8_utf16 is deprecated
-  std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-  std::u16string value16 = convert.from_bytes(valueStr);
+  // Read the V8 string's native UTF-16 buffer directly instead of round-tripping
+  // through UTF-8, which corrupts lone surrogates (replaced with U+FFFD) and is
+  // slower. This also drops the deprecated std::codecvt_utf8_utf16.
+  if (value.IsEmpty()) {
+    return std::u16string();
+  }
+
+  if (value->IsStringObject()) {
+    Local<v8::String> obj = value.As<v8::StringObject>()->ValueOf();
+    return tns::ToUtf16String(isolate, obj);
+  }
+
+  v8::String::Value result(isolate, value);
+
+  const uint16_t* val = *result;
+  if (val == nullptr) {
+    return std::u16string();
+  }
 
-  return value16;
+  return std::u16string((const char16_t*)val, result.length());
 }
 
 std::vector<uint8_t> tns::ToVector(const std::string& value) {
diff --git a/TestRunner/app/tests/ApiTests.js b/TestRunner/app/tests/ApiTests.js
index 84834d17..676b8f92 100644
--- a/TestRunner/app/tests/ApiTests.js
+++ b/TestRunner/app/tests/ApiTests.js
@@ -12,6 +12,48 @@ describe(module.id, function () {
         expect(object.hash).toBe(3);
     });
 
+    // Returns the UTF-16 code unit at `index` of `ns` as a number, read straight
+    // out of the NSString's character buffer. Reading it back as a JS string
+    // would re-corrupt a lone surrogate, and converting it to UTF-8 to measure
+    // it is not reliable across OS versions. The buffer is freed even when the
+    // read throws.
+    function readCodeUnit(ns, index) {
+        var buffer = interop.alloc(interop.sizeof(interop.types.uint16));
+        try {
+            ns.getCharactersRange(buffer, NSMakeRange(index, 1));
+            return new interop.Reference(interop.types.uint16, buffer).value;
+        } finally {
+            interop.free(buffer);
+        }
+    }
+
+    it("preserves a lone high surrogate when bridging a JS string to NSString", function () {
+        // A lone high surrogate (U+D834, range U+D800-U+DBFF) is a valid JS string
+        // code unit but has no UTF-8 encoding. The old UTF-8 round-trip replaced it
+        // with U+FFFD; faithful UTF-16 bridging keeps it.
+        var ns = NSString.stringWithString("\uD834");
+        expect(ns.length).toBe(1);
+        expect(readCodeUnit(ns, 0)).toBe(0xD834); // 0xFFFD (65533) after a lossy UTF-8 round-trip
+    });
+
+    it("preserves a lone low surrogate when bridging a JS string to NSString", function () {
+        // The low surrogate range (U+DC00-U+DFFF) is a different bit pattern that also
+        // has no UTF-8 encoding and must survive the bridge intact.
+        var ns = NSString.stringWithString("\uDC00");
+        expect(ns.length).toBe(1);
+        expect(readCodeUnit(ns, 0)).toBe(0xDC00); // 0xFFFD (65533) after a lossy UTF-8 round-trip
+    });
+
+    it("preserves an embedded NUL when bridging a JS string to NSString", function () {
+        // U+0000 is a valid JS code unit but terminates a C string, so a bridge
+        // that went through char* would cut "a\0b" down to "a". Faithful UTF-16
+        // bridging keeps all three units.
+        var withNul = "a" + String.fromCharCode(0) + "b";
+        var ns = NSString.stringWithString(withNul);
+        expect(ns.length).toBe(3);
+        expect(readCodeUnit(ns, 1)).toBe(0x0000); // a char* bridge would have stopped before this
+    });
+
     it("NSArray from native (uncached) array access", function () {
         const res = TNSObjCTypes.new().getNSArrayOfNSURLs();
         console.log(res);