Fix for Unicode decoding issue when using incremental networking.

Summary:
This is **a critical issue**.

The issue arises when incremental networking is enabled from JS by setting `onprogress` or `onload` on an `XMLHttpRequest` object.

The results:

![example1](https://cloud.githubusercontent.com/assets/2270433/18829964/5a54ff30-83e7-11e6-9806-97857dce0430.png)

![example2](https://cloud.githubusercontent.com/assets/2270433/18829966/5bf40a66-83e7-11e6-84e6-9e4d76ba4f8b.png)

Unicode characters get corrupted, seemingly at random. The issue stems from the way Unicode decoding is handled in `RCTNetworking.mm`. When incremental networking is enabled, each chunk of data is decoded on its own and passed to JS, so a multi-byte character that happens to be split across two chunks cannot be decoded correctly:

```objective-c
incrementalDataBlock = ^(NSData *data, int64_t progress, int64_t total) {
NSString *responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
if (!responseString) {
  RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
  return;
}
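NSArray<id> *responseJSON = @[task.requestID, responseString, @(progress), @(total)];
[self sendEventWithName:@"didReceiveNetworkIncrementalData" body:responseJSON];
};
```

[…]
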
Closes https://github.com/facebook/react-native/pull/10110

Reviewed By: yungsters

Differential Revision: D4101533

Pulled By: fkgozali

fbshipit-source-id: 2674eaf0dd4568889070c6cde5cdf12edc5be521
This commit is contained in:
Leo Natan
2016-10-31 13:06:13 -07:00
committed by Facebook Github Bot
parent 6d3e074dd4
commit 3ac3749ac3
3 changed files with 146 additions and 11 deletions

View File

@@ -333,22 +333,60 @@ RCT_EXPORT_MODULE()
return callback(nil, nil);
}
// NOTE(review): this listing is a rendered diff whose +/- markers were stripped, so removed and
// added lines appear side by side (two signatures, two `encodedResponse` declarations, two
// "guess the encoding" calls). Presumably the variants that mention carry data are the added ones.
//
// Decodes `data` into text using the text encoding named by `response`, defaulting to UTF-8.
// When `inputCarryData` is supplied, `data` is first appended to it, and after decoding it is
// overwritten with the bytes of the combined buffer that lie past the decoded text, so that the
// next call can retry them together with the next chunk.
// Returns the decoded string, or nil when no decoding attempt succeeded.
+ (NSString *)decodeTextData:(NSData *)data fromResponse:(NSURLResponse *)response
+ (NSString *)decodeTextData:(NSData *)data fromResponse:(NSURLResponse *)response withCarryData:(NSMutableData*)inputCarryData
{
NSStringEncoding encoding = NSUTF8StringEncoding;
if (response.textEncodingName) {
CFStringEncoding cfEncoding = CFStringConvertIANACharSetNameToEncoding((CFStringRef)response.textEncodingName);
encoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding);
}
// Reuse the caller's buffer when one is given (the append below then mutates it in place);
// otherwise work on a fresh temporary buffer.
NSMutableData* currentCarryData = inputCarryData ?: [NSMutableData new];
[currentCarryData appendData:data];
// Attempt to decode text
NSString *encodedResponse = [[NSString alloc] initWithData:data encoding:encoding];
if (!encodedResponse && data.length) {
// We don't have an encoding, or the encoding is incorrect, so now we try to guess
[NSString stringEncodingForData:data
encodingOptions:nil
convertedString:&encodedResponse
usedLossyConversion:NULL];
// This variant decodes the combined buffer (carried bytes + new chunk) rather than `data` alone.
NSString *encodedResponse = [[NSString alloc] initWithData:currentCarryData encoding:encoding];
if (!encodedResponse && data.length > 0) {
if (encoding == NSUTF8StringEncoding && inputCarryData) {
// If decode failed, we attempt to trim broken character bytes from the data.
// At this time, only UTF-8 support is enabled. Multibyte encodings, such as UTF-16 and UTF-32, require a lot of additional work
// to determine whether BOM was included in the first data packet. If so, save it, and attach it to each new data packet. If not,
// an encoding has to be selected with a suitable byte order (for ARM iOS, it would be little endianness).
CFStringEncoding cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding);
// Taking a single unichar is not good enough, due to Unicode combining character sequences or characters outside the BMP.
// See https://www.objc.io/issues/9-strings/unicode/#common-pitfalls
// We'll attempt with a sequence of two characters, the most common combining character sequence and characters outside the BMP (emojis).
CFIndex maxCharLength = CFStringGetMaximumSizeForEncoding(2, cfEncoding);
// Drop 1, 2, ... trailing bytes (strictly fewer than maxCharLength) until the remaining prefix decodes.
NSUInteger removedBytes = 1;
while (removedBytes < maxCharLength) {
// NOTE(review): `currentCarryData.length - removedBytes` is unsigned arithmetic; presumably an
// empty prefix decodes successfully and ends the loop before this can wrap around — confirm.
encodedResponse = [[NSString alloc] initWithData:[currentCarryData subdataWithRange:NSMakeRange(0, currentCarryData.length - removedBytes)]
encoding:encoding];
if (encodedResponse != nil) {
break;
}
removedBytes += 1;
}
} else {
// We don't have an encoding, or the encoding is incorrect, so now we try to guess
// NOTE(review): the guess runs on `data`, not on `currentCarryData`, and the detected encoding
// (the call's return value) is discarded, while the carry computation below still re-encodes
// with `encoding`. Verify this is intended when a carry buffer is in use.
[NSString stringEncodingForData:data
encodingOptions:@{ NSStringEncodingDetectionSuggestedEncodingsKey: @[ @(encoding) ] }
convertedString:&encodedResponse
usedLossyConversion:NULL];
}
}
if (inputCarryData) {
// Re-encode the decoded text to measure how many bytes it accounts for; everything past that
// offset becomes the new carry. A nil `encodedResponse` yields a length of 0, so the whole
// combined buffer is carried over.
NSUInteger encodedResponseLength = [encodedResponse dataUsingEncoding:encoding].length;
NSData* newCarryData = [currentCarryData subdataWithRange:NSMakeRange(encodedResponseLength, currentCarryData.length - encodedResponseLength)];
[inputCarryData setData:newCarryData];
}
return encodedResponse;
}
@@ -364,7 +402,8 @@ RCT_EXPORT_MODULE()
NSString *responseString;
if ([responseType isEqualToString:@"text"]) {
responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
// No carry storage is required here because the entire data has been loaded.
responseString = [RCTNetworking decodeTextData:data fromResponse:task.response withCarryData:nil];
if (!responseString) {
RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
return;
@@ -417,13 +456,28 @@ RCT_EXPORT_MODULE()
RCTURLRequestProgressBlock downloadProgressBlock = nil;
if (incrementalUpdates) {
if ([responseType isEqualToString:@"text"]) {
// We need this to carry over bytes, which could not be decoded into text (such as broken UTF-8 characters).
// The incremental data block holds the ownership of this object, and will be released upon release of the block.
NSMutableData* incrementalDataCarry = [NSMutableData new];
incrementalDataBlock = ^(NSData *data, int64_t progress, int64_t total) {
NSString *responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
NSUInteger initialCarryLength = incrementalDataCarry.length;
NSString *responseString = [RCTNetworking decodeTextData:data
fromResponse:task.response
withCarryData:incrementalDataCarry];
if (!responseString) {
RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
return;
}
NSArray<id> *responseJSON = @[task.requestID, responseString, @(progress), @(total)];
// Update progress to include the previous carry length and reduce the current carry length.
NSArray<id> *responseJSON = @[task.requestID,
responseString,
@(progress + initialCarryLength - incrementalDataCarry.length),
@(total)];
[self sendEventWithName:@"didReceiveNetworkIncrementalData" body:responseJSON];
};
} else {