mirror of
https://github.com/zhigang1992/RestKit.git
synced 2026-04-02 22:42:45 +08:00
44 lines
1.4 KiB
Objective-C
44 lines
1.4 KiB
Objective-C
//
|
|
// RKSearchTokenizer.m
|
|
// RestKit
|
|
//
|
|
// Created by Blake Watters on 7/30/12.
|
|
// Copyright (c) 2012 RestKit. All rights reserved.
|
|
//
|
|
|
|
#import "RKSearchTokenizer.h"
|
|
|
|
@implementation RKSearchTokenizer

// Splits `string` into a set of unique, normalized word tokens.
//
// The input is folded (case- and diacritic-insensitively) and then broken on
// word boundaries with a CFStringTokenizer configured for the current locale.
// Any token that also appears in `self.stopWords` is removed from the result.
//
// @param string The text to tokenize. May be nil or empty.
// @return The set of tokens found in the folded text. The set is empty when
//         `string` is nil, empty, or contains no word tokens.
- (NSSet *)tokenize:(NSString *)string
{
    NSMutableSet *tokens = [NSMutableSet set];

    // Remove diacritics and case differences from the input text.
    NSString *foldedText = [string stringByFoldingWithOptions:(NSCaseInsensitiveSearch | NSDiacriticInsensitiveSearch)
                                                       locale:[NSLocale systemLocale]];

    // Messaging nil yields nil/0, so this covers both nil and empty input.
    // Bail out before touching CoreFoundation: CFStringGetLength() and
    // CFStringTokenizerCreate() must not be handed a NULL string.
    if ([foldedText length] == 0) {
        return tokens;
    }

    CFStringRef textRef = (__bridge CFStringRef)foldedText;
    CFLocaleRef locale = CFLocaleCopyCurrent();
    CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
                                                             textRef,
                                                             CFRangeMake(0, CFStringGetLength(textRef)),
                                                             kCFStringTokenizerUnitWord,
                                                             locale);

    if (tokenizer != NULL) {
        // Walk every word token and collect its text; the set de-duplicates.
        while (CFStringTokenizerAdvanceToNextToken(tokenizer) != kCFStringTokenizerTokenNone) {
            CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
            NSRange range = NSMakeRange((NSUInteger)tokenRange.location, (NSUInteger)tokenRange.length);
            [tokens addObject:[foldedText substringWithRange:range]];
        }

        CFRelease(tokenizer);
    }

    // CFRelease() traps on NULL, so only release what was actually created.
    if (locale != NULL) {
        CFRelease(locale);
    }

    // Remove any stop words
    if (self.stopWords) [tokens minusSet:self.stopWords];

    return tokens;
}

@end
|