Files
RestKit/Code/Search/RKSearchTokenizer.m
2012-08-01 18:12:53 -04:00

44 lines
1.4 KiB
Objective-C

//
// RKSearchTokenizer.m
// RestKit
//
// Created by Blake Watters on 7/30/12.
// Copyright (c) 2012 RestKit. All rights reserved.
//
#import "RKSearchTokenizer.h"
@implementation RKSearchTokenizer

/**
 Splits the given string into a set of unique, normalized word tokens.

 The input is first folded with case-insensitive and diacritic-insensitive
 options (using the system locale), then broken into word units by a
 CFStringTokenizer configured with the user's current locale. Every token is a
 substring of the folded text, so the returned tokens are already normalized.
 Finally, any members of `self.stopWords` are removed from the result.

 @param string The text to tokenize. May be nil.
 @return A set of normalized tokens. The set is empty when `string` is nil,
 contains no word tokens, or consists solely of stop words.
 */
- (NSSet *)tokenize:(NSString *)string
{
    // Messaging nil would produce a nil folded string, and both
    // CFStringGetLength() and CFStringTokenizerCreate() crash when handed NULL.
    if (string == nil) return [NSSet set];

    // Remove diacritics and fold case on our input text. The Foundation option
    // constants are used because the parameter is typed NSStringCompareOptions.
    NSString *foldedText = [string stringByFoldingWithOptions:(NSCaseInsensitiveSearch | NSDiacriticInsensitiveSearch)
                                                       locale:[NSLocale systemLocale]];

    // __bridge transfers no ownership; it is required under ARC and is a no-op
    // under manual reference counting.
    CFStringRef textRef = (__bridge CFStringRef)foldedText;
    NSMutableSet *tokens = [NSMutableSet set];

    CFLocaleRef locale = CFLocaleCopyCurrent();
    CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
                                                             textRef,
                                                             CFRangeMake(0, CFStringGetLength(textRef)),
                                                             kCFStringTokenizerUnitWord,
                                                             locale);
    if (tokenizer) {
        while (CFStringTokenizerAdvanceToNextToken(tokenizer) != kCFStringTokenizerTokenNone) {
            // Token ranges are relative to the folded text handed to the tokenizer,
            // so the substring must be taken from that same string.
            CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
            NSRange range = NSMakeRange((NSUInteger)tokenRange.location, (NSUInteger)tokenRange.length);
            [tokens addObject:[foldedText substringWithRange:range]];
        }
        // CFRelease() traps on NULL, so only release a tokenizer that was created.
        CFRelease(tokenizer);
    }
    if (locale) CFRelease(locale);

    // Remove any stop words
    if (self.stopWords) [tokens minusSet:self.stopWords];

    return tokens;
}

@end