diff --git a/blockstack_search/README.md b/blockstack_search/README.md index 3153e1c1b..c22a62e6e 100644 --- a/blockstack_search/README.md +++ b/blockstack_search/README.md @@ -1,4 +1,4 @@ -fgsearch +onename-search ======== -Search API for FreeGraph +Search API for OneName diff --git a/blockstack_search/clients/python_client.py b/blockstack_search/client/python_client.py similarity index 100% rename from blockstack_search/clients/python_client.py rename to blockstack_search/client/python_client.py diff --git a/blockstack_search/search/test_client.py b/blockstack_search/client/test_client.py similarity index 100% rename from blockstack_search/search/test_client.py rename to blockstack_search/client/test_client.py diff --git a/blockstack_search/requirements.txt b/blockstack_search/requirements.txt index 5643ccfb8..4f7e9e662 100644 --- a/blockstack_search/requirements.txt +++ b/blockstack_search/requirements.txt @@ -10,4 +10,3 @@ pytz==2014.2 requests==2.2.1 six==1.6.1 urllib3==1.8 -wsgiref==0.1.2 diff --git a/blockstack_search/search/common.py b/blockstack_search/search/common.py old mode 100755 new mode 100644 diff --git a/blockstack_search/search/config.py b/blockstack_search/search/config.py old mode 100755 new mode 100644 index 1b0771a5e..e9c46d3e1 --- a/blockstack_search/search/config.py +++ b/blockstack_search/search/config.py @@ -4,7 +4,10 @@ # All Rights Reserved #----------------------- -PORT = 5001 -DEBUG = True +DEBUG = True + +DEFAULT_PORT = 5000 +DEFAULT_HOST = '127.0.0.1' + BULK_INSERT_LIMIT = 1000 DEFAULT_LIMIT = 50 \ No newline at end of file diff --git a/blockstack_search/search/onename_api.py b/blockstack_search/search/developer_api.py old mode 100644 new mode 100755 similarity index 99% rename from blockstack_search/search/onename_api.py rename to blockstack_search/search/developer_api.py index b39413480..387ea7694 --- a/blockstack_search/search/onename_api.py +++ b/blockstack_search/search/developer_api.py @@ -14,7 +14,7 @@ from search_api import get_people from flask import make_response,Response import json from bson import json_util -from helpers import * +from rate_limit import * app = Flask(__name__) @@ -166,6 +166,7 @@ def not_found(error): Returns a jsonified 500 error message instead of a HTTP 404 error. ''' return make_response(jsonify({ 'error': '500 something wrong' }), 500) + #---------------------------------------------- if __name__ == '__main__': app.run(debug=True, port=5003) diff --git a/blockstack_search/search/helpers.py b/blockstack_search/search/rate_limit.py similarity index 100% rename from blockstack_search/search/helpers.py rename to blockstack_search/search/rate_limit.py diff --git a/blockstack_search/search/search_api.py b/blockstack_search/search/search_api.py index 44c265509..2640965d8 100755 --- a/blockstack_search/search/search_api.py +++ b/blockstack_search/search/search_api.py @@ -5,19 +5,19 @@ #----------------------- ''' - a simple Flask based API for OneName + OneName Search ''' -from flask import request, jsonify, Flask +from flask import request, jsonify, Flask, make_response +app = Flask(__name__) + +from config import DEFAULT_HOST, DEFAULT_PORT, DEBUG import json from bson import json_util -DEFAULT_LIMIT = 30 - -#----------------------------------- -from pymongo import MongoClient -c = MongoClient() +import sys +from config import DEFAULT_LIMIT #import pylibmc """mc = pylibmc.Client(["127.0.0.1:11211"],binary=True, @@ -39,15 +39,21 @@ class QueryThread(threading.Thread): self.found_exact_match = False def run(self): - #if(self.query_type == 'people_search'): - #self.results = query_people_database(self.query, self.limit_results) + if(self.query_type == 'people_search'): + self.results = query_people_database(self.query, self.limit_results) #elif(self.query_type == 'company_search'): #self.found_exact_match, self.results = query_company_database(self.query) - if(self.query_type == 'lucene_search'): - self.results = query_lucene_index(self.query,'onename_people_index', self.limit_results) + #if(self.query_type == 'lucene_search'): + # self.results = query_lucene_index(self.query,'onename_people_index', self.limit_results) + +#--------------------------------- +def error_reply(msg, code = -1): + reply = {} + reply['status'] = code + reply['message'] = "ERROR: " + msg + return jsonify(reply) #------------------------- -""" def query_people_database(query,limit_results=DEFAULT_LIMIT): ''' @@ -55,29 +61,10 @@ def query_people_database(query,limit_results=DEFAULT_LIMIT): else returns False, [list of possible companies] ''' - from substring_search import search_people_by_name + from substring_search import search_people_by_name, fetch_profiles_from_names - people = search_people_by_name(query, limit_results) - - results = [] - mongo_query = [] - - if people is not None: - - if(len(people) == 0): - return results - else: - db = c['onename_search'] - - #the $in query is much faster but messes up intended results order - reply = db.nodes.find({"details":{'$in':people}}) - - #the reply is a cursor and need to load actual results first - for i in reply: - results.append(i['data']) - - temp = json.dumps(results, default=json_util.default) - return json.loads(temp) + name_search_results = search_people_by_name(query, limit_results) + return fetch_profiles_from_names(name_search_results) """ #----------------------------------- @@ -109,6 +96,7 @@ def query_lucene_index(query,index,limit_results=DEFAULT_LIMIT): break return results_list +""" #---------------------------------- def test_alphanumeric(query): @@ -124,7 +112,13 @@ def test_alphanumeric(query): return True #----------------------------------- -def get_people(query): +@app.route('/search') +def get_people(): + + query = request.args.get('query') + + if query == None: + return error_reply("No query given") new_limit = DEFAULT_LIMIT @@ -141,7 +135,7 @@ def get_people(query): threads = [] - t3 = QueryThread(query,'lucene_search',new_limit) + t3 = QueryThread(query,'people_search',new_limit) threads.append(t3) @@ -158,16 +152,33 @@ def get_people(query): results_people += results_lucene - results = {'people':results_people[:new_limit]} + results = {} + results['results'] = results_people[:new_limit] + + #print results #mc.set(cache_key,results) return jsonify(results) -#------------------------- -def debug(query): +#----------------------------------- +@app.route('/') +def index(): + return 'Welcome to the search API server of Halfmoon Labs.' - return +#----------------------------------- +@app.errorhandler(500) +def internal_error(error): -#------------------ + reply = [] + return json.dumps(reply) +#----------------------------------- +@app.errorhandler(404) +def not_found(error): + return make_response(jsonify( { 'error': 'Not found' } ), 404) + +#----------------------------------- +if __name__ == '__main__': + + app.run(host=DEFAULT_HOST, port=DEFAULT_PORT,debug=DEBUG) \ No newline at end of file diff --git a/blockstack_search/search/substring_search.py b/blockstack_search/search/substring_search.py index 82f5fb63b..cf59a2900 100755 --- a/blockstack_search/search/substring_search.py +++ b/blockstack_search/search/substring_search.py @@ -6,68 +6,86 @@ ''' functions for substring search + usage: './substring_search --create_cache --search ' ''' + import sys +import json +from common import log from pymongo import MongoClient -c = MongoClient() +client = MongoClient() +db = client['onename_user_db'] +local_users = db.users from config import DEFAULT_LIMIT -INPUT_OPTIONS = '--create_cache --search ' - #------------------------- -def create_dedup_names_cache(): +def create_search_index(): ''' - takes people/company names from crunchbase DB and writes deduped names in a 'cache' + takes people names from blockchain and writes deduped names in a 'cache' ''' - fg = c['freegraph'] + #delete any old cache/index + client.drop_database('search_db') + client.drop_database('search_cache') - #delete any old cache - c.drop_database('fg_search_cache') + search_db = client['search_db'] + search_profiles = search_db.profiles - search_cache = c['fg_search_cache'] - people_cache = search_cache.people_cache + search_cache = client['search_cache'] + people_cache = search_cache.people - nodes = fg.nodes - #------------------------------ - #for creating people cache + # create people name cache counter = 0 people_names = [] - for i in nodes.find(): + for user in local_users.find(): + + search_profile = {} counter += 1 if(counter % 1000 == 0): print counter - try: - name = i['data']['name']['first'].lower() + ' ' + i['data']['name']['last'].lower() - except: - pass - else: + profile = json.loads(user['profile']) + + if 'name' in profile: + name = profile['name'] + + try: + name = name['formatted'].lower() + except: + name = name.lower() + people_names.append(name) + #------------------------------ + # create index for looking up profiles by people name - dedup_people_names = list(set(people_names)) + search_profile['name'] = name + search_profile['profile'] = profile + search_profile['username'] = user['username'] + search_profiles.save(search_profile) - insert_people_names = {'dedup_people_names':dedup_people_names} + + #dedup names + people_names = list(set(people_names)) + + people_names = {'people':people_names} #save final dedup results to mongodb (using it as a cache) - people_cache.save(insert_people_names) + people_cache.save(people_names) - #print '-' * 5 - #log.debug('Created deduped people_cache: %s from %s', len(dedup_people_names), len(people_names)) - #log.debug('Creating company cache ...') - - #db.posts.ensure_index('full_name') - #log.debug('DONE! All set for searching now.') + search_cache.people.ensure_index('people') + search_db.profiles.ensure_index('name') + + log.debug('Created people_cache and search_profile index') #------------------------- def anyword_substring_search_inner(query_word,target_words): @@ -147,20 +165,41 @@ def search_people_by_name(query,limit_results=DEFAULT_LIMIT): #--------------------- #using mongodb as a cache, load data in people_names - search_cache = c['fg_search_cache'] + search_cache = client['search_cache'] people_names = [] - for i in search_cache.people_cache.find(): - people_names = i['dedup_people_names'] + for i in search_cache.people.find(): + people_names = i['people'] #--------------------- results = substring_search(query,people_names,limit_results) - return results + return order_search_results(query,results) #------------------------- -def fix_search_order(query, search_results): +def fetch_profiles_from_names(name_search_results): + + search_db = client['search_db'] + search_profiles = search_db.profiles + + results = [] + + for name in name_search_results: + + result = search_profiles.find_one({"name":name}) + del result['name'] + del result['_id'] + results.append(result) + + return results + +#------------------------- +def order_search_results(query, search_results): + + ''' + order of results should be a) query in first name, b) query in last name + ''' results = search_results @@ -188,7 +227,7 @@ def fix_search_order(query, search_results): #------------------------ for result in results: - result_list = result['full_name'].split(' ') + result_list = result.split(' ') try: if(result_list[0].startswith(first_word)): @@ -201,7 +240,7 @@ def fix_search_order(query, search_results): #------------------------ for result in results_second: - result_list = result['full_name'].split(' ') + result_list = result.split(' ') try: if(result_list[1].startswith(first_word)): @@ -247,12 +286,14 @@ if __name__ == "__main__": option = sys.argv[1] - if(option == '--create_cache'): - create_dedup_names_cache() + if(option == '--create_index'): + create_search_index() elif(option == '--search'): query = sys.argv[2] - print search_people_by_name(query,DEFAULT_LIMIT) - + name_search_results = search_people_by_name(query,DEFAULT_LIMIT) + print name_search_results + print '-' * 5 + print fetch_profiles_from_names(name_search_results) else: print "Usage error"