Added support for substring search on OneName users.

Currently, the index is created from the user DB of OneName.io (blockchain support can be added later).
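
The index build is exposed through the new --create_index option of substring_search (replacing the old --create_cache option). A minimal sketch, assuming a local MongoDB whose onename_user_db database has already been populated from OneName.io:

    ./substring_search --create_index    # builds the search_db profiles index and the deduped name cache
    ./substring_search --search muneeb   # sanity check: prints matching names, then the matching profiles

(The query string "muneeb" is only a placeholder.)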

The API returns OneName JSON profiles as results.
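
For example, a request against the new /search route might look like the sketch below, assuming the server runs on the DEFAULT_HOST/DEFAULT_PORT values from config.py (the query string is again a placeholder):

    import requests  # already pinned in requirements.txt (requests==2.2.1)

    # 'query' is the only recognized parameter; omitting it returns an error reply
    resp = requests.get('http://127.0.0.1:5000/search', params={'query': 'muneeb'})

    # matching OneName profiles come back under a top-level 'results' key,
    # each entry carrying 'username' and 'profile' fields
    print resp.json()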

small update
Muneeb Ali
2014-06-14 22:34:16 -07:00
parent 58f4736d27
commit a1b1918f3f
10 changed files with 141 additions and 86 deletions


@@ -1,4 +1,4 @@
-fgsearch
+onename-search
 ========
-Search API for FreeGraph
+Search API for OneName


@@ -10,4 +10,3 @@ pytz==2014.2
 requests==2.2.1
 six==1.6.1
 urllib3==1.8
-wsgiref==0.1.2

blockstack_search/search/common.py: Executable file → Normal file (0 lines changed)

blockstack_search/search/config.py: Executable file → Normal file (7 lines changed)

@@ -4,7 +4,10 @@
 # All Rights Reserved
 #-----------------------
-PORT = 5001
-DEBUG = True
+DEBUG = True
+DEFAULT_PORT = 5000
+DEFAULT_HOST = '127.0.0.1'
+BULK_INSERT_LIMIT = 1000
+DEFAULT_LIMIT = 50


@@ -14,7 +14,7 @@ from search_api import get_people
 from flask import make_response,Response
 import json
 from bson import json_util
-from helpers import *
+from rate_limit import *
 app = Flask(__name__)
@@ -166,6 +166,7 @@ def not_found(error):
     Returns a jsonified 500 error message instead of a HTTP 404 error.
     '''
     return make_response(jsonify({ 'error': '500 something wrong' }), 500)
+#----------------------------------------------
 if __name__ == '__main__':
     app.run(debug=True, port=5003)


@@ -5,19 +5,19 @@
 #-----------------------
 '''
-a simple Flask based API for OneName
+OneName Search
 '''
-from flask import request, jsonify, Flask
+from flask import request, jsonify, Flask, make_response
 app = Flask(__name__)
+from config import DEFAULT_HOST, DEFAULT_PORT, DEBUG
 import json
 from bson import json_util
-DEFAULT_LIMIT = 30
 #-----------------------------------
 from pymongo import MongoClient
 c = MongoClient()
 import sys
+from config import DEFAULT_LIMIT
 #import pylibmc
 """mc = pylibmc.Client(["127.0.0.1:11211"],binary=True,
@@ -39,15 +39,21 @@ class QueryThread(threading.Thread):
         self.found_exact_match = False
     def run(self):
-        #if(self.query_type == 'people_search'):
-        #self.results = query_people_database(self.query, self.limit_results)
+        if(self.query_type == 'people_search'):
+            self.results = query_people_database(self.query, self.limit_results)
         #elif(self.query_type == 'company_search'):
         #self.found_exact_match, self.results = query_company_database(self.query)
-        if(self.query_type == 'lucene_search'):
-            self.results = query_lucene_index(self.query,'onename_people_index', self.limit_results)
+        #if(self.query_type == 'lucene_search'):
+        #    self.results = query_lucene_index(self.query,'onename_people_index', self.limit_results)
 #---------------------------------
+def error_reply(msg, code = -1):
+    reply = {}
+    reply['status'] = code
+    reply['message'] = "ERROR: " + msg
+    return jsonify(reply)
+#-------------------------
 """
 def query_people_database(query,limit_results=DEFAULT_LIMIT):
     '''
@@ -55,29 +61,10 @@ def query_people_database(query,limit_results=DEFAULT_LIMIT):
     else returns False, [list of possible companies]
     '''
-    from substring_search import search_people_by_name
+    from substring_search import search_people_by_name, fetch_profiles_from_names
-    people = search_people_by_name(query, limit_results)
-    results = []
-    mongo_query = []
-    if people is not None:
-        if(len(people) == 0):
-            return results
-        else:
-            db = c['onename_search']
-            #the $in query is much faster but messes up intended results order
-            reply = db.nodes.find({"details":{'$in':people}})
-            #the reply is a cursor and need to load actual results first
-            for i in reply:
-                results.append(i['data'])
-            temp = json.dumps(results, default=json_util.default)
-            return json.loads(temp)
+    name_search_results = search_people_by_name(query, limit_results)
+    return fetch_profiles_from_names(name_search_results)
+"""
 #-----------------------------------
@@ -109,6 +96,7 @@ def query_lucene_index(query,index,limit_results=DEFAULT_LIMIT):
             break
     return results_list
+"""
 #----------------------------------
 def test_alphanumeric(query):
@@ -124,7 +112,13 @@ def test_alphanumeric(query):
     return True
 #-----------------------------------
-def get_people(query):
+@app.route('/search')
+def get_people():
+    query = request.args.get('query')
+    if query == None:
+        return error_reply("No query given")
     new_limit = DEFAULT_LIMIT
@@ -141,7 +135,7 @@ def get_people(query):
     threads = []
-    t3 = QueryThread(query,'lucene_search',new_limit)
+    t3 = QueryThread(query,'people_search',new_limit)
     threads.append(t3)
@@ -158,16 +152,33 @@ def get_people(query):
     results_people += results_lucene
-    results = {'people':results_people[:new_limit]}
+    results = {}
+    results['results'] = results_people[:new_limit]
     #print results
     #mc.set(cache_key,results)
     return jsonify(results)
 #-------------------------
-def debug(query):
-    return
+#-----------------------------------
+@app.route('/')
+def index():
+    return 'Welcome to the search API server of <a href="http://halfmoonlabs.com">Halfmoon Labs</a>.'
 #-----------------------------------
 @app.errorhandler(500)
 def internal_error(error):
 #------------------
     reply = []
     return json.dumps(reply)
+#-----------------------------------
+@app.errorhandler(404)
+def not_found(error):
+    return make_response(jsonify( { 'error': 'Not found' } ), 404)
 #-----------------------------------
 if __name__ == '__main__':
+    app.run(host=DEFAULT_HOST, port=DEFAULT_PORT,debug=DEBUG)


@@ -6,68 +6,86 @@
 '''
 functions for substring search
 usage: './substring_search --create_cache --search <query>'
 '''
 import sys
 import json
 from common import log
 from pymongo import MongoClient
-c = MongoClient()
+client = MongoClient()
+db = client['onename_user_db']
+local_users = db.users
 from config import DEFAULT_LIMIT
 INPUT_OPTIONS = '--create_cache --search <query>'
 #-------------------------
-def create_dedup_names_cache():
+def create_search_index():
     '''
-    takes people/company names from crunchbase DB and writes deduped names in a 'cache'
+    takes people names from blockchain and writes deduped names in a 'cache'
     '''
-    fg = c['freegraph']
-    #delete any old cache
-    c.drop_database('fg_search_cache')
-    search_cache = c['fg_search_cache']
-    people_cache = search_cache.people_cache
-    nodes = fg.nodes
+    #delete any old cache/index
+    client.drop_database('search_db')
+    client.drop_database('search_cache')
+    search_db = client['search_db']
+    search_profiles = search_db.profiles
+    search_cache = client['search_cache']
+    people_cache = search_cache.people
 #------------------------------
-    #for creating people cache
+    # create people name cache
     counter = 0
     people_names = []
-    for i in nodes.find():
+    for user in local_users.find():
+        search_profile = {}
         counter += 1
         if(counter % 1000 == 0):
             print counter
-        try:
-            name = i['data']['name']['first'].lower() + ' ' + i['data']['name']['last'].lower()
-        except:
-            pass
-        else:
+        profile = json.loads(user['profile'])
+        if 'name' in profile:
+            name = profile['name']
+            try:
+                name = name['formatted'].lower()
+            except:
+                name = name.lower()
             people_names.append(name)
+            # create index for looking up profiles by people name
+            search_profile['name'] = name
+            search_profile['profile'] = profile
+            search_profile['username'] = user['username']
+            search_profiles.save(search_profile)
 #------------------------------
-    dedup_people_names = list(set(people_names))
-    insert_people_names = {'dedup_people_names':dedup_people_names}
+    #dedup names
+    people_names = list(set(people_names))
+    people_names = {'people':people_names}
     #save final dedup results to mongodb (using it as a cache)
-    people_cache.save(insert_people_names)
+    people_cache.save(people_names)
-    #print '-' * 5
-    #log.debug('Created deduped people_cache: %s from %s', len(dedup_people_names), len(people_names))
-    #log.debug('Creating company cache ...')
-    #db.posts.ensure_index('full_name')
-    #log.debug('DONE! All set for searching now.')
+    search_cache.people.ensure_index('people')
+    search_db.profiles.ensure_index('name')
+    log.debug('Created people_cache and search_profile index')
 #-------------------------
 def anyword_substring_search_inner(query_word,target_words):
@@ -147,20 +165,41 @@ def search_people_by_name(query,limit_results=DEFAULT_LIMIT):
     #---------------------
     #using mongodb as a cache, load data in people_names
-    search_cache = c['fg_search_cache']
+    search_cache = client['search_cache']
     people_names = []
-    for i in search_cache.people_cache.find():
-        people_names = i['dedup_people_names']
+    for i in search_cache.people.find():
+        people_names = i['people']
     #---------------------
     results = substring_search(query,people_names,limit_results)
-    return results
+    return order_search_results(query,results)
 #-------------------------
-def fix_search_order(query, search_results):
+def fetch_profiles_from_names(name_search_results):
+    search_db = client['search_db']
+    search_profiles = search_db.profiles
+    results = []
+    for name in name_search_results:
+        result = search_profiles.find_one({"name":name})
+        del result['name']
+        del result['_id']
+        results.append(result)
+    return results
+#-------------------------
+def order_search_results(query, search_results):
     '''
     order of results should be a) query in first name, b) query in last name
     '''
     results = search_results
@@ -188,7 +227,7 @@ def fix_search_order(query, search_results):
     #------------------------
     for result in results:
-        result_list = result['full_name'].split(' ')
+        result_list = result.split(' ')
         try:
             if(result_list[0].startswith(first_word)):
@@ -201,7 +240,7 @@ def fix_search_order(query, search_results):
     #------------------------
     for result in results_second:
-        result_list = result['full_name'].split(' ')
+        result_list = result.split(' ')
         try:
             if(result_list[1].startswith(first_word)):
@@ -247,12 +286,14 @@ if __name__ == "__main__":
     option = sys.argv[1]
-    if(option == '--create_cache'):
-        create_dedup_names_cache()
+    if(option == '--create_index'):
+        create_search_index()
     elif(option == '--search'):
         query = sys.argv[2]
-        print search_people_by_name(query,DEFAULT_LIMIT)
+        name_search_results = search_people_by_name(query,DEFAULT_LIMIT)
+        print name_search_results
+        print '-' * 5
+        print fetch_profiles_from_names(name_search_results)
     else:
         print "Usage error"