mirror of
https://github.com/alexgo-io/stacks-puppet-node.git
synced 2026-04-29 12:15:22 +08:00
173 lines
3.8 KiB
Python
Executable File
173 lines
3.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#-----------------------
|
|
# Copyright 2014 Halfmoon Labs, Inc.
|
|
# All Rights Reserved
|
|
#-----------------------
|
|
|
|
'''
|
|
a simple Flask based API for OneName
|
|
'''
|
|
|
|
from flask import request, jsonify, Flask
|
|
|
|
import json
|
|
from bson import json_util
|
|
|
|
# Result limit used when a caller does not supply limit_results
DEFAULT_LIMIT = 30
|
|
|
|
#-----------------------------------
|
|
from pymongo import MongoClient
|
|
# Module-level MongoDB client, created with the driver's default connection
# settings; query_people_database reads the 'freegraph' database through it
c = MongoClient()
|
|
|
|
#import pylibmc
|
|
"""mc = pylibmc.Client(["127.0.0.1:11211"],binary=True,
|
|
behaviors={'tcp_nodelay':True,
|
|
'connect_timeout':100,
|
|
'no_block':True})"""
|
|
|
|
import threading
|
|
|
|
#-------------------------
|
|
#class for performing multi-threaded search on three search sub-systems
|
|
class QueryThread(threading.Thread):
    '''
    Worker thread that runs a single query against one search sub-system.

    query_type selects the backend: 'people_search', 'company_search' or
    'lucene_search'. Any other value makes run() do nothing. Once the
    thread has finished, the hits are available in self.results;
    self.found_exact_match is only updated by the company search.
    '''

    def __init__(self, query, query_type, limit_results):
        super(QueryThread, self).__init__()
        self.query = query
        self.query_type = query_type
        self.limit_results = limit_results
        self.results = []
        self.found_exact_match = False

    def run(self):
        kind = self.query_type

        if kind == 'people_search':
            self.results = query_people_database(self.query, self.limit_results)
            return

        if kind == 'company_search':
            # NOTE(review): query_company_database is not defined in the
            # visible part of this module -- confirm where it comes from
            found, hits = query_company_database(self.query)
            self.found_exact_match, self.results = found, hits
            return

        if kind == 'lucene_search':
            self.results = query_lucene_index(
                self.query, 'onename_people_index', self.limit_results)
|
|
|
|
#-------------------------
|
|
def query_people_database(query, limit_results=DEFAULT_LIMIT):
    '''
    Look up people matching *query* and return their stored profile data.

    Candidate names come from substring_search.search_people_by_name();
    the documents whose "full_name" is one of those names are then read
    from the 'nodes' collection of the 'freegraph' MongoDB database and
    the 'data' field of each document is collected.

    query -- search string handed to search_people_by_name
    limit_results -- passed through to search_people_by_name
                     (defaults to DEFAULT_LIMIT)

    Returns a list of JSON-safe values. The list is empty when the name
    search yields None or no names. The order of the list is whatever the
    database returns, not the order of the name search.
    '''
    from substring_search import search_people_by_name

    people = search_people_by_name(query, limit_results)

    # None and an empty list both mean "no candidates": skip the database
    if not people:
        return []

    db = c['freegraph']

    # the $in query is much faster but messes up intended results order
    reply = db.nodes.find({"full_name": {'$in': people}})

    # the reply is a cursor; iterating it loads the actual documents
    results = [node['data'] for node in reply]

    # round-trip through JSON (with the bson helpers) so that only plain
    # JSON values are handed back to the caller
    temp = json.dumps(results, default=json_util.default)
    return json.loads(temp)
|
|
|
|
#-----------------------------------
|
|
def query_lucene_index(query, index, limit_results=DEFAULT_LIMIT):
    '''
    Run a string query against a search index and return the decoded hits.

    The query is matched on the 'full_name' and 'twitter' fields. It is
    first run with the 'and' operator; only if that finds nothing is it
    retried with 'or', which gives more results of lower quality.

    query -- search string
    index -- name of the index to search
    limit_results -- maximum number of hits to return
                     (defaults to DEFAULT_LIMIT)

    Returns a list with the JSON-decoded 'details' field of each hit, at
    most limit_results long. A limit of zero or less returns an empty list.
    '''
    from pyes import StringQuery, ES

    # The stop condition below compares the number of collected hits with
    # the limit for equality, which can never be true for a limit <= 0 and
    # used to hand back every hit instead of none.
    if limit_results <= 0:
        return []

    conn = ES()
    search_fields = ['full_name', 'twitter']

    # NOTE(review): size=20 is hard-coded while DEFAULT_LIMIT is 30 --
    # confirm whether limits above 20 can actually be honoured
    q = StringQuery(query, search_fields=search_fields, default_operator='and')
    results = conn.search(query=q, size=20, indices=[index])

    # having or gives more results but results quality goes down
    if results.total == 0:
        q = StringQuery(query, search_fields=search_fields, default_operator='or')
        results = conn.search(query=q, size=20, indices=[index])

    results_list = []

    for hit in results:
        results_list.append(json.loads(hit['details']))

        # stop right after the last wanted hit, without pulling another one
        if len(results_list) == limit_results:
            break

    return results_list
|
|
|
|
#----------------------------------
|
|
def test_alphanumeric(query):
    '''
    check if query is acceptable as a search string

    Validation is currently switched off: every query is accepted and
    True is always returned, so *query* is not inspected at all.
    '''

    # This function used to evaluate '^(\w+(\s)*\w*)+$' against the query
    # and then throw the result away. The nested quantifiers in that
    # pattern backtrack exponentially on a long word followed by a
    # non-matching character, so the discarded check could stall on
    # user-supplied input. If validation is ever switched back on,
    # r'^\w[\w\s]*$' accepts the same strings in linear time.
    return True


# Despite its name this is a helper, not a test case: keep test runners
# that collect test_* functions from picking it up.
test_alphanumeric.__test__ = False
|
|
|
|
#-----------------------------------
|
|
#@app.route('/search/people', methods = ['GET'])
|
|
def get_people(query):
    '''
    Search the people index for *query* and return the hits as JSON.

    The optional request value 'limit_results' caps the number of people
    returned. When it is missing, not an integer, or negative,
    DEFAULT_LIMIT is used instead.

    query -- search string

    Returns the result of jsonify({'people': [...]}).
    '''
    new_limit = DEFAULT_LIMIT

    # Best effort: a missing or malformed limit falls back to the default.
    # Only the expected failures are caught so that real errors stay visible.
    try:
        requested = int(request.values['limit_results'])
    except (KeyError, ValueError, TypeError):
        pass
    else:
        # a negative bound would make the slice below drop hits from the
        # end of the list instead of limiting its length
        if requested >= 0:
            new_limit = requested

    results_people = []

    # test_alphanumeric currently accepts every query
    if test_alphanumeric(query) is not False:
        lucene_thread = QueryThread(query, 'lucene_search', new_limit)
        threads = [lucene_thread]

        # start all threads
        for worker in threads:
            worker.start()

        # wait for all of them to finish
        for worker in threads:
            worker.join()

        # at this point all queries have been performed
        results_people += lucene_thread.results

    results = {'people': results_people[:new_limit]}

    return jsonify(results)
|
|
|
|
#-------------------------
|
|
def debug(query):
    '''
    placeholder debugging hook: ignores query and returns None
    '''
    return None
|
|
|
|
#------------------
|
|
|