tabs->spaces in search/server.py

This commit is contained in:
Aaron Blankstein
2017-08-23 09:10:36 -04:00
parent 13798e5fa2
commit 0310926e99

View File

@@ -1,26 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Search
~~~~~
Search
~~~~~
copyright: (c) 2014-2016 by Halfmoon Labs, Inc.
copyright: (c) 2016 by Blockstack.org
copyright: (c) 2014-2016 by Halfmoon Labs, Inc.
copyright: (c) 2016 by Blockstack.org
This file is part of Search.
Search is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Search is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Search is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Search is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Search. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with Search. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
@@ -48,67 +48,67 @@ from api.utils import get_mc_client
mc = get_mc_client()
class QueryThread(threading.Thread):
""" for performing multi-threaded search on three search sub-systems
"""
def __init__(self, query, query_type, limit_results):
threading.Thread.__init__(self)
self.query = query
self.query_type = query_type
self.results = []
self.limit_results = limit_results
self.found_exact_match = False
""" for performing multi-threaded search on three search sub-systems
"""
def __init__(self, query, query_type, limit_results):
threading.Thread.__init__(self)
self.query = query
self.query_type = query_type
self.results = []
self.limit_results = limit_results
self.found_exact_match = False
def run(self):
if(self.query_type == 'people_search'):
self.results = query_people_database(self.query, self.limit_results)
elif(self.query_type == 'twitter_search'):
self.results = query_twitter_database(self.query, self.limit_results)
elif(self.query_type == 'username_search'):
self.results = query_username_database(self.query, self.limit_results)
#self.found_exact_match, self.results = query_company_database(self.query)
if(self.query_type == 'lucene_search'):
self.results = query_lucene_index(self.query, self.limit_results)
def run(self):
if(self.query_type == 'people_search'):
self.results = query_people_database(self.query, self.limit_results)
elif(self.query_type == 'twitter_search'):
self.results = query_twitter_database(self.query, self.limit_results)
elif(self.query_type == 'username_search'):
self.results = query_username_database(self.query, self.limit_results)
#self.found_exact_match, self.results = query_company_database(self.query)
if(self.query_type == 'lucene_search'):
self.results = query_lucene_index(self.query, self.limit_results)
def error_reply(msg, code=-1):
reply = {}
reply['status'] = code
reply['message'] = "ERROR: " + msg
return jsonify(reply)
reply = {}
reply['status'] = code
reply['message'] = "ERROR: " + msg
return jsonify(reply)
def query_people_database(query, limit_results=DEFAULT_LIMIT):
name_search_results = search_people_by_name(query, limit_results)
return fetch_profiles(name_search_results, search_type="name")
name_search_results = search_people_by_name(query, limit_results)
return fetch_profiles(name_search_results, search_type="name")
def query_twitter_database(query, limit_results=DEFAULT_LIMIT):
twitter_search_results = search_people_by_twitter(query, limit_results)
return fetch_profiles(twitter_search_results, search_type="twitter")
twitter_search_results = search_people_by_twitter(query, limit_results)
return fetch_profiles(twitter_search_results, search_type="twitter")
def query_username_database(query, limit_results=DEFAULT_LIMIT):
username_search_results = search_people_by_username(query, limit_results)
return fetch_profiles(username_search_results, search_type="username")
username_search_results = search_people_by_username(query, limit_results)
return fetch_profiles(username_search_results, search_type="username")
def query_lucene_index(query, index, limit_results=DEFAULT_LIMIT):
username_search_results = search_people_by_bio(query, limit_results)
return fetch_profiles(username_search_results, search_type="username")
username_search_results = search_people_by_bio(query, limit_results)
return fetch_profiles(username_search_results, search_type="username")
def test_alphanumeric(query):
""" check if query has only alphanumeric characters or not
"""
""" check if query has only alphanumeric characters or not
"""
import re
valid = re.match(r'^\w+[\s\w]*$', query) is not None
import re
valid = re.match(r'^\w+[\s\w]*$', query) is not None
return True
return True
@searcher.route('/search', methods = ["GET", "POST"], strict_slashes = False)
@@ -116,110 +116,110 @@ def test_alphanumeric(query):
@cache_control(MEMCACHED_TIMEOUT)
def search_by_name():
query = request.args.get('query')
query = request.args.get('query')
results_people = []
results_people = []
if query is None:
return error_reply("No query given")
elif query == '' or query == ' ':
return json.dumps({})
if query is None:
return error_reply("No query given")
elif query == '' or query == ' ':
return json.dumps({})
if MEMCACHED_ENABLED:
if MEMCACHED_ENABLED:
cache_key = str('search_cache_' + query.lower())
cache_reply = mc.get(cache_key)
cache_key = str('search_cache_' + query.lower())
cache_reply = mc.get(cache_key)
# if a cache hit, respond straight away
if(cache_reply is not None):
return jsonify(cache_reply)
# if a cache hit, respond straight away
if(cache_reply is not None):
return jsonify(cache_reply)
new_limit = DEFAULT_LIMIT
new_limit = DEFAULT_LIMIT
try:
new_limit = int(request.values['limit_results'])
except:
pass
try:
new_limit = int(request.values['limit_results'])
except:
pass
if validProofQuery(query):
return search_proofs_index(query)
if validProofQuery(query):
return search_proofs_index(query)
elif test_alphanumeric(query) is False:
pass
elif test_alphanumeric(query) is False:
pass
else:
else:
threads = []
threads = []
t1 = QueryThread(query, 'username_search', new_limit)
t2 = QueryThread(query, 'twitter_search', new_limit)
t3 = QueryThread(query, 'people_search', new_limit)
t1 = QueryThread(query, 'username_search', new_limit)
t2 = QueryThread(query, 'twitter_search', new_limit)
t3 = QueryThread(query, 'people_search', new_limit)
if LUCENE_ENABLED:
t4 = QueryThread(query, 'lucene_search', new_limit)
if LUCENE_ENABLED:
t4 = QueryThread(query, 'lucene_search', new_limit)
threads.append(t1)
threads.append(t2)
threads.append(t3)
threads.append(t1)
threads.append(t2)
threads.append(t3)
if LUCENE_ENABLED:
threads.append(t4)
if LUCENE_ENABLED:
threads.append(t4)
# start all threads
[x.start() for x in threads]
# start all threads
[x.start() for x in threads]
# wait for all of them to finish
[x.join() for x in threads]
# wait for all of them to finish
[x.join() for x in threads]
# at this point all threads have finished and all queries have been performed
# at this point all threads have finished and all queries have been performed
results_username = t1.results
results_twitter = t2.results
results_people = t3.results
results_username = t1.results
results_twitter = t2.results
results_people = t3.results
if LUCENE_ENABLED:
results_bio = t4.results
if LUCENE_ENABLED:
results_bio = t4.results
results_people += results_username + results_twitter
if LUCENE_ENABLED:
results_people += results_bio
results_people += results_username + results_twitter
if LUCENE_ENABLED:
results_people += results_bio
# dedup all results before sending out
from substring_search import dedup_search_results
results_people = dedup_search_results(results_people)
# dedup all results before sending out
from substring_search import dedup_search_results
results_people = dedup_search_results(results_people)
results = {}
results['results'] = results_people[:new_limit]
results = {}
results['results'] = results_people[:new_limit]
if MEMCACHED_ENABLED:
mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
if MEMCACHED_ENABLED:
mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
return jsonify(results)
return jsonify(results)
def search_proofs_index(query):
results = {}
results = {}
query = request.args.get('query')
query = request.args.get('query')
if query is None:
return error_reply("No query given")
elif query == '' or query == ' ':
return json.dumps({})
if query is None:
return error_reply("No query given")
elif query == '' or query == ' ':
return json.dumps({})
if MEMCACHED_ENABLED:
if MEMCACHED_ENABLED:
cache_key = str('search_cache_' + query.lower())
cache_reply = mc.get(cache_key)
cache_key = str('search_cache_' + query.lower())
cache_reply = mc.get(cache_key)
# if a cache hit, respond straight away
if(cache_reply is not None):
return jsonify(cache_reply)
# if a cache hit, respond straight away
if(cache_reply is not None):
return jsonify(cache_reply)
results['results'] = search_proofs(query)
results['results'] = search_proofs(query)
if MEMCACHED_ENABLED:
mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
if MEMCACHED_ENABLED:
mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
return jsonify(results)
return jsonify(results)