first commit

This commit is contained in:
Muneeb Ali
2014-01-28 17:31:25 -05:00
parent eb7b92ab44
commit a688562009
19 changed files with 1222 additions and 1 deletions

View File

@@ -1,4 +1,4 @@
fgsearch
nodepath
========
Search API for FreeGraph

View File

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2013 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
import json
from json import JSONEncoder
from bson.objectid import ObjectId
import logging
from config import DEBUG
#-------------------------
def get_logger(log_name=None,log_type='stream'):
    #default to None so 'log' is defined even when debugging is off
    log = None
    if(DEBUG):
        log = logging.getLogger(log_name)
        log.setLevel(logging.DEBUG)
        formatter_stream = logging.Formatter('[%(levelname)s] %(message)s')
        handler_stream = logging.StreamHandler()
        handler_stream.setFormatter(formatter_stream)
        formatter_file = logging.Formatter('[%(levelname)s] %(message)s')
        handler_file = logging.FileHandler('log/debug.log',mode='w')
        handler_file.setFormatter(formatter_file)
        if(log_type == 'stream'):
            log.addHandler(handler_stream)
        elif(log_type == 'file'):
            log.addHandler(handler_file)
        else:
            log = None
    return log
#-------------------------
#common logger
log = get_logger()
class MongoEncoder(JSONEncoder):
    def default(self, obj, **kwargs):
        if isinstance(obj, ObjectId):
            return str(obj)
        else:
            #pass self explicitly; JSONEncoder.default is called unbound here
            return JSONEncoder.default(self, obj, **kwargs)
#-------------------------
def pretty_dump(input):
return json.dumps(input, cls=MongoEncoder, sort_keys=False, indent=4, separators=(',', ': '))
#-------------------------
def pretty_print(input):
print pretty_dump(input)
#---------------------------------
def error_reply(msg):
reply = {}
reply['status'] = -1
reply['message'] = "ERROR: " + msg
return pretty_dump(reply)

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
PORT = 5001
DEBUG = True
FG_API_SLUG = '/api/users'
SUBDOMAINS = ['freegraph','fg']
SCANPORTS = ['80','5000','8555']

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
import json
from flask import Flask, render_template, request
from common import pretty_dump, error_reply
import requests
app = Flask(__name__)
app.config.from_object('config')
from pymongo import MongoClient
c = MongoClient()
fg = c['freegraph']
#-------------------------
def get_domain_from_url(url):
from urlparse import urlparse
o = urlparse(url)
domain = o.hostname
return domain.lower()
#-------------------------
def check_host_url_inner(url):
#headers = {'Content-type': 'application/json', 'Accept': 'text/plain', 'Authorization': 'Basic'}
print "checking: " + url
try:
r = requests.get(url)
except:
return False, None
print r.status_code
    if(r.status_code == 200):
        try:
            data = r.json()
        except:
            return False, None
        if 'users' in data.keys():
            return True, data
    #fall through on non-200 replies or a missing 'users' key
    return False, None
#-------------------------
def check_host_url(domain):
check_urls = []
check_servers = []
check_servers.append(domain)
for i in app.config['SUBDOMAINS']:
check_servers.append(i + '.' + domain)
for server in check_servers:
for port in app.config['SCANPORTS']:
check_urls.append('http://' + server + ':' + port + app.config['FG_API_SLUG'])
for url in check_urls:
reply, data = check_host_url_inner(url)
if(reply):
return url, data
return False, None
#-----------------------------------
@app.route('/')
def index():
return render_template('index.html')
#-----------------------------------
@app.route('/host', methods=['GET'])
def get_host():
try:
input_url = request.values['url']
#check if 'http' or 'https' was entered, if not then append 'http'
if((input_url.find('http://') == -1) and (input_url.find('https://') == -1)):
input_url = 'http://' + input_url
except:
return error_reply("No URL given")
domain = get_domain_from_url(str(input_url))
host_url, data = check_host_url(domain)
nodes = []
if(host_url is not False):
reply = fg.hosts.find_one({'domain':domain})
if(reply):
fg.hosts.remove(reply)
host = {}
host['domain'] = domain
host['host_url'] = host_url
host['data'] = data
fg.hosts.insert(host)
nodes = data['users'].keys()
print nodes
for username in nodes:
node = {}
node['node_url'] = host_url + '/' + username
reply = fg.nodes.find_one({'node_url':node['node_url']})
if(reply):
fg.nodes.remove(reply)
node['data'] = requests.get(node['node_url']).json()
try:
full_name = node['data']['name']['first'].lower() + ' ' + node['data']['name']['last'].lower()
except:
node['full_name'] = ""
else:
node['full_name'] = full_name
fg.nodes.insert(node)
return render_template('node.html',domain=domain,host_url=host_url,nodes=nodes)
#------------------
if __name__ == '__main__':
app.run(debug=app.config['DEBUG'], port=app.config['PORT'])

View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
import json
from flask import Flask, render_template
from common import pretty_dump, error_reply
app = Flask(__name__)
#-----------------------------------
@app.route('/')
def index():
from datetime import datetime
time = datetime.now()
return render_template('discovery.html',time=time.strftime('%X'))
#-----------------------------------
@app.route('/poll/<string:target>', methods = ['GET'])
def poll_target(target):
reply = {}
blocks = '270941'
if(target == 'blockchain'):
reply['status'] = 1
reply['message'] = "Refreshed discovery_queue from source 'bitcoin blockchain'. Latest blocks: " + blocks
elif(target == 'crawlindex'):
from datetime import datetime, timedelta
diff = timedelta(hours=24)
last_crawled = datetime.now() - diff
reply['status'] = 1
reply['message'] = "Refreshed discovery_queue from source 'crawl index'. Oldest crawled URL: " + last_crawled.strftime('%Y-%m-%d %X')
else:
reply = "Target '" + target + "' not recognized"
return error_reply(reply)
return pretty_dump(reply)
#-----------------------------------
@app.errorhandler(500)
def internal_error(error):
return error_reply("Something went wrong with the server")
#-----------------------------------
@app.errorhandler(404)
def page_not_found(error):
    return error_reply('URL not found on this server')
#------------------
if __name__ == '__main__':
app.run(debug=True)

View File

View File

@@ -0,0 +1,27 @@
<html>
<head>
<script type="text/javascript">
//The page refreshes itself automatically, so the body text below mentions it
function reFresh() {
    location.reload(true);
}
//The interval below is the delay between reloads, in milliseconds
//(1 minute = 60000 ms; 300000 ms = 5 minutes).
window.setInterval("reFresh()", 300000);
</script>
</head>
<body>
This page refreshes every 5 minutes.<br><br>
Time right now is: {{time}}<br><br>
</body>
</html>

View File

@@ -0,0 +1,22 @@
<html>
<head>
</head>
<body>
FreeGraph crawler is starting up ... <br><br>
Initializing ... <br><br>
Current nodes in the index: <br><br>
http://halfmoonlabs.com <br>
http://cs.princeton.edu <br><br>
Current users in the index: <br><br>
Ryan Shea, Halfmoon Labs<br>
Muneeb Ali, Halfmoon Labs<br>
JP Singh, Princeton CS<br>
</body>
</html>

View File

@@ -0,0 +1,13 @@
<html>
<head>
</head>
<body>
Checking domain: {{domain}} <br><br>
FreeGraph API found: {{host_url}} <br><br>
Added users (nodes): {% for node in nodes %}{{node}} {% endfor %}
</body>
</html>

View File

@@ -0,0 +1,13 @@
Flask==0.10.1
Jinja2==2.7.2
MarkupSafe==0.18
Werkzeug==0.9.4
itsdangerous==0.23
pyes==0.90.1
pylibmc==1.2.3
pymongo==2.6.3
pytz==2013.9
requests==2.2.1
six==1.5.2
urllib3==1.7.1
wsgiref==0.1.2

View File

@@ -0,0 +1,100 @@
# Scope Search
We currently have three search sub-systems to handle search queries:
* Substring search on people names
* Substring search on company names
* Search on the raw Lucene index
We assume that the user is entering either a *person's name* OR a *company's name* in the search query. The API expects an input of the format:
    {
        "query": "the search query/term",
        "limit_results": "numeric limit on the number of results, e.g. 50 (this parameter is optional)"
    }

The API returns a JSON object of the format:

    {
        "companies": [],
        "people": []
    }
### Quick Testing
You can test the search API using curl:
> curl http://54.200.33.184/search/api/v1.0/people -G -d "query=peter%20thiel"

or by using [test_client.py](test_client.py):
> ./test_client.py "peter thiel"

Make sure that the packages listed in requirements.txt are installed before using test_client.py.
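For example, assuming pip is available, the dependencies can be installed from the repository root with:
> pip install -r requirements.txt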
### Search API
#### People API
The people API can be accessed via:
> curl http://54.200.33.184/search/api/v1.0/people -G -d "query=peter%20thiel"
This will currently return up to a maximum of 20 results (possibly fewer, depending on the query) with the following data; a short Python sketch follows the list:
* 'first_name'
* 'last_name'
* 'overview' -- overview of the person
* 'companies' -- each company has 1) title of person, 2) name of company, and 3) permalink of company
* 'crunchbase_slug' -- this can be used to get the crunchbase URL as http://www.crunchbase.com/person/ + 'crunchbase_slug'
* 'twitter_handle' -- twitter username
* 'linkedin_url' -- linkedin URL
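As a rough sketch, the same people query can also be issued from Python with the requests library (the query string and limit below are illustrative, not part of the API):

    import requests

    #query the people endpoint described above
    url = 'http://54.200.33.184/search/api/v1.0/people'
    params = {'query': 'peter thiel', 'limit_results': 20}
    reply = requests.get(url, params=params).json()
    for person in reply.get('people', []):
        print person['first_name'], person['last_name']

test_client.py in this commit does essentially the same thing, with a selectable local/remote server.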
#### Company API
The company API can be accessed via:
> curl http://54.200.33.184/search/api/v1.0/company -G -d "query=bank%20simple"
This will currently return up to a maximum of 20 results (possibly fewer, depending on the query) with the following data; a short Python sketch follows the list:
* 'name' -- company name
* 'homepage_url' -- company website
* 'email_address' -- email, if given on crunchbase
* 'email_info' -- information on url_domain, email_domain, and whether they could be verified
* 'total_money_raised' -- the total $$ raised
* 'people' -- list of current employees
* 'board' -- list of board members
* 'overview' -- overview text from crunchbase
* 'tag_list' -- combination of tags and categories from crunchbase (crunchbase treats them separately, we don't)
* 'crunchbase_slug' -- this can be used to get the crunchbase URL as http://www.crunchbase.com/company/ + 'crunchbase_slug'
* 'offices' -- info on company office(s)
* 'acquisition' -- if acquired, the year of acquisition
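A similar sketch for the company endpoint, building the Crunchbase URL from 'crunchbase_slug' as described above (the query below is only an example):

    import requests

    #query the company endpoint described above
    url = 'http://54.200.33.184/search/api/v1.0/company'
    reply = requests.get(url, params={'query': 'bank simple'}).json()
    for company in reply.get('companies', []):
        #the slug maps to a Crunchbase URL, as noted in the field list
        print company['name'], 'http://www.crunchbase.com/company/' + company['crunchbase_slug']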
## Installing on UNIX
### Requirements
All required Python packages are listed in 'requirements.txt'. In addition to those, Elasticsearch is also required.
### Elastic Search
The Elasticsearch library is not in GitHub and resides at

    unix/lib/elastic

The current version we're using is *0.90.2*. Download it from:
> wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.2.zip
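It can then be unpacked into the directory above, for example (assuming unzip is installed):
> unzip elasticsearch-0.90.2.zip -d unix/lib/elastic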
### Converting RAW data to search index
Right now, the steps required for going from raw data to "ready for searching" are:
> python scope/datasets/crunchbase/filter_crunchbase_data.py --filter_people
> python scope/datasets/crunchbase/filter_crunchbase_data.py --filter_company
> python scopesearch/substring_search.py --create_cache
> python scopesearch/create_search_index.py --create_people_index
> python scopesearch/create_search_index.py --create_company_index
We'll simplify these steps in an upcoming release. We assume that both MongoDB and Elasticsearch are running on the server.
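For local testing, one way to bring both services up (the paths below are illustrative; adjust them to your installation):
> mongod --dbpath /path/to/mongo/data
> unix/lib/elastic/elasticsearch-0.90.2/bin/elasticsearch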

View File

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2013 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
import json
from json import JSONEncoder
from bson.objectid import ObjectId
import logging
from config import DEBUG
#-------------------------
def get_logger(log_name=None,log_type='stream'):
if(DEBUG):
log = logging.getLogger(log_name)
log.setLevel(logging.DEBUG)
formatter_stream = logging.Formatter('[%(levelname)s] %(message)s')
handler_stream = logging.StreamHandler()
handler_stream.setFormatter(formatter_stream)
log.addHandler(handler_stream)
else:
log = None
return log
#-------------------------
#common logger
log = get_logger()
class MongoEncoder(JSONEncoder):
    def default(self, obj, **kwargs):
        if isinstance(obj, ObjectId):
            return str(obj)
        else:
            #pass self explicitly; JSONEncoder.default is called unbound here
            return JSONEncoder.default(self, obj, **kwargs)
#-------------------------
def pretty_dump(input):
return json.dumps(input, cls=MongoEncoder, sort_keys=False, indent=4, separators=(',', ': '))
#-------------------------
def pretty_print(input):
print pretty_dump(input)
#---------------------------------
def error_reply(msg):
reply = {}
reply['status'] = -1
reply['message'] = "ERROR: " + msg
return pretty_dump(reply)

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
PORT = 5001
DEBUG = True
BULK_INSERT_LIMIT = 1000
DEFAULT_LIMIT = 50

View File

@@ -0,0 +1,146 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
'''
functions for building the ES/lucene search index and mappings
'''
import sys
import json
from pyes import *
conn = ES()
from pymongo import MongoClient
c = MongoClient()
INPUT_OPTIONS = '--create_index --search'
from config import BULK_INSERT_LIMIT
from common import log
#-------------------------
def create_mapping(index_name,index_type):
'''
for creating lucene mapping
can add different mappings for different index_types
'''
try:
#delete the old mapping, if exists
conn.indices.delete_index(index_name)
except:
pass
conn.indices.create_index(index_name)
mapping = { u'full_name': {'boost': 3.0,
'index': 'analyzed',
'store': 'yes',
'type': u'string',
"term_vector" : "with_positions_offsets"},
u'bio': {'boost': 1.0,
'index': 'analyzed',
'store': 'yes',
'type': u'string',
"term_vector" : "with_positions_offsets"},
u'data': {'boost': 2.0,
'index': 'analyzed',
'store': 'yes',
'type': u'string',
"term_vector" : "with_positions_offsets"},}
conn.indices.put_mapping(index_type, {'properties':mapping}, [index_name])
#-------------------------
def create_people_index():
create_mapping("fg_people_index","fg_people_type")
from pymongo import MongoClient
from bson import json_util
import json
c = MongoClient()
db = c['freegraph']
nodes = db.nodes
counter = 0
for i in nodes.find():
data = i['data']
print i
conn.index({'full_name' : i['data']['name']['full'],
'bio' : i['data']['bio'],
'data': json.dumps(i['data'], sort_keys=True, default=json_util.default),
'_boost' : 1,},
"fg_people_index",
"fg_people_type",
bulk=True)
counter += 1
conn.indices.refresh(["fg_people_index"])
#write in bulk
if(counter % BULK_INSERT_LIMIT == 0):
print '-' * 5
print counter
conn.refresh(["fg_people_index"])
conn.indices.force_bulk()
#----------------------------------
def test_query(query,index=['fg_people_index']):
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'and')
count = conn.count(query = q)
count = count.count
if(count == 0):
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'or')
results = conn.search(query = q, size=20, indices=index)
counter = 0
results_list = []
for i in results:
counter += 1
print i['full_name']
temp = json.loads(i['data'])
results_list.append(temp)
#print counter
#print results_list
#-------------------------
if __name__ == "__main__":
    try:
        if(len(sys.argv) < 2):
            print "Usage error: " + INPUT_OPTIONS
            sys.exit(1)
        option = sys.argv[1]
        if(option == '--create_index'):
            create_people_index()
        elif(option == '--search'):
            test_query(query=sys.argv[2])
        else:
            print "Usage error: " + INPUT_OPTIONS
    except Exception as e:
        print e

View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2014 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
'''
a simple Flask based API for FreeGraph
'''
from flask import request, jsonify, Flask
app = Flask(__name__)
import json
from bson import json_util
DEFAULT_LIMIT = 30
#-----------------------------------
from pymongo import MongoClient
c = MongoClient()
import pylibmc
mc = pylibmc.Client(["127.0.0.1:11211"],binary=True,
behaviors={'tcp_nodelay':True,
'connect_timeout':100,
'no_block':True})
import threading
#-------------------------
#class for performing multi-threaded search on three search sub-systems
class QueryThread(threading.Thread):
def __init__(self,query,query_type,limit_results):
threading.Thread.__init__(self)
self.query=query
self.query_type=query_type
self.results = []
self.limit_results = limit_results
self.found_exact_match = False
def run(self):
if(self.query_type == 'people_search'):
self.results = query_people_database(self.query, self.limit_results)
elif(self.query_type == 'company_search'):
self.found_exact_match, self.results = query_company_database(self.query)
elif(self.query_type == 'lucene_search'):
self.results = query_lucene_index(self.query,'fg_people_index', self.limit_results)
#-------------------------
def query_people_database(query,limit_results=DEFAULT_LIMIT):
    '''
    substring-search people by name and return a list of matching node data
    (loaded from the freegraph nodes collection)
    '''
from substring_search import search_people_by_name
people = search_people_by_name(query, limit_results)
results = []
mongo_query = []
if people is not None:
if(len(people) == 0):
return results
else:
db = c['freegraph']
#the $in query is much faster but messes up intended results order
reply = db.nodes.find({"full_name":{'$in':people}})
#the reply is a cursor and need to load actual results first
for i in reply:
results.append(i['data'])
temp = json.dumps(results, default=json_util.default)
return json.loads(temp)
#-----------------------------------
def query_lucene_index(query,index,limit_results=DEFAULT_LIMIT):
from pyes import StringQuery, ES
conn = ES()
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'and')
count = conn.count(query = q)
count = count.count
#having or gives more results but results quality goes down
if(count == 0):
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'or')
results = conn.search(query = q, size=20, indices=[index])
results_list = []
counter = 0
for i in results:
temp = json.loads(i['data'])
results_list.append(temp)
counter += 1
if(counter == limit_results):
break
return results_list
#----------------------------------
def test_alphanumeric(query):
    '''
    check if query has only alphanumeric characters or not
    (the check is computed but currently disabled; the function always returns True)
    '''
    import re
    valid = re.match('^(\w+(\s)*\w*)+$', query) is not None
    #return valid
    return True
#-----------------------------------
@app.route('/search/people', methods = ['GET'])
def get_people():
query = request.values['query']
new_limit = DEFAULT_LIMIT
try:
new_limit = int(request.values['limit_results'])
except:
pass
'''
cache_key = str('scopesearch_cache_' + query.lower())
cache_reply = mc.get(cache_key)
#if a cache hit, respond straight away
if(cache_reply != None):
return jsonify(cache_reply)
'''
results_people = []
if test_alphanumeric(query) is False:
pass
else:
threads = []
t1 = QueryThread(query,'people_search',new_limit)
#t2 = QueryThread(query,'company_search',new_limit)
t3 = QueryThread(query,'lucene_search',new_limit)
threads.append(t1)
#threads.append(t2)
threads.append(t3)
#start all threads
[x.start() for x in threads]
#wait for all of them to finish
[x.join() for x in threads]
#at this point all threads have finished and all queries have been performed
#first, check people names
people_first_source = t1.results
#people_first_source = []
results_people += people_first_source
'''
#second, check company names
found_exact_match, results_second_source = t2.found_exact_match, t2.results
#if found exact match then results are people working in that company
if(found_exact_match):
results_people += results_second_source
#else results are list of possible companies
else:
results_companies = results_second_source
'''
#third, component is lucene results
results_lucene = t3.results
#lucene results are people
results_people += results_lucene
'''
#dedup all results before sending out
from substring_search import dedup_search_results
results_people = dedup_search_results(results_people)
from substring_search import fix_search_order
results_people = fix_search_order(query,results_people)
'''
results = {'people':results_people[:new_limit]}
#mc.set(cache_key,results)
return jsonify(results)
#-------------------------
def debug(query):
return
#------------------
if __name__ == '__main__':
app.run(debug=True, port=5003)

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2013 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
'''
functions for substring search
'''
import sys
from pymongo import MongoClient
c = MongoClient()
from config import DEFAULT_LIMIT
INPUT_OPTIONS = '--create_cache --search <query>'
#-------------------------
def create_dedup_names_cache():
    '''
    takes people names from the freegraph nodes collection and writes deduped names to a 'cache' DB
    '''
fg = c['freegraph']
#delete any old cache
c.drop_database('fg_search_cache')
search_cache = c['fg_search_cache']
people_cache = search_cache.people_cache
nodes = fg.nodes
#------------------------------
#for creating people cache
counter = 0
people_names = []
for i in nodes.find():
counter += 1
if(counter % 1000 == 0):
print counter
try:
name = i['data']['name']['first'].lower() + ' ' + i['data']['name']['last'].lower()
except:
pass
else:
people_names.append(name)
dedup_people_names = list(set(people_names))
insert_people_names = {'dedup_people_names':dedup_people_names}
#save final dedup results to mongodb (using it as a cache)
people_cache.save(insert_people_names)
#print '-' * 5
#log.debug('Created deduped people_cache: %s from %s', len(dedup_people_names), len(people_names))
#log.debug('Creating company cache ...')
#db.posts.ensure_index('full_name')
#log.debug('DONE! All set for searching now.')
#-------------------------
def anyword_substring_search_inner(query_word,target_words):
    '''
    return the query_word if ANY target_word starts with it, else False
    '''
    for target_word in target_words:
        if(target_word.startswith(query_word)):
            return query_word
    return False
#-------------------------
def anyword_substring_search(target_words,query_words):
'''
return True if all query_words match
'''
matches_required = len(query_words)
matches_found = 0
for query_word in query_words:
reply = anyword_substring_search_inner(query_word,target_words)
if reply is not False:
matches_found += 1
else:
            #this is important, otherwise we keep checking when the final answer is already False
return False
if(matches_found == matches_required):
return True
else:
return False
#-------------------------
def substring_search(query,list_of_strings,limit_results=DEFAULT_LIMIT):
'''
main function to call for searching
'''
matching = []
query_words = query.split(' ')
    #sort by longest word (highest probability of not finding a match)
query_words.sort(key=len, reverse=True)
counter = 0
for s in list_of_strings:
target_words = s.split(' ')
#the anyword searching function is separate
if(anyword_substring_search(target_words,query_words)):
matching.append(s)
#limit results
counter += 1
if(counter == limit_results):
break
return matching
#-------------------------
def search_people_by_name(query,limit_results=DEFAULT_LIMIT):
query = query.lower()
#---------------------
#using mongodb as a cache, load data in people_names
search_cache = c['fg_search_cache']
people_names = []
for i in search_cache.people_cache.find():
people_names = i['dedup_people_names']
#---------------------
results = substring_search(query,people_names,limit_results)
return results
#-------------------------
def fix_search_order(query, search_results):
results = search_results
results_names = []
old_query = query
query = query.split(' ')
first_word = ''
second_word = ''
third_word = ''
if(len(query) < 2):
first_word = old_query
else:
first_word = query[0]
second_word = query[1]
if(len(query) > 2):
third_word = query[2]
#save results for multiple passes
results_second = []
results_third = []
#------------------------
for result in results:
result_list = result['full_name'].split(' ')
try:
if(result_list[0].startswith(first_word)):
results_names.append(result)
else:
results_second.append(result)
except:
results_second.append(result)
#------------------------
for result in results_second:
result_list = result['full_name'].split(' ')
try:
if(result_list[1].startswith(first_word)):
results_names.append(result)
else:
results_third.append(result)
except:
results_third.append(result)
#------------------------
#results are either in results_names (filtered) or unprocessed in results_third (last pass)
return results_names + results_third
#-------------------------
def dedup_search_results(search_results):
    '''
    dedup results based on 'url'
    '''
known_links = set()
deduped_results = []
for i in search_results:
link = i['url']
if link in known_links:
continue
deduped_results.append(i)
known_links.add(link)
return deduped_results
#-------------------------
if __name__ == "__main__":
    try:
        if(len(sys.argv) < 2):
            print "Usage error: " + INPUT_OPTIONS
            sys.exit(1)
        option = sys.argv[1]
        if(option == '--create_cache'):
            create_dedup_names_cache()
        elif(option == '--search'):
            query = sys.argv[2]
            print search_people_by_name(query,DEFAULT_LIMIT)
        else:
            print "Usage error: " + INPUT_OPTIONS
    except Exception as e:
        print e

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python
#-----------------------
# Copyright 2013 Halfmoon Labs, Inc.
# All Rights Reserved
#-----------------------
'''
For testing the search API from command line
'''
import sys
import requests
import json
#-------------------------
def search_client(query,server):
print '-' * 10
print "Searching for: " + query
print '-' * 10
url = 'http://localhost:5000/search/people'
if(server == 'remote'):
url = 'http://54.200.209.148/search/people'
print url
data = {'query': query, 'limit_results': 35}
headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
r = requests.get(url, params=data, headers=headers)
print r
temp = r.json()
print '-' * 10
print "People: "
for i in temp['people']:
print i
#print i['first_name'] + ' ' + i['last_name'] + ' | ' + 'http://www.crunchbase.com/person/' + i['crunchbase_slug']
    #the current API may return only 'people', so guard against a missing key
    if(len(temp.get('companies', [])) > 0):
        print '-' * 10
        print "Companies: "
        for i in temp.get('companies', []):
            print i
print '-' * 10
#-------------------------
if __name__ == "__main__":
    if(len(sys.argv) < 2):
        print "Error: a search query argument is needed"
        sys.exit(1)
    query = sys.argv[1]
    server = 'local'
    try:
        server = sys.argv[2]
    except:
        pass
search_client(query, server)