mirror of
https://github.com/alexgo-io/stacks-puppet-node.git
synced 2026-04-30 12:42:10 +08:00
first commit
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
fgsearch
|
nodepath
|
||||||
========
|
========
|
||||||
|
|
||||||
Search API for FreeGraph
|
Search API for FreeGraph
|
||||||
|
|||||||
0
blockstack_search/crawler/__init__.py
Normal file
0
blockstack_search/crawler/__init__.py
Normal file
61
blockstack_search/crawler/common.py
Executable file
61
blockstack_search/crawler/common.py
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2013 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
import json
|
||||||
|
from json import JSONEncoder
|
||||||
|
from bson.objectid import ObjectId
|
||||||
|
import logging
|
||||||
|
from config import DEBUG
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def get_logger(log_name=None,log_type='stream'):
|
||||||
|
|
||||||
|
if(DEBUG):
|
||||||
|
log = logging.getLogger(log_name)
|
||||||
|
log.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
formatter_stream = logging.Formatter('[%(levelname)s] %(message)s')
|
||||||
|
handler_stream = logging.StreamHandler()
|
||||||
|
handler_stream.setFormatter(formatter_stream)
|
||||||
|
|
||||||
|
formatter_file = logging.Formatter('[%(levelname)s] %(message)s')
|
||||||
|
handler_file = logging.FileHandler('log/debug.log',mode='w')
|
||||||
|
handler_file.setFormatter(formatter_file)
|
||||||
|
|
||||||
|
if(log_type == 'stream'):
|
||||||
|
log.addHandler(handler_stream)
|
||||||
|
elif(log_type == 'file'):
|
||||||
|
log.addHandler(handler_file)
|
||||||
|
else:
|
||||||
|
log = None
|
||||||
|
|
||||||
|
return log
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
#common logger
|
||||||
|
log = get_logger()
|
||||||
|
|
||||||
|
class MongoEncoder(JSONEncoder):
|
||||||
|
def default(self, obj, **kwargs):
|
||||||
|
if isinstance(obj, ObjectId):
|
||||||
|
return str(obj)
|
||||||
|
else:
|
||||||
|
return JSONEncoder.default(obj, **kwargs)
|
||||||
|
#-------------------------
|
||||||
|
def pretty_dump(input):
|
||||||
|
|
||||||
|
return json.dumps(input, cls=MongoEncoder, sort_keys=False, indent=4, separators=(',', ': '))
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def pretty_print(input):
|
||||||
|
print pretty_dump(input)
|
||||||
|
|
||||||
|
#---------------------------------
|
||||||
|
def error_reply(msg):
|
||||||
|
reply = {}
|
||||||
|
reply['status'] = -1
|
||||||
|
reply['message'] = "ERROR: " + msg
|
||||||
|
return pretty_dump(reply)
|
||||||
11
blockstack_search/crawler/config.py
Executable file
11
blockstack_search/crawler/config.py
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
PORT = 5001
|
||||||
|
DEBUG = True
|
||||||
|
FG_API_SLUG = '/api/users'
|
||||||
|
SUBDOMAINS = ['freegraph','fg']
|
||||||
|
SCANPORTS = ['80','5000','8555']
|
||||||
145
blockstack_search/crawler/crawler.py
Executable file
145
blockstack_search/crawler/crawler.py
Executable file
@@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
import json
|
||||||
|
from flask import Flask, render_template, request
|
||||||
|
from common import pretty_dump, error_reply
|
||||||
|
import requests
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
app.config.from_object('config')
|
||||||
|
|
||||||
|
from pymongo import MongoClient
|
||||||
|
c = MongoClient()
|
||||||
|
|
||||||
|
fg = c['freegraph']
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def get_domain_from_url(url):
|
||||||
|
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
o = urlparse(url)
|
||||||
|
|
||||||
|
domain = o.hostname
|
||||||
|
|
||||||
|
return domain.lower()
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def check_host_url_inner(url):
|
||||||
|
|
||||||
|
#headers = {'Content-type': 'application/json', 'Accept': 'text/plain', 'Authorization': 'Basic'}
|
||||||
|
|
||||||
|
print "checking: " + url
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = requests.get(url)
|
||||||
|
except:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
print r.status_code
|
||||||
|
|
||||||
|
if(r.status_code == 200):
|
||||||
|
try:
|
||||||
|
data = r.json()
|
||||||
|
except:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
if 'users' in data.keys():
|
||||||
|
return True, data
|
||||||
|
else:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def check_host_url(domain):
|
||||||
|
|
||||||
|
check_urls = []
|
||||||
|
check_servers = []
|
||||||
|
|
||||||
|
check_servers.append(domain)
|
||||||
|
|
||||||
|
for i in app.config['SUBDOMAINS']:
|
||||||
|
check_servers.append(i + '.' + domain)
|
||||||
|
|
||||||
|
for server in check_servers:
|
||||||
|
|
||||||
|
for port in app.config['SCANPORTS']:
|
||||||
|
check_urls.append('http://' + server + ':' + port + app.config['FG_API_SLUG'])
|
||||||
|
|
||||||
|
for url in check_urls:
|
||||||
|
reply, data = check_host_url_inner(url)
|
||||||
|
if(reply):
|
||||||
|
return url, data
|
||||||
|
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.route('/')
|
||||||
|
def index():
|
||||||
|
|
||||||
|
return render_template('index.html')
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.route('/host', methods=['GET'])
|
||||||
|
def get_host():
|
||||||
|
|
||||||
|
try:
|
||||||
|
input_url = request.values['url']
|
||||||
|
|
||||||
|
#check if 'http' or 'https' was entered, if not then append 'http'
|
||||||
|
if((input_url.find('http://') == -1) and (input_url.find('https://') == -1)):
|
||||||
|
input_url = 'http://' + input_url
|
||||||
|
|
||||||
|
except:
|
||||||
|
return error_reply("No URL given")
|
||||||
|
|
||||||
|
domain = get_domain_from_url(str(input_url))
|
||||||
|
|
||||||
|
host_url, data = check_host_url(domain)
|
||||||
|
nodes = []
|
||||||
|
|
||||||
|
if(host_url is not False):
|
||||||
|
reply = fg.hosts.find_one({'domain':domain})
|
||||||
|
|
||||||
|
if(reply):
|
||||||
|
fg.hosts.remove(reply)
|
||||||
|
|
||||||
|
host = {}
|
||||||
|
host['domain'] = domain
|
||||||
|
host['host_url'] = host_url
|
||||||
|
host['data'] = data
|
||||||
|
fg.hosts.insert(host)
|
||||||
|
|
||||||
|
nodes = data['users'].keys()
|
||||||
|
|
||||||
|
print nodes
|
||||||
|
|
||||||
|
for username in nodes:
|
||||||
|
|
||||||
|
node = {}
|
||||||
|
node['node_url'] = host_url + '/' + username
|
||||||
|
|
||||||
|
reply = fg.nodes.find_one({'node_url':node['node_url']})
|
||||||
|
|
||||||
|
if(reply):
|
||||||
|
fg.nodes.remove(reply)
|
||||||
|
|
||||||
|
node['data'] = requests.get(node['node_url']).json()
|
||||||
|
|
||||||
|
try:
|
||||||
|
full_name = node['data']['name']['first'].lower() + ' ' + node['data']['name']['last'].lower()
|
||||||
|
except:
|
||||||
|
node['full_name'] = ""
|
||||||
|
else:
|
||||||
|
node['full_name'] = full_name
|
||||||
|
|
||||||
|
fg.nodes.insert(node)
|
||||||
|
|
||||||
|
return render_template('node.html',domain=domain,host_url=host_url,nodes=nodes)
|
||||||
|
|
||||||
|
#------------------
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(debug=app.config['DEBUG'], port=app.config['PORT'])
|
||||||
62
blockstack_search/crawler/discovery.py
Executable file
62
blockstack_search/crawler/discovery.py
Executable file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
import json
|
||||||
|
from flask import Flask, render_template
|
||||||
|
from common import pretty_dump, error_reply
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.route('/')
|
||||||
|
def index():
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
time = datetime.now()
|
||||||
|
return render_template('discovery.html',time=time.strftime('%X'))
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.route('/poll/<string:target>', methods = ['GET'])
|
||||||
|
def poll_target(target):
|
||||||
|
|
||||||
|
reply = {}
|
||||||
|
|
||||||
|
blocks = '270941'
|
||||||
|
|
||||||
|
if(target == 'blockchain'):
|
||||||
|
reply['status'] = 1
|
||||||
|
reply['message'] = "Refreshed discovery_queue from source 'bitcoin blockchain'. Latest blocks: " + blocks
|
||||||
|
|
||||||
|
elif(target == 'crawlindex'):
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
diff = timedelta(hours=24)
|
||||||
|
|
||||||
|
last_crawled = datetime.now() - diff
|
||||||
|
|
||||||
|
reply['status'] = 1
|
||||||
|
reply['message'] = "Refreshed discovery_queue from source 'crawl index'. Oldest crawled URL: " + last_crawled.strftime('%Y-%m-%d %X')
|
||||||
|
|
||||||
|
else:
|
||||||
|
reply = "Target '" + target + "' not recognized"
|
||||||
|
return error_reply(reply)
|
||||||
|
|
||||||
|
return pretty_dump(reply)
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.errorhandler(500)
|
||||||
|
def internal_error(error):
|
||||||
|
|
||||||
|
return error_reply("Something went wrong with the server")
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.errorhandler(404)
|
||||||
|
def internal_error(error):
|
||||||
|
|
||||||
|
return error_reply('URL not found on this server')
|
||||||
|
|
||||||
|
#------------------
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(debug=True)
|
||||||
0
blockstack_search/crawler/log/debug.log
Normal file
0
blockstack_search/crawler/log/debug.log
Normal file
27
blockstack_search/crawler/templates/discovery.html
Normal file
27
blockstack_search/crawler/templates/discovery.html
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
|
||||||
|
//Because the page will automatically refresh, should mention it on the webpage
|
||||||
|
|
||||||
|
function reFresh() {
|
||||||
|
location.reload(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the number below to the amount of delay, in milliseconds,
|
||||||
|
//you want between page reloads: 1 minute = 60000 milliseconds.
|
||||||
|
window.setInterval("reFresh()",300000);
|
||||||
|
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
This page refreshes every 5 minutes.<br><br>
|
||||||
|
|
||||||
|
Time right now is: {{time}}<br><br>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
22
blockstack_search/crawler/templates/index.html
Normal file
22
blockstack_search/crawler/templates/index.html
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
FreeGraph crawler is starting up ... <br><br>
|
||||||
|
Initializing ... <br><br>
|
||||||
|
|
||||||
|
Current nodes in the index: <br><br>
|
||||||
|
|
||||||
|
http://halfmoonlabs.com <br>
|
||||||
|
http://cs.princeton.edu <br><br>
|
||||||
|
|
||||||
|
Current users in the index: <br><br>
|
||||||
|
|
||||||
|
Ryan Shea, Halfmoon Labs<br>
|
||||||
|
Muneeb Ali, Halfmoon Labs<br>
|
||||||
|
JP Singh, Princeton CS<br>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
13
blockstack_search/crawler/templates/node.html
Normal file
13
blockstack_search/crawler/templates/node.html
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
Checking domain: {{domain}} <br><br>
|
||||||
|
FreeGraph API found: {{host_url}} <br><br>
|
||||||
|
|
||||||
|
Added users (nodes): {% for node in nodes %}{{node}} {% endfor %}
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
13
blockstack_search/requirements.txt
Normal file
13
blockstack_search/requirements.txt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
Flask==0.10.1
|
||||||
|
Jinja2==2.7.2
|
||||||
|
MarkupSafe==0.18
|
||||||
|
Werkzeug==0.9.4
|
||||||
|
itsdangerous==0.23
|
||||||
|
pyes==0.90.1
|
||||||
|
pylibmc==1.2.3
|
||||||
|
pymongo==2.6.3
|
||||||
|
pytz==2013.9
|
||||||
|
requests==2.2.1
|
||||||
|
six==1.5.2
|
||||||
|
urllib3==1.7.1
|
||||||
|
wsgiref==0.1.2
|
||||||
100
blockstack_search/search/README.md
Normal file
100
blockstack_search/search/README.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
# Scope Search
|
||||||
|
|
||||||
|
We currently have three search sub-systems to handle search queries:
|
||||||
|
|
||||||
|
* Substring search on people names
|
||||||
|
* Substring search on company names
|
||||||
|
* Search on the raw lucene index
|
||||||
|
|
||||||
|
We assume that the user is entering either a *person's name* OR a *company's name* in the search query. The API expects an input of the format:
|
||||||
|
|
||||||
|
{
|
||||||
|
"query": "the search query/term",
|
||||||
|
"limit_results": "numeric limit on number of results e.g., 50, this parameter is optional"
|
||||||
|
}
|
||||||
|
|
||||||
|
The API returns a JSON object of the format:
|
||||||
|
|
||||||
|
{
|
||||||
|
"companies": [],
|
||||||
|
"people": []
|
||||||
|
}
|
||||||
|
|
||||||
|
### Quick Testing
|
||||||
|
|
||||||
|
You can test the search API using curl:
|
||||||
|
|
||||||
|
> curl http://54.200.33.184/search/api/v1.0/people -G -d "query=peter%20thiel"
|
||||||
|
|
||||||
|
OR by using the [test_client.py](test_client.py)
|
||||||
|
|
||||||
|
> ./test_client.py "peter thiel"
|
||||||
|
|
||||||
|
Make sure that the packages listed in requirements.txt are installed before using the test_client.py
|
||||||
|
|
||||||
|
### Search API
|
||||||
|
|
||||||
|
#### People API
|
||||||
|
|
||||||
|
The people API can be accessed via:
|
||||||
|
|
||||||
|
> curl http://54.200.33.184/search/api/v1.0/people -G -d "query=peter%20thiel"
|
||||||
|
|
||||||
|
This will currently return upto a max of 20 results (can be less depending on the query) with the following data:
|
||||||
|
|
||||||
|
* 'first_name'
|
||||||
|
* 'last_name'
|
||||||
|
* 'overview' -- overview of the person
|
||||||
|
* 'companies' -- each company has 1) title of person, 2) name of company, and 3) permalink of company
|
||||||
|
* 'crunchbase_slug' -- this can be used to get the crunchbase URL as http://www.crunchbase.com/person/ + 'crunchbase_slug'
|
||||||
|
* 'twitter_handle' -- twitter username
|
||||||
|
* 'linkedin_url' -- linkedin URL
|
||||||
|
|
||||||
|
#### Company API
|
||||||
|
|
||||||
|
The company API can be accessed via:
|
||||||
|
|
||||||
|
> curl http://54.200.33.184/search/api/v1.0/company -G -d "query=bank%20simple"
|
||||||
|
|
||||||
|
This will currently return upto a max of 20 results (can be less depending on the query) with the following data:
|
||||||
|
|
||||||
|
* 'name' -- company name
|
||||||
|
* 'homepage_url' -- company website
|
||||||
|
* 'email_address' -- email, if given on crunchbase
|
||||||
|
* 'email_info' -- has information on url_domain, email_domain and if can verify on them
|
||||||
|
* 'total_money_raised' -- the total $$ raised
|
||||||
|
* 'people' -- list of current employees
|
||||||
|
* 'board' -- list of board members
|
||||||
|
* 'overview' -- overview text from crunchbase
|
||||||
|
* 'tag_list' -- combination of tags and categories from crunchbase (crunchbase treats them separately, we don't)
|
||||||
|
* 'crunchbase_slug' -- this can be used to get the crunchbase URL as http://www.crunchbase.com/company/ + 'crunchbase_slug'
|
||||||
|
* 'offices' -- info on company office(s)
|
||||||
|
* 'acquisition' -- if acquired, the year it was acquired in
|
||||||
|
|
||||||
|
## Installing on UNIX
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
|
||||||
|
All required packages for Python are listed in 'requirements.txt'. In addition to those, also requires Elastic Search.
|
||||||
|
|
||||||
|
### Elastic Search
|
||||||
|
|
||||||
|
Elastic Search library is not in github and resides at
|
||||||
|
|
||||||
|
unix/lib/elastic
|
||||||
|
|
||||||
|
the current version we're using is *0.90.2*. Download from:
|
||||||
|
|
||||||
|
> wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.2.zip
|
||||||
|
|
||||||
|
### Converting RAW data to search index
|
||||||
|
|
||||||
|
Right now, the steps required for going from raw data to "ready for searching" are:
|
||||||
|
|
||||||
|
> python scope/datasets/crunchbase/filter_crunchbase_data.py --filter_people
|
||||||
|
> python scope/datasets/crunchbase/filter_crunchbase_data.py --filter_company
|
||||||
|
> python scopesearch/substring_search.py --create_cache
|
||||||
|
> python scopesearch/create_search_index.py --create_people_index
|
||||||
|
> python scopesearch/create_search_index.py --create_company_index
|
||||||
|
|
||||||
|
We'll simplify these steps in an upcoming release. We assume that both MongoDB and Elastic Search is running on the server.
|
||||||
0
blockstack_search/search/__init__.py
Normal file
0
blockstack_search/search/__init__.py
Normal file
55
blockstack_search/search/common.py
Executable file
55
blockstack_search/search/common.py
Executable file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2013 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
import json
|
||||||
|
from json import JSONEncoder
|
||||||
|
from bson.objectid import ObjectId
|
||||||
|
import logging
|
||||||
|
from config import DEBUG
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def get_logger(log_name=None,log_type='stream'):
|
||||||
|
|
||||||
|
if(DEBUG):
|
||||||
|
log = logging.getLogger(log_name)
|
||||||
|
log.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
formatter_stream = logging.Formatter('[%(levelname)s] %(message)s')
|
||||||
|
handler_stream = logging.StreamHandler()
|
||||||
|
handler_stream.setFormatter(formatter_stream)
|
||||||
|
|
||||||
|
log.addHandler(handler_stream)
|
||||||
|
|
||||||
|
else:
|
||||||
|
log = None
|
||||||
|
|
||||||
|
return log
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
#common logger
|
||||||
|
log = get_logger()
|
||||||
|
|
||||||
|
class MongoEncoder(JSONEncoder):
|
||||||
|
def default(self, obj, **kwargs):
|
||||||
|
if isinstance(obj, ObjectId):
|
||||||
|
return str(obj)
|
||||||
|
else:
|
||||||
|
return JSONEncoder.default(obj, **kwargs)
|
||||||
|
#-------------------------
|
||||||
|
def pretty_dump(input):
|
||||||
|
|
||||||
|
return json.dumps(input, cls=MongoEncoder, sort_keys=False, indent=4, separators=(',', ': '))
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def pretty_print(input):
|
||||||
|
print pretty_dump(input)
|
||||||
|
|
||||||
|
#---------------------------------
|
||||||
|
def error_reply(msg):
|
||||||
|
reply = {}
|
||||||
|
reply['status'] = -1
|
||||||
|
reply['message'] = "ERROR: " + msg
|
||||||
|
return pretty_dump(reply)
|
||||||
10
blockstack_search/search/config.py
Executable file
10
blockstack_search/search/config.py
Executable file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
PORT = 5001
|
||||||
|
DEBUG = True
|
||||||
|
BULK_INSERT_LIMIT = 1000
|
||||||
|
DEFAULT_LIMIT = 50
|
||||||
146
blockstack_search/search/create_search_index.py
Executable file
146
blockstack_search/search/create_search_index.py
Executable file
@@ -0,0 +1,146 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
'''
|
||||||
|
functions for building the ES/lucene search index and mappings
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
from pyes import *
|
||||||
|
conn = ES()
|
||||||
|
|
||||||
|
from pymongo import MongoClient
|
||||||
|
c = MongoClient()
|
||||||
|
|
||||||
|
INPUT_OPTIONS = '--create_index --search'
|
||||||
|
|
||||||
|
from config import BULK_INSERT_LIMIT
|
||||||
|
from common import log
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def create_mapping(index_name,index_type):
|
||||||
|
|
||||||
|
'''
|
||||||
|
for creating lucene mapping
|
||||||
|
can add different mappings for different index_types
|
||||||
|
'''
|
||||||
|
|
||||||
|
try:
|
||||||
|
#delete the old mapping, if exists
|
||||||
|
conn.indices.delete_index(index_name)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
conn.indices.create_index(index_name)
|
||||||
|
|
||||||
|
mapping = { u'full_name': {'boost': 3.0,
|
||||||
|
'index': 'analyzed',
|
||||||
|
'store': 'yes',
|
||||||
|
'type': u'string',
|
||||||
|
"term_vector" : "with_positions_offsets"},
|
||||||
|
|
||||||
|
u'bio': {'boost': 1.0,
|
||||||
|
'index': 'analyzed',
|
||||||
|
'store': 'yes',
|
||||||
|
'type': u'string',
|
||||||
|
"term_vector" : "with_positions_offsets"},
|
||||||
|
|
||||||
|
u'data': {'boost': 2.0,
|
||||||
|
'index': 'analyzed',
|
||||||
|
'store': 'yes',
|
||||||
|
'type': u'string',
|
||||||
|
"term_vector" : "with_positions_offsets"},}
|
||||||
|
|
||||||
|
conn.indices.put_mapping(index_type, {'properties':mapping}, [index_name])
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def create_people_index():
|
||||||
|
|
||||||
|
create_mapping("fg_people_index","fg_people_type")
|
||||||
|
|
||||||
|
from pymongo import MongoClient
|
||||||
|
from bson import json_util
|
||||||
|
import json
|
||||||
|
|
||||||
|
c = MongoClient()
|
||||||
|
|
||||||
|
db = c['freegraph']
|
||||||
|
nodes = db.nodes
|
||||||
|
|
||||||
|
counter = 0
|
||||||
|
|
||||||
|
for i in nodes.find():
|
||||||
|
|
||||||
|
data = i['data']
|
||||||
|
|
||||||
|
print i
|
||||||
|
|
||||||
|
conn.index({'full_name' : i['data']['name']['full'],
|
||||||
|
'bio' : i['data']['bio'],
|
||||||
|
'data': json.dumps(i['data'], sort_keys=True, default=json_util.default),
|
||||||
|
'_boost' : 1,},
|
||||||
|
"fg_people_index",
|
||||||
|
"fg_people_type",
|
||||||
|
bulk=True)
|
||||||
|
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
conn.indices.refresh(["fg_people_index"])
|
||||||
|
|
||||||
|
#write in bulk
|
||||||
|
if(counter % BULK_INSERT_LIMIT == 0):
|
||||||
|
print '-' * 5
|
||||||
|
print counter
|
||||||
|
conn.refresh(["fg_people_index"])
|
||||||
|
|
||||||
|
conn.indices.force_bulk()
|
||||||
|
|
||||||
|
#----------------------------------
|
||||||
|
def test_query(query,index=['fg_people_index']):
|
||||||
|
|
||||||
|
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'and')
|
||||||
|
count = conn.count(query = q)
|
||||||
|
count = count.count
|
||||||
|
|
||||||
|
if(count == 0):
|
||||||
|
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'or')
|
||||||
|
|
||||||
|
results = conn.search(query = q, size=20, indices=index)
|
||||||
|
|
||||||
|
counter = 0
|
||||||
|
|
||||||
|
results_list = []
|
||||||
|
|
||||||
|
for i in results:
|
||||||
|
counter += 1
|
||||||
|
print i['full_name']
|
||||||
|
|
||||||
|
temp = json.loads(i['data'])
|
||||||
|
|
||||||
|
results_list.append(temp)
|
||||||
|
|
||||||
|
#print counter
|
||||||
|
|
||||||
|
#print results_list
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
if(len(sys.argv) < 2):
|
||||||
|
print "Usage error"
|
||||||
|
|
||||||
|
option = sys.argv[1]
|
||||||
|
|
||||||
|
if(option == '--create_index'):
|
||||||
|
create_people_index()
|
||||||
|
elif(option == '--search'):
|
||||||
|
test_query(query=sys.argv[2])
|
||||||
|
else:
|
||||||
|
print "Usage error"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print e
|
||||||
224
blockstack_search/search/search_api.py
Executable file
224
blockstack_search/search/search_api.py
Executable file
@@ -0,0 +1,224 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2014 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
'''
|
||||||
|
a simple Flask based API for FreeGraph
|
||||||
|
'''
|
||||||
|
|
||||||
|
from flask import request, jsonify, Flask
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
import json
|
||||||
|
from bson import json_util
|
||||||
|
|
||||||
|
DEFAULT_LIMIT = 30
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
from pymongo import MongoClient
|
||||||
|
c = MongoClient()
|
||||||
|
|
||||||
|
import pylibmc
|
||||||
|
mc = pylibmc.Client(["127.0.0.1:11211"],binary=True,
|
||||||
|
behaviors={'tcp_nodelay':True,
|
||||||
|
'connect_timeout':100,
|
||||||
|
'no_block':True})
|
||||||
|
|
||||||
|
import threading
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
#class for performing multi-threaded search on three search sub-systems
|
||||||
|
class QueryThread(threading.Thread):
|
||||||
|
def __init__(self,query,query_type,limit_results):
|
||||||
|
threading.Thread.__init__(self)
|
||||||
|
self.query=query
|
||||||
|
self.query_type=query_type
|
||||||
|
self.results = []
|
||||||
|
self.limit_results = limit_results
|
||||||
|
self.found_exact_match = False
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
if(self.query_type == 'people_search'):
|
||||||
|
self.results = query_people_database(self.query, self.limit_results)
|
||||||
|
elif(self.query_type == 'company_search'):
|
||||||
|
self.found_exact_match, self.results = query_company_database(self.query)
|
||||||
|
elif(self.query_type == 'lucene_search'):
|
||||||
|
self.results = query_lucene_index(self.query,'fg_people_index', self.limit_results)
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def query_people_database(query,limit_results=DEFAULT_LIMIT):
|
||||||
|
|
||||||
|
'''
|
||||||
|
returns True, {names of employees} if exact match of company name
|
||||||
|
else returns False, [list of possible companies]
|
||||||
|
'''
|
||||||
|
|
||||||
|
from substring_search import search_people_by_name
|
||||||
|
|
||||||
|
people = search_people_by_name(query, limit_results)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
mongo_query = []
|
||||||
|
|
||||||
|
if people is not None:
|
||||||
|
|
||||||
|
if(len(people) == 0):
|
||||||
|
return results
|
||||||
|
else:
|
||||||
|
db = c['freegraph']
|
||||||
|
|
||||||
|
#the $in query is much faster but messes up intended results order
|
||||||
|
reply = db.nodes.find({"full_name":{'$in':people}})
|
||||||
|
|
||||||
|
#the reply is a cursor and need to load actual results first
|
||||||
|
for i in reply:
|
||||||
|
results.append(i['data'])
|
||||||
|
|
||||||
|
|
||||||
|
temp = json.dumps(results, default=json_util.default)
|
||||||
|
return json.loads(temp)
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
def query_lucene_index(query,index,limit_results=DEFAULT_LIMIT):
|
||||||
|
|
||||||
|
from pyes import StringQuery, ES
|
||||||
|
conn = ES()
|
||||||
|
|
||||||
|
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'and')
|
||||||
|
count = conn.count(query = q)
|
||||||
|
count = count.count
|
||||||
|
|
||||||
|
#having or gives more results but results quality goes down
|
||||||
|
if(count == 0):
|
||||||
|
q = StringQuery(query, search_fields = ['full_name', 'bio', 'data'], default_operator = 'or')
|
||||||
|
|
||||||
|
results = conn.search(query = q, size=20, indices=[index])
|
||||||
|
|
||||||
|
results_list = []
|
||||||
|
|
||||||
|
counter = 0
|
||||||
|
|
||||||
|
for i in results:
|
||||||
|
|
||||||
|
temp = json.loads(i['data'])
|
||||||
|
|
||||||
|
results_list.append(temp)
|
||||||
|
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
if(counter == limit_results):
|
||||||
|
break
|
||||||
|
|
||||||
|
return results_list
|
||||||
|
|
||||||
|
#----------------------------------
|
||||||
|
def test_alphanumeric(query):
|
||||||
|
|
||||||
|
'''
|
||||||
|
check if query has only alphanumeric characters or not
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
valid = re.match('^(\w+(\s)*\w*)+$', query) is not None
|
||||||
|
|
||||||
|
#return valid
|
||||||
|
return True
|
||||||
|
|
||||||
|
#-----------------------------------
|
||||||
|
@app.route('/search/people', methods = ['GET'])
|
||||||
|
def get_people():
|
||||||
|
|
||||||
|
query = request.values['query']
|
||||||
|
new_limit = DEFAULT_LIMIT
|
||||||
|
|
||||||
|
try:
|
||||||
|
new_limit = int(request.values['limit_results'])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
'''
|
||||||
|
cache_key = str('scopesearch_cache_' + query.lower())
|
||||||
|
cache_reply = mc.get(cache_key)
|
||||||
|
|
||||||
|
#if a cache hit, respond straight away
|
||||||
|
if(cache_reply != None):
|
||||||
|
return jsonify(cache_reply)
|
||||||
|
'''
|
||||||
|
|
||||||
|
results_people = []
|
||||||
|
|
||||||
|
if test_alphanumeric(query) is False:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
|
||||||
|
threads = []
|
||||||
|
|
||||||
|
t1 = QueryThread(query,'people_search',new_limit)
|
||||||
|
#t2 = QueryThread(query,'company_search',new_limit)
|
||||||
|
t3 = QueryThread(query,'lucene_search',new_limit)
|
||||||
|
|
||||||
|
threads.append(t1)
|
||||||
|
#threads.append(t2)
|
||||||
|
threads.append(t3)
|
||||||
|
|
||||||
|
#start all threads
|
||||||
|
[x.start() for x in threads]
|
||||||
|
|
||||||
|
#wait for all of them to finish
|
||||||
|
[x.join() for x in threads]
|
||||||
|
|
||||||
|
#at this point all threads have finished and all queries have been performed
|
||||||
|
|
||||||
|
|
||||||
|
#first, check people names
|
||||||
|
people_first_source = t1.results
|
||||||
|
#people_first_source = []
|
||||||
|
|
||||||
|
results_people += people_first_source
|
||||||
|
|
||||||
|
'''
|
||||||
|
#second, check company names
|
||||||
|
found_exact_match, results_second_source = t2.found_exact_match, t2.results
|
||||||
|
|
||||||
|
#if found exact match then results are people working in that company
|
||||||
|
if(found_exact_match):
|
||||||
|
results_people += results_second_source
|
||||||
|
#else results are list of possible companies
|
||||||
|
else:
|
||||||
|
results_companies = results_second_source
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
#third, component is lucene results
|
||||||
|
results_lucene = t3.results
|
||||||
|
|
||||||
|
#lucene results are people
|
||||||
|
results_people += results_lucene
|
||||||
|
|
||||||
|
'''
|
||||||
|
#dedup all results before sending out
|
||||||
|
from substring_search import dedup_search_results
|
||||||
|
results_people = dedup_search_results(results_people)
|
||||||
|
|
||||||
|
from substring_search import fix_search_order
|
||||||
|
results_people = fix_search_order(query,results_people)
|
||||||
|
'''
|
||||||
|
|
||||||
|
results = {'people':results_people[:new_limit]}
|
||||||
|
|
||||||
|
#mc.set(cache_key,results)
|
||||||
|
|
||||||
|
return jsonify(results)
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def debug(query):
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
#------------------
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
app.run(debug=True, port=5003)
|
||||||
260
blockstack_search/search/substring_search.py
Executable file
260
blockstack_search/search/substring_search.py
Executable file
@@ -0,0 +1,260 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2013 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
'''
|
||||||
|
functions for substring search
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from pymongo import MongoClient
|
||||||
|
c = MongoClient()
|
||||||
|
|
||||||
|
from config import DEFAULT_LIMIT
|
||||||
|
|
||||||
|
INPUT_OPTIONS = '--create_cache --search <query>'
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def create_dedup_names_cache():
    '''
    Rebuild the people-name search cache.

    Reads every node from the 'freegraph' mongo DB, extracts lowercased
    "first last" names, dedupes them, and stores the full list in a single
    document of the 'fg_search_cache' DB, which search_people_by_name later
    reads.

    NOTE(review): the cache DB is dropped before the rebuild, so searches
    issued mid-rebuild see an empty cache.
    '''

    fg = c['freegraph']

    #delete any old cache
    c.drop_database('fg_search_cache')

    search_cache = c['fg_search_cache']
    people_cache = search_cache.people_cache

    nodes = fg.nodes

    #------------------------------
    #for creating people cache

    counter = 0
    people_names = []

    for i in nodes.find():

        counter += 1

        # progress indicator while walking a potentially large collection
        if(counter % 1000 == 0):
            print counter

        try:
            name = i['data']['name']['first'].lower() + ' ' + i['data']['name']['last'].lower()
        except:
            # node has no complete data.name.{first,last} sub-document; skip it
            pass
        else:
            people_names.append(name)

    # dedupe via set; original insertion order is not preserved (not needed here)
    dedup_people_names = list(set(people_names))

    insert_people_names = {'dedup_people_names':dedup_people_names}

    #save final dedup results to mongodb (using it as a cache)
    people_cache.save(insert_people_names)

    #print '-' * 5
    #log.debug('Created deduped people_cache: %s from %s', len(dedup_people_names), len(people_names))
    #log.debug('Creating company cache ...')

    #db.posts.ensure_index('full_name')
    #log.debug('DONE! All set for searching now.')
||||||
|
#-------------------------
|
||||||
|
def anyword_substring_search_inner(query_word, target_words):
    '''
    Return query_word if at least one target word starts with it,
    otherwise return False.
    '''
    matched = any(word.startswith(query_word) for word in target_words)
    return query_word if matched else False
||||||
|
#-------------------------
|
||||||
|
def anyword_substring_search(target_words, query_words):
    '''
    Return True if every query word is a prefix of at least one target word,
    otherwise False.
    '''
    for current_query_word in query_words:
        # one unmatched query word already decides the answer, so bail out
        if not any(word.startswith(current_query_word) for word in target_words):
            return False
    return True
||||||
|
#-------------------------
|
||||||
|
def substring_search(query, list_of_strings, limit_results=DEFAULT_LIMIT):
    '''
    Main entry point for searching.

    Returns the strings from list_of_strings whose words prefix-match every
    word of `query`, scanning in order and stopping once limit_results
    matches have been collected.
    '''
    matching = []

    # FIX: split on any whitespace run; query.split(' ') turned doubled or
    # leading/trailing spaces into empty "words" that prefix-match everything,
    # silently weakening the query.
    query_words = query.split()

    # sort by longest word first (highest probability of an early mismatch)
    query_words.sort(key=len, reverse=True)

    counter = 0

    for s in list_of_strings:

        target_words = s.split(' ')

        # the anyword searching function is separate
        if anyword_substring_search(target_words, query_words):
            matching.append(s)

            # limit results
            counter += 1
            if counter == limit_results:
                break

    return matching
||||||
|
#-------------------------
|
||||||
|
def search_people_by_name(query, limit_results=DEFAULT_LIMIT):
    '''
    Case-insensitive people search against the mongo name cache.

    Loads the deduped name list from the 'fg_search_cache' database and runs
    substring_search over it, returning at most limit_results names.
    '''
    normalized_query = query.lower()

    #---------------------
    # using mongodb as a cache: the cache collection holds (at most) one
    # document whose 'dedup_people_names' field is the full deduped name
    # list; the last document wins, and an empty cache yields no names.
    cached_names = []
    for doc in c['fg_search_cache'].people_cache.find():
        cached_names = doc['dedup_people_names']
    #---------------------

    return substring_search(normalized_query, cached_names, limit_results)
||||||
|
#-------------------------
|
||||||
|
def fix_search_order(query, search_results):
    '''
    Re-rank search results by how well each result's 'full_name' matches the
    first word of the query.

    Tier 1: first name-word starts with the query's first word.
    Tier 2: second name-word starts with the query's first word.
    Tier 3: everything else (including malformed entries).
    Original order is preserved within each tier.

    Returns a new list; search_results is not modified.
    '''
    # FIX: dropped the dead second_word/third_word locals (computed but never
    # used) and narrowed the bare excepts to the failures actually expected
    # from a malformed result entry.
    words = query.split(' ')

    # single-word queries use the query itself; multi-word queries use word 0
    first_word = query if len(words) < 2 else words[0]

    tier_one = []    # first name-word matches
    tier_two = []    # second name-word matches
    tier_rest = []   # no match / malformed entries (last pass, unprocessed)

    for result in search_results:
        try:
            name_words = result['full_name'].split(' ')
        except (KeyError, TypeError, AttributeError):
            # missing or non-string 'full_name': keep it, but rank it last
            tier_rest.append(result)
            continue

        if name_words[0].startswith(first_word):
            tier_one.append(result)
        elif len(name_words) > 1 and name_words[1].startswith(first_word):
            tier_two.append(result)
        else:
            tier_rest.append(result)

    return tier_one + tier_two + tier_rest
||||||
|
#-------------------------
|
||||||
|
def dedup_search_results(search_results):
    '''
    Drop duplicate results, keyed on each entry's 'url' field.

    Keeps the first occurrence of every url and preserves the input order.
    Returns a new list; search_results is not modified.
    '''
    # FIX: the old docstring claimed deduplication was based on 'slug', but
    # the code has always keyed on 'url'; the doc now matches the behavior.
    known_links = set()
    deduped_results = []

    for entry in search_results:
        link = entry['url']
        if link not in known_links:
            known_links.add(link)
            deduped_results.append(entry)

    return deduped_results
||||||
|
#-------------------------
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
if(len(sys.argv) < 2):
|
||||||
|
print "Usage error"
|
||||||
|
|
||||||
|
option = sys.argv[1]
|
||||||
|
|
||||||
|
if(option == '--create_cache'):
|
||||||
|
create_dedup_names_cache()
|
||||||
|
elif(option == '--search'):
|
||||||
|
query = sys.argv[2]
|
||||||
|
print search_people_by_name(query,DEFAULT_LIMIT)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print "Usage error"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print e
|
||||||
72
blockstack_search/search/test_client.py
Executable file
72
blockstack_search/search/test_client.py
Executable file
@@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#-----------------------
|
||||||
|
# Copyright 2013 Halfmoon Labs, Inc.
|
||||||
|
# All Rights Reserved
|
||||||
|
#-----------------------
|
||||||
|
|
||||||
|
'''
|
||||||
|
For testing the search API from command line
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
def search_client(query,server):
|
||||||
|
|
||||||
|
|
||||||
|
print '-' * 10
|
||||||
|
print "Searching for: " + query
|
||||||
|
print '-' * 10
|
||||||
|
|
||||||
|
url = 'http://localhost:5000/search/people'
|
||||||
|
|
||||||
|
if(server == 'remote'):
|
||||||
|
url = 'http://54.200.209.148/search/people'
|
||||||
|
|
||||||
|
print url
|
||||||
|
|
||||||
|
data = {'query': query, 'limit_results': 35}
|
||||||
|
|
||||||
|
headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
|
||||||
|
|
||||||
|
r = requests.get(url, params=data, headers=headers)
|
||||||
|
|
||||||
|
print r
|
||||||
|
|
||||||
|
temp = r.json()
|
||||||
|
|
||||||
|
print '-' * 10
|
||||||
|
|
||||||
|
print "People: "
|
||||||
|
|
||||||
|
for i in temp['people']:
|
||||||
|
|
||||||
|
print i
|
||||||
|
#print i['first_name'] + ' ' + i['last_name'] + ' | ' + 'http://www.crunchbase.com/person/' + i['crunchbase_slug']
|
||||||
|
|
||||||
|
if(len(temp['companies']) > 0):
|
||||||
|
|
||||||
|
print '-' * 10
|
||||||
|
print "Companies: "
|
||||||
|
|
||||||
|
for i in temp['companies']:
|
||||||
|
print i
|
||||||
|
|
||||||
|
print '-' * 10
|
||||||
|
|
||||||
|
#-------------------------
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
if(len(sys.argv) < 2): print "Error more arguments needed"
|
||||||
|
|
||||||
|
query=sys.argv[1]
|
||||||
|
server = 'local'
|
||||||
|
|
||||||
|
try:
|
||||||
|
server = sys.argv[2]
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
search_client(query, server)
|
||||||
Reference in New Issue
Block a user