Files
stacks-puppet-node/api/search/basic_index.py

289 lines
6.8 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Search
~~~~~
copyright: (c) 2014-2017 by Blockstack Inc.
copyright: (c) 2017 by Blockstack.org
This file is part of Blockstack.
Blockstack is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Blockstack is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Blockstack. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
import json
import requests
from pymongo import MongoClient
from .utils import validUsername
from .utils import get_json, config_log, pretty_print
from api.config import SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE
from .db import namespace, profile_data
from .db import search_profiles
from .db import people_cache, twitter_cache, username_cache
""" create the basic index
"""
# Module-wide log handle built by the project's config_log helper for this
# module's name; the functions below use it for debug progress messages.
log = config_log(__name__)
def fetch_profile_data_from_file():
    """ takes profile data from file and saves in the profile_data DB

    The file named by SEARCH_PROFILE_DATA_FILE is parsed as JSON and
    iterated; every entry is stored in the profile_data collection as
    {'key': entry['fqu'], 'value': entry['profile']}. An index on 'key'
    is ensured once all entries have been written.

    Raises whatever open() / json.load() raise when the file is missing
    or is not valid JSON, and KeyError when an entry lacks 'fqu' or
    'profile'.
    """
    # The context manager closes the file even when parsing fails; the
    # handle is no longer held open while the DB writes run.
    with open(SEARCH_PROFILE_DATA_FILE, 'r') as profile_data_file:
        profiles = json.load(profile_data_file)

    log.debug("-" * 5)
    log.debug("Fetching profile data from file")

    for counter, entry in enumerate(profiles, 1):
        profile_data.save({
            'key': entry['fqu'],
            'value': entry['profile'],
        })

        # periodic progress report
        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)

    profile_data.ensure_index('key')
def fetch_namespace_from_file():
    """ takes names from the blockchain data file and saves them in the namespace DB

    The file named by SEARCH_BLOCKCHAIN_DATA_FILE is parsed as JSON and
    iterated. Each entry is looked up by key in the profile_data
    collection; entries without profile data are skipped. For the rest a
    {'username': ..., 'profile': ...} document is saved in the namespace
    collection, where username is the entry with a trailing '.id'
    removed.

    Raises whatever open() / json.load() raise when the file is missing
    or is not valid JSON.
    """
    # The context manager closes the file even when parsing fails.
    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as blockchain_file:
        blockchain_state = json.load(blockchain_file)

    log.debug("-" * 5)
    log.debug("Fetching namespace from file")

    id_suffix = '.id'
    counter = 0

    for fqu in blockchain_state:
        # Remove the literal '.id' suffix only. str.rstrip('id') strips
        # any run of trailing 'i'/'d' characters, which mangles names
        # such as 'david' (-> 'dav') or 'foo.bid' (-> 'foo.b').
        if fqu.endswith(id_suffix):
            username = fqu[:-len(id_suffix)]
        else:
            username = fqu

        check_entry = profile_data.find_one({"key": fqu})

        if check_entry is None:
            # profile data not available, skip
            continue

        namespace.save({
            'username': username,
            'profile': check_entry['value'],
        })

        # only entries that were actually saved are counted
        counter += 1

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)
def flush_db():
    """ drops the 'search_db' and 'search_cache' databases
    """
    mongo = MongoClient()

    # get rid of any stale cache/index
    for stale_db in ('search_db', 'search_cache'):
        mongo.drop_database(stale_db)

    log.debug("Flushed DB")
def optimize_db():
    """ ensures the indexes on the cache collections and on search_profiles
    """
    # (collection, indexed field) pairs, processed in this order
    index_plan = (
        (people_cache, 'name'),
        (twitter_cache, 'twitter_handle'),
        (username_cache, 'username'),
        (search_profiles, 'name'),
        (search_profiles, 'twitter_handle'),
        (search_profiles, 'username'),
    )

    for collection, field in index_plan:
        collection.ensure_index(field)

    log.debug("Optimized DB")
def _find_openbazaar_id(profile):
    """ returns the identifier of the profile's 'openbazaar' account, or None
    """
    if 'account' not in profile:
        return None

    accounts = profile['account']
    if not isinstance(accounts, (list, tuple)):
        # unexpected shape: treat as "no openbazaar account"
        return None

    openbazaar_id = None
    for account in accounts:
        if isinstance(account, dict) and account.get('service') == 'openbazaar':
            # no break: when several accounts match, the last one wins
            openbazaar_id = account.get('identifier')

    return openbazaar_id


def _lowercased_profile_field(profile, field, subkey):
    """ returns profile[field] lower-cased, or None when the field is absent

    The field may be a plain string, or a mapping that holds the string
    under `subkey`. Raises ValueError when the field is present but has
    neither of these shapes.
    """
    if field not in profile:
        return None

    value = profile[field]

    try:
        return value[subkey].lower()
    except (KeyError, IndexError, TypeError, AttributeError):
        # not a mapping with a string under `subkey`; try the plain form
        pass

    try:
        return value.lower()
    except AttributeError:
        raise ValueError("unusable %r field in profile" % field)


def create_search_index():
    """ takes people names from blockchain and writes deduped names in a 'cache'

    Walks every document of the namespace collection, skips the ones
    whose username fails validUsername(), and saves one search_profiles
    document per remaining user with the keys 'openbazaar', 'name',
    'twitter_handle', 'username' and 'profile'. Profiles whose 'name' or
    'twitter' field cannot be lower-cased are skipped instead of aborting
    the run. The deduplicated names, twitter handles and usernames are
    then saved as single documents in people_cache, twitter_cache and
    username_cache, and optimize_db() is called.
    """
    counter = 0

    # sets dedup on insertion, so no separate dedup pass is needed
    people_names = set()
    twitter_handles = set()
    usernames = set()

    log.debug("-" * 5)
    log.debug("Creating search index")

    for user in namespace.find():
        # every visited document is counted, including skipped ones
        counter += 1

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)

        username = user['username']

        if not validUsername(username):
            continue

        profile = get_json(user['profile'])

        try:
            name = _lowercased_profile_field(profile, 'name', 'formatted')
        except ValueError:
            # malformed name: skip the profile, like a malformed twitter field
            continue

        if name is not None:
            # recorded before the twitter field is parsed, so the name is
            # cached even if the profile is skipped just below
            people_names.add(name)

        try:
            twitter_handle = _lowercased_profile_field(profile, 'twitter',
                                                       'username')
        except ValueError:
            continue

        if twitter_handle is not None:
            twitter_handles.add(twitter_handle)

        usernames.add(username)

        # the profile/info to be inserted
        search_profiles.save({
            'openbazaar': _find_openbazaar_id(profile),
            'name': name,
            'twitter_handle': twitter_handle,
            'username': username,
            'profile': profile,
        })

    # save final dedup results to mongodb (using it as a cache)
    people_cache.save({'name': list(people_names)})
    twitter_cache.save({'twitter_handle': list(twitter_handles)})
    username_cache.save({'username': list(usernames)})

    optimize_db()

    log.debug('Created name/twitter/username search index')
if __name__ == "__main__":
    # Command-line entry point: the first argument selects which indexing
    # step to run. Misuse prints "Usage error" and exits with status 1 so
    # that calling scripts can detect the failure.

    if len(sys.argv) < 2:
        print("Usage error")
        sys.exit(1)

    option = sys.argv[1]

    if option == '--flush':
        # Step 0
        flush_db()

    elif option == '--create_db':
        # Step 2
        # NOTE(review): the fetch_profile_data_from_file() call is disabled
        # for this option, so only the namespace collection is built here --
        # presumably profile_data is expected to be populated already; confirm.
        fetch_namespace_from_file()

    elif option == '--create_index':
        # Step 3
        create_search_index()

    elif option == '--optimize':
        optimize_db()

    elif option == '--refresh':
        # full rebuild: flush, reload both data files, then re-index
        flush_db()
        fetch_profile_data_from_file()
        fetch_namespace_from_file()
        create_search_index()

    else:
        print("Usage error")
        sys.exit(1)