mirror of
https://github.com/alexgo-io/stacks-puppet-node.git
synced 2026-04-08 22:35:15 +08:00
289 lines
6.8 KiB
Python
289 lines
6.8 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Search
|
|
~~~~~
|
|
|
|
copyright: (c) 2014-2017 by Blockstack Inc.
|
|
copyright: (c) 2017 by Blockstack.org
|
|
|
|
This file is part of Blockstack.
|
|
|
|
Blockstack is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Blockstack is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Blockstack. If not, see <http://www.gnu.org/licenses/>.
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import requests
|
|
|
|
from pymongo import MongoClient
|
|
|
|
from .utils import validUsername
|
|
from .utils import get_json, config_log, pretty_print
|
|
|
|
from api.config import SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE
|
|
|
|
from .db import namespace, profile_data
|
|
from .db import search_profiles
|
|
from .db import people_cache, twitter_cache, username_cache
|
|
|
|
""" create the basic index
|
|
"""
|
|
|
|
# module-level logger, built by the config_log helper imported from .utils
log = config_log(__name__)
|
|
|
|
|
|
def fetch_profile_data_from_file():
    """ takes profile data from file and saves in the profile_data DB

        Parses the JSON document at SEARCH_PROFILE_DATA_FILE and, for every
        entry, saves {'key': entry['fqu'], 'value': entry['profile']} into
        the profile_data collection. Progress is logged every 1000 entries
        and an index on 'key' is ensured once all entries are saved.

        Raises whatever open() / json.load() raise for a missing or
        malformed file, and KeyError for an entry without 'fqu'/'profile'.
    """

    # the context manager closes the file even if reading or parsing fails
    with open(SEARCH_PROFILE_DATA_FILE, 'r') as profile_data_file:
        profiles = json.load(profile_data_file)

    log.debug("-" * 5)
    log.debug("Fetching profile data from file")

    for counter, entry in enumerate(profiles, 1):

        profile_data.save({
            'key': entry['fqu'],
            'value': entry['profile'],
        })

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)

    profile_data.ensure_index('key')
|
|
|
|
|
|
def fetch_namespace_from_file():
    """ takes names from the blockchain data file and saves in the namespace DB

        Parses the JSON document at SEARCH_BLOCKCHAIN_DATA_FILE and iterates
        over it. Each entry is looked up by 'key' in the profile_data
        collection; entries without profile data are skipped. For the rest,
        {'username': <entry without a trailing '.id'>, 'profile': <stored
        value>} is saved into the namespace collection. Progress is logged
        every 1000 saved entries.
    """

    id_suffix = '.id'

    # the context manager closes the file even if reading or parsing fails
    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as blockchain_file:
        blockchain_state = json.load(blockchain_file)

    counter = 0

    log.debug("-" * 5)
    log.debug("Fetching namespace from file")

    for entry in blockchain_state:

        check_entry = profile_data.find_one({"key": entry})

        if check_entry is None:
            # profile data not available, skip
            continue

        # Remove the '.id' suffix as a whole. str.rstrip('id') would strip
        # any trailing run of the characters 'i' and 'd' instead, mangling
        # names such as 'x.android' -> 'x.andro'.
        if entry.endswith(id_suffix):
            username = entry[:-len(id_suffix)]
        else:
            username = entry

        namespace.save({
            'username': username,
            'profile': check_entry['value'],
        })
        counter += 1

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)
|
|
|
|
|
|
def flush_db():
    """ drops the 'search_db' and 'search_cache' databases

        Uses a MongoClient created with default connection settings.
    """

    mongo = MongoClient()

    # delete any old cache/index
    for db_name in ('search_db', 'search_cache'):
        mongo.drop_database(db_name)

    log.debug("Flushed DB")
|
|
|
|
|
|
def optimize_db():
    """ ensures the indexes on the cache and search_profiles collections
    """

    # (collection, indexed field) pairs, applied in this order
    index_plan = (
        (people_cache, 'name'),
        (twitter_cache, 'twitter_handle'),
        (username_cache, 'username'),
        (search_profiles, 'name'),
        (search_profiles, 'twitter_handle'),
        (search_profiles, 'username'),
    )

    for collection, field in index_plan:
        collection.ensure_index(field)

    log.debug("Optimized DB")
|
|
|
|
|
|
def _find_openbazaar_id(profile):
    """ returns the identifier of the profile's 'openbazaar' account, or None
    """

    identifier = None

    if 'account' in profile:
        for account in profile['account']:
            if account['service'] == 'openbazaar':
                # if several openbazaar accounts are listed, the last wins
                identifier = account['identifier']

    return identifier


def _lowercase_field(value, key):
    """ returns value[key].lower(), else value.lower(), else None

        Covers both shapes handled by the indexer: a mapping that holds the
        text under `key`, or the text itself.
    """

    try:
        return value[key].lower()
    except (KeyError, IndexError, TypeError, AttributeError):
        # not a mapping with a usable `key`; try the value itself
        pass

    try:
        return value.lower()
    except AttributeError:
        return None


def create_search_index():
    """ takes people names from blockchain and writes deduped names in a 'cache'

        For every document in the namespace collection whose username passes
        validUsername(), builds a search document with the keys 'openbazaar',
        'name', 'twitter_handle', 'username' and 'profile' and saves it into
        search_profiles. The collected names, twitter handles and usernames
        are deduplicated and saved as one document each into people_cache,
        twitter_cache and username_cache; optimize_db() is called last.

        Users whose name or twitter entry cannot be lowercased are skipped
        (not saved into search_profiles).
    """

    # create people name cache
    counter = 0

    people_names = []
    twitter_handles = []
    usernames = []

    log.debug("-" * 5)
    log.debug("Creating search index")

    for user in namespace.find():

        counter += 1

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)

        username = user['username']

        if not validUsername(username):
            continue

        profile = get_json(user['profile'])

        # the profile/info to be inserted
        search_profile = {}
        search_profile['openbazaar'] = _find_openbazaar_id(profile)

        if 'name' in profile:

            name = _lowercase_field(profile['name'], 'formatted')

            if name is None:
                # malformed name: skip this user instead of aborting the
                # whole index build
                continue

            people_names.append(name)
            search_profile['name'] = name

        else:
            search_profile['name'] = None

        if 'twitter' in profile:

            twitter_handle = _lowercase_field(profile['twitter'], 'username')

            if twitter_handle is None:
                # NOTE(review): the user's name (if any) was already added
                # to people_names above even though the user is skipped
                # here -- confirm whether that is intended
                continue

            twitter_handles.append(twitter_handle)
            search_profile['twitter_handle'] = twitter_handle

        else:
            search_profile['twitter_handle'] = None

        search_profile['username'] = username
        usernames.append(username)

        search_profile['profile'] = profile
        search_profiles.save(search_profile)

    # dedup names
    people_names = {'name': list(set(people_names))}
    twitter_handles = {'twitter_handle': list(set(twitter_handles))}
    usernames = {'username': list(set(usernames))}

    # save final dedup results to mongodb (using it as a cache)
    people_cache.save(people_names)
    twitter_cache.save(twitter_handles)
    username_cache.save(usernames)

    optimize_db()

    log.debug('Created name/twitter/username search index')
|
|
|
|
|
|
if __name__ == "__main__":

    # Command-line entry point: exactly one mode flag is expected.
    # Usage errors exit with a non-zero status so calling scripts can
    # detect them. print(...) with a single argument produces the same
    # output on Python 2 and Python 3.
    if len(sys.argv) < 2:
        print("Usage error")
        sys.exit(1)

    option = sys.argv[1]

    if option == '--flush':
        # Step 0
        flush_db()

    elif option == '--create_db':
        # Step 2
        # NOTE(review): the profile import is disabled for this mode;
        # presumably profile_data is expected to be populated already
        # -- confirm
        #fetch_profile_data_from_file()
        fetch_namespace_from_file()

    elif option == '--create_index':
        # Step 3
        create_search_index()

    elif option == '--optimize':
        optimize_db()

    elif option == '--refresh':
        # full rebuild: drop everything, reload both files, rebuild index
        flush_db()
        fetch_profile_data_from_file()
        fetch_namespace_from_file()
        create_search_index()

    else:
        print("Usage error")
        sys.exit(1)
|