Files
stacks-puppet-node/api/search/fetch_data.py
2018-04-05 17:39:42 -04:00

276 lines
9.2 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Search
~~~~~
copyright: (c) 2014-2017 by Blockstack Inc.
copyright: (c) 2017 by Blockstack.org
This file is part of Blockstack.
Blockstack is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Blockstack is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Blockstack. If not, see <http://www.gnu.org/licenses/>.
"""
import sys, os, time
import tempfile
import json
from datetime import datetime
from api.config import (
SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE,
SEARCH_LAST_INDEX_DATA_FILE, SEARCH_LOCKFILE)
from .utils import validUsername
from .utils import get_json, config_log
import blockstack
from api.utils import profile_log
import logging
# Module-level logger, obtained from this package's config_log helper.
log = config_log(__name__)
# Load the Blockstack configuration found in the library's default working
# directory.  NOTE: this runs at import time, so importing this module
# requires that configuration to be loadable.
blockstack_working_dir = blockstack.lib.config.default_working_dir()
blockstack_config = blockstack.lib.load_configuration(blockstack_working_dir)
# 'indexer_url' from the 'blockstack-api' config section; it is passed as
# hostport= to every blockstack.lib.client call made in this file.
blockstack_indexer_url = blockstack_config['blockstack-api']['indexer_url']
def fetch_namespace():
    """
    Collect every name that should be indexed and store the list as JSON.

    Names are requested from the configured indexer and subdomain names are
    read through blockstack.lib.subdomains.  The combined list (names first,
    subdomains after) overwrites SEARCH_BLOCKCHAIN_DATA_FILE.
    """
    onchain_names = blockstack.lib.client.get_all_names(hostport=blockstack_indexer_url)
    subdomains = blockstack.lib.subdomains.get_all_subdomains()

    # NOTE(review): whatever get_all_names returns is iterated as a sequence
    # of names; confirm it cannot hand back an error object here.
    combined = []
    combined.extend(onchain_names)
    combined.extend(subdomains)

    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout:
        fout.write(json.dumps(combined))
def print_status_bar(filled, total):
    """
    Redraw a single-line text progress bar on stdout.

    The line starts with a carriage return so that successive calls overwrite
    each other.  The bar is always 60 characters wide ('=' run, one '>', then
    padding) and is followed by the percentage with one decimal.

    filled: number of items completed so far.
    total:  number of items expected.  A total of zero (or less) means there
            is nothing to do and is drawn as 100% instead of raising
            ZeroDivisionError.

    The fraction is clamped to [0, 1] so the bar never outgrows its fixed
    width when `filled` overshoots `total`.
    """
    if total > 0:
        pct = min(max(float(filled) / total, 0.0), 1.0)
    else:
        # Empty workload: avoid dividing by zero and show it as complete.
        pct = 1.0

    # One column is reserved for the '>' head, hence the -1 / 59 pairing.
    bar = max(int(pct * 60) - 1, 0)
    out = "\r[%s>%s] %.1f%%" % ("=" * bar, " " * (59 - bar), pct * 100)
    sys.stdout.write(out)
    sys.stdout.flush()
def update_profiles():
    """
    Incrementally add newly seen names and their profiles to the search data.

    Uses the bookkeeping stored in SEARCH_LAST_INDEX_DATA_FILE to ask the
    indexer for zone files announced since the last indexed block, plus
    subdomains newer than the last recorded subdomain sequence.  Every name
    that is not already listed in SEARCH_BLOCKCHAIN_DATA_FILE has its profile
    resolved; successfully resolved names are appended to the name file and
    their profiles to SEARCH_PROFILE_DATA_FILE.  Names that are already
    indexed are not re-fetched.

    Returns a dict:
      * {'error': <message>} when there is no previous index, the lockfile
        cannot be obtained, or a full re-index happened while we were working;
      * {'status': True, 'message': <text>} otherwise.

    Raises whatever indexing the getinfo response raises when it has no
    'last_block_processed' entry (the response is printed first).
    """
    import traceback

    if not os.path.exists(SEARCH_LAST_INDEX_DATA_FILE):
        return {'error' : 'No last index, you need to rebuild the whole index.'}

    with open(SEARCH_LAST_INDEX_DATA_FILE, 'r') as fin:
        search_indexer_info = json.load(fin)

    last_block_processed = search_indexer_info['last_block_height']
    last_full_index = search_indexer_info['last_full_index']
    last_subdomain_seq = search_indexer_info['last_subdomain_seq']

    info_resp = blockstack.lib.client.getinfo(hostport=blockstack_indexer_url)
    try:
        new_block_height = info_resp['last_block_processed']
    except Exception:
        # Show the unexpected response to the operator before failing.
        print(info_resp)
        raise

    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin:
        existing_names = set(json.load(fin))

    # The zone file query below starts one block early, so only bail out when
    # even that widened range would be empty.
    if last_block_processed - 1 > new_block_height:
        return {'status' : True, 'message' : 'No new blocks since last indexing'}

    subdomain_names = [
        name for name in
        blockstack.lib.subdomains.get_all_subdomains(min_sequence=last_subdomain_seq)
        if name not in existing_names
    ]
    # Fixed: this previously called the misspelled `get_sudomain_last_sequence`;
    # fetch_profiles() in this file uses the spelling below.
    last_subdomain_seq = blockstack.lib.subdomains.get_subdomain_last_sequence()

    # aaron: note, sometimes it may take a little while for
    #  new zonefiles to have propagated to the network, so
    #  we over-fetch a little bit
    zonefiles_resp = blockstack.lib.client.get_zonefiles_by_block(
        last_block_processed - 1, new_block_height, hostport=blockstack_indexer_url)
    zonefiles_updated = zonefiles_resp['zonefile_info']

    names_updated = [zf_info['name'] for zf_info in zonefiles_updated
                     if 'name' in zf_info]
    names_updated += subdomain_names

    # Only names we have never indexed are fetched.
    names_to_insert = set(name for name in names_updated if name not in existing_names)

    # Report the number of names actually processed by the loop below.
    print("Updating {} entries...".format(len(names_to_insert)))

    updated_profiles = {}
    total = len(names_to_insert)
    for ix, name in enumerate(names_to_insert):
        print_status_bar(ix + 1, total)
        try:
            profile_resp = blockstack.lib.client.resolve_profile(
                name, hostport=blockstack_indexer_url)
            updated_profiles[name] = {'fqu': name, 'profile': profile_resp['profile']}
        except Exception:
            # Best effort: report the failure and skip this name.
            # KeyboardInterrupt/SystemExit are not Exception subclasses and
            # therefore still abort the run.
            traceback.print_exc()

    if not updated_profiles:
        return {'status' : True, 'message' : 'No new profiles'}

    with open(SEARCH_PROFILE_DATA_FILE, 'r') as fin:
        all_profiles = json.load(fin)

    existing_names = list(existing_names)
    for name_to_add, profile_entry in updated_profiles.items():
        all_profiles.append(profile_entry)
        existing_names.append(name_to_add)

    if not obtain_lockfile():
        return {'error' : 'Could not obtain lockfile, abandoning my update.'}

    # Re-read the bookkeeping under the lock: if a full re-index finished in
    # the meantime our view of the data files is outdated.
    with open(SEARCH_LAST_INDEX_DATA_FILE, 'r') as fin:
        search_indexer_info = json.load(fin)
    if search_indexer_info['last_full_index'] != last_full_index:
        return {'error' : 'Full re-index written during our update. Abandoning'}

    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout:
        json.dump(existing_names, fout)
    with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout:
        json.dump(all_profiles, fout)
    with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout:
        search_indexer_info['last_block_height'] = new_block_height
        search_indexer_info['last_subdomain_seq'] = last_subdomain_seq
        json.dump(search_indexer_info, fout)

    return {'status' : True, 'message' : 'Indexed {} profiles'.format(len(updated_profiles))}
def fetch_profiles(max_to_fetch = None, just_test_set = False):
    """
    Fetch profile data using Blockstack Core and save the data.

    Names are read from SEARCH_BLOCKCHAIN_DATA_FILE and each one is resolved
    through the configured indexer.  Names whose profile cannot be resolved
    are skipped.  The result is written to SEARCH_PROFILE_DATA_FILE as a JSON
    list of {'fqu': ..., 'profile': ...} entries, and the indexing bookkeeping
    (block height, timestamp of this full index, last subdomain sequence) is
    written to SEARCH_LAST_INDEX_DATA_FILE.

    max_to_fetch:  stop after this many names; None means all of them.
    just_test_set: resolve only the "<user>.id" names built from
                   SEARCH_TEST_USERS instead of the names on disk.

    Returns None.  If the lockfile cannot be obtained after repeated attempts
    an error is printed and nothing is written.
    """
    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin:
        all_names = json.load(fin)

    # Record the height before the (long) fetch so a later incremental update
    # starts from where this run began.
    info_resp = blockstack.lib.client.getinfo(hostport=blockstack_indexer_url)
    last_block_processed = info_resp['last_block_processed']

    if just_test_set:
        from api.tests.search_tests import SEARCH_TEST_USERS
        all_names = ["{}.id".format(u) for u in SEARCH_TEST_USERS]

    # Default the limit only after the name list is final; previously it was
    # derived from the on-disk list even when the test set replaced it.
    if max_to_fetch is None:
        max_to_fetch = len(all_names)

    all_profiles = []
    for ix, fqu in enumerate(all_names):
        # Skip the bar for a non-positive limit so it is never asked to
        # divide by zero.
        if ix % 100 == 0 and max_to_fetch > 0:
            print_status_bar(ix, max_to_fetch)
        if ix >= max_to_fetch:
            break

        try:
            profile = blockstack.lib.client.resolve_profile(
                fqu, hostport=blockstack_indexer_url)['profile']
        except Exception:
            # Deliberately best effort: unresolved names are left out.
            # KeyboardInterrupt/SystemExit still propagate.
            continue
        all_profiles.append({'fqu': fqu, 'profile': profile})

    # Retry the lock every 5 seconds, giving up after the 11th failure.
    attempts = 0
    while not obtain_lockfile():
        attempts += 1
        time.sleep(5)
        if attempts > 10:
            print("ERROR! Could not obtain lockfile")
            return

    # NOTE(review): the subdomain sequence is sampled after the fetch while
    # the block height is sampled before it -- confirm subdomains created in
    # between cannot be missed by later incremental updates.
    last_subdomain_seq = blockstack.lib.subdomains.get_subdomain_last_sequence()

    with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout:
        json.dump(all_profiles, fout)
    with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout:
        search_index_data = {
            'last_block_height' : last_block_processed,
            'last_full_index' : datetime.now().isoformat(),
            'last_subdomain_seq' : last_subdomain_seq
        }
        json.dump(search_index_data, fout)
def _pid_is_running(pid):
    """Return True when signalling `pid` shows that the process exists."""
    import errno

    try:
        os.kill(pid, 0)
    except OSError as err:
        # EPERM: the process exists but belongs to someone else -> alive.
        # Anything else (notably ESRCH) means there is no such process.
        return err.errno == errno.EPERM
    except (TypeError, ValueError, OverflowError):
        # The lockfile did not contain a usable pid.
        return False
    return True


def obtain_lockfile():
    """
    Try to take the indexer lock at SEARCH_LOCKFILE.

    The lockfile holds the JSON-encoded pid of its owner.  An existing
    lockfile whose pid is still running makes this return False (this
    includes a lock already held by the calling process).  A lockfile whose
    owner is gone, or whose content cannot be parsed, is considered stale and
    removed.  The new lock is created by writing our pid to a temporary file
    in the same directory and hard-linking it to SEARCH_LOCKFILE, which fails
    if another process created the lockfile first.

    Returns True when this process now owns the lock, False otherwise.
    The lock is not removed by this module; it becomes stale when the owning
    process exits.
    """
    import errno
    import traceback

    if os.path.exists(SEARCH_LOCKFILE):
        try:
            with open(SEARCH_LOCKFILE, 'r') as fin:
                pid = json.load(fin)
        except ValueError:
            # Corrupt content cannot identify a live owner.
            pid = None

        if _pid_is_running(pid):
            return False  # lockfile exists, pid still running.

        # lockfile stale. unlink it (another indexer may have beaten us to it)
        try:
            os.unlink(SEARCH_LOCKFILE)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise

    fd, path = tempfile.mkstemp(prefix=".indexer.lock.", dir=os.path.dirname(SEARCH_LOCKFILE))
    try:
        with os.fdopen(fd, 'w') as fout:
            json.dump(os.getpid(), fout)
        # link() refuses to replace an existing file, so only one contender
        # can succeed.
        os.link(path, SEARCH_LOCKFILE)
    except Exception:
        traceback.print_exc()
        return False
    finally:
        # Always drop the temporary name, including when link() failed;
        # otherwise every lost race leaves a .indexer.lock.* file behind.
        try:
            os.unlink(path)
        except OSError:
            pass

    # make sure we got it
    with open(SEARCH_LOCKFILE, 'r') as fin:
        pid = json.load(fin)
    if pid == os.getpid():
        return True

    print("Wrong pid : {} != {}".format(pid, os.getpid()))
    return False
if __name__ == "__main__":
    # Command-line entry point.  Supported invocations:
    #   --fetch_namespace               write the list of names to index
    #   --fetch_profiles [N | --test]   full profile fetch (optionally only
    #                                   the first N names, or the test users)
    #   --update_profiles               incremental update; prints the result
    #                                   dict as JSON
    # Usage errors exit with a non-zero status via sys.exit (the bare `exit`
    # helper is injected by the `site` module and is not guaranteed to exist).
    if len(sys.argv) < 2:
        print("Usage error")
        sys.exit(1)

    option = sys.argv[1]

    if option == '--fetch_namespace':
        # Step 1
        fetch_namespace()

    elif option == '--fetch_profiles':
        # Step 2
        args = {}
        if len(sys.argv) > 2:
            if sys.argv[2] == '--test':
                args['just_test_set'] = True
            else:
                try:
                    args['max_to_fetch'] = int(sys.argv[2])
                except ValueError:
                    # A non-numeric count is a usage mistake, not a crash.
                    print("Usage error")
                    sys.exit(1)
        fetch_profiles(**args)

    elif option == '--update_profiles':
        print(json.dumps(update_profiles(), indent=2))

    else:
        print("Usage error")
        sys.exit(1)