From 994156ab70de113fbe8c6f88b76823ab5f421ebc Mon Sep 17 00:00:00 2001 From: Aaron Blankstein Date: Tue, 22 Aug 2017 12:00:06 -0400 Subject: [PATCH 1/8] Update SUSE instructions Per issue #549, the SUSE/openSUSE instructions needed some updating. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 710a14ebf..bb428d33d 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ $ sudo pip2 install blockstack --upgrade ``` For SUSE and openSUSE ``` -$ sudo zypper update && zypper install rng-tools python-devel libffi-devel +$ sudo zypper update && zypper install rng-tools gcc python-devel python2-pip libffi-devel libopenssl-devel $ sudo pip install blockstack --upgrade ``` From 87dd8b0cef0d031f6d730236a22f05815479ed38 Mon Sep 17 00:00:00 2001 From: Aaron Blankstein Date: Tue, 22 Aug 2017 13:36:01 -0400 Subject: [PATCH 2/8] adds --update_profiles flag to fetch_data, to update the profiles which have new zonefiles since last indexing --- api/config.py | 1 + api/search/fetch_data.py | 93 ++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 22 deletions(-) diff --git a/api/config.py b/api/config.py index 124ae7ce1..1d0ad5c9e 100644 --- a/api/config.py +++ b/api/config.py @@ -64,6 +64,7 @@ NEW_NAMES_FILE = os.path.join(CURRENT_DIR, NEW_NAMES_FILENAME) SEARCH_API_ENDPOINT_ENABLED = True SEARCH_BLOCKCHAIN_DATA_FILE = "/var/blockstack-search/blockchain_data.json" SEARCH_PROFILE_DATA_FILE = "/var/blockstack-search/profile_data.json" +SEARCH_LAST_INDEX_DATA_FILE = "/var/blockstack-search/last_indexed.json" SEARCH_BULK_INSERT_LIMIT = 1000 SEARCH_DEFAULT_LIMIT = 50 SEARCH_LUCENE_ENABLED = False diff --git a/api/search/fetch_data.py b/api/search/fetch_data.py index 868e57bb7..ad392b332 100644 --- a/api/search/fetch_data.py +++ b/api/search/fetch_data.py @@ -23,16 +23,17 @@ This file is part of Blockstack. along with Blockstack. If not, see . 
""" -import sys +import sys, os import json -from api.config import SEARCH_BLOCKCHAIN_DATA_FILE as BLOCKCHAIN_DATA_FILE, \ - SEARCH_PROFILE_DATA_FILE as PROFILE_DATA_FILE +from api.config import ( + SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE, + SEARCH_LAST_INDEX_DATA_FILE) from .utils import validUsername from .utils import get_json, config_log -from blockstack_client.proxy import get_all_names +from blockstack_client import proxy from blockstack_client.profile import get_profile from api.utils import profile_log import logging @@ -45,13 +46,15 @@ def fetch_namespace(): Data is saved in data/ directory """ - resp = get_all_names() + info_resp = proxy.getinfo() + last_block_processed = info_resp['last_block_processed'] - fout = open(BLOCKCHAIN_DATA_FILE, 'w') - fout.write(json.dumps(resp)) - fout.close() + resp = proxy.get_all_names() - return + with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout: + fout.write(json.dumps(resp)) + with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout: + fout.write(json.dumps(last_block_processed)) def print_status_bar(filled, total): pct = float(filled) / total @@ -60,6 +63,57 @@ def print_status_bar(filled, total): sys.stdout.write(out) sys.stdout.flush() +def update_profiles(): + if not os.path.exists(SEARCH_LAST_INDEX_DATA_FILE): + return {'error' : 'No last index, you need to rebuild the whole index.'} + with open(SEARCH_LAST_INDEX_DATA_FILE, 'r') as fin: + last_block_processed = json.load(fin) + + info_resp = proxy.getinfo() + new_block_height = info_resp['last_block_processed'] + + if last_block_processed - 1 > new_block_height: + return {'status' : True, 'message' : 'No new blocks since last indexing'} + + # aaron: note, sometimes it may take a little while for + # new zonefiles to have propagated to the network, so + # we over-fetch a little bit + zonefiles_resp = proxy.get_zonefiles_by_block( + last_block_processed - 1, new_block_height) + zonefiles_updated = zonefiles_resp['zonefile_info'] + names_updated = set( + 
[ zf_info['name'] for zf_info in zonefiles_updated + if 'name' in zf_info ]) + updated_profiles = {} + actually_updated_names = set() + print "Updating {} entries...".format(len(names_updated)) + for ix, name in enumerate(names_updated): + print_status_bar(ix, len(names_updated)) + profile_entry = {} + profile_entry['fqu'] = name + + try: + profile_entry['profile'] = get_profile(name, use_legacy = True)['profile'] + updated_profiles[name] = (profile_entry) + actually_updated_names.add(name) + except KeyboardInterrupt as e: + raise e + except: + import traceback as tb; tb.print_exc() + + names_updated = actually_updated_names + with open(SEARCH_PROFILE_DATA_FILE, 'r') as fin: + all_profiles = json.load(fin) + to_remove = [] + for ix, profile in enumerate(all_profiles): + if profile['fqu'] in names_updated: + all_profiles[ix] = updated_profiles[profile['fqu']] + + with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout: + json.dump(all_profiles, fout) + + return {'status' : True, 'message' : 'Indexed {} profiles'.format(len(names_updated))} + def fetch_profiles(max_to_fetch = None, just_test_set = False): """ Fetch profile data using Blockstack Core and save the data. 
@@ -69,14 +123,11 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False): * profile: json profile data """ - fin = open(BLOCKCHAIN_DATA_FILE, 'r') - file = fin.read() - fin.close() - - all_names = json.loads(file) + with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin: + all_names = json.load(file) all_profiles = [] - + if max_to_fetch == None: max_to_fetch = len(all_names) @@ -101,12 +152,8 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False): except: pass - fout = open(PROFILE_DATA_FILE, 'w') - fout.write(json.dumps(all_profiles)) - fout.close() - - return - + with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout: + json.dump(all_profiles, fout) if __name__ == "__main__": @@ -129,6 +176,8 @@ if __name__ == "__main__": else: args['max_to_fetch'] = int(sys.argv[2]) fetch_profiles(**args) - + elif(option == '--update_profiles'): + print json.dumps(update_profiles(), + indent = 2) else: print "Usage error" From 7d9ae1304d552aaeb83f43c08562ac8549e3afad Mon Sep 17 00:00:00 2001 From: Aaron Blankstein Date: Tue, 22 Aug 2017 14:29:50 -0400 Subject: [PATCH 3/8] write last indexed file on updates --- api/search/fetch_data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/api/search/fetch_data.py b/api/search/fetch_data.py index ad392b332..41538e0a1 100644 --- a/api/search/fetch_data.py +++ b/api/search/fetch_data.py @@ -70,7 +70,11 @@ def update_profiles(): last_block_processed = json.load(fin) info_resp = proxy.getinfo() - new_block_height = info_resp['last_block_processed'] + try: + new_block_height = info_resp['last_block_processed'] + except: + print info_resp + raise if last_block_processed - 1 > new_block_height: return {'status' : True, 'message' : 'No new blocks since last indexing'} @@ -111,6 +115,8 @@ def update_profiles(): with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout: json.dump(all_profiles, fout) + with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout: + json.dump(new_block_height, fout) return {'status' : True, 
'message' : 'Indexed {} profiles'.format(len(names_updated))} From 69a1139c7ad2c345958d7a90835a95eac2ac3a85 Mon Sep 17 00:00:00 2001 From: Aaron Blankstein Date: Tue, 22 Aug 2017 15:17:29 -0400 Subject: [PATCH 4/8] status bar should fill all the way now --- api/search/fetch_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/search/fetch_data.py b/api/search/fetch_data.py index 41538e0a1..af2afce39 100644 --- a/api/search/fetch_data.py +++ b/api/search/fetch_data.py @@ -92,7 +92,7 @@ def update_profiles(): actually_updated_names = set() print "Updating {} entries...".format(len(names_updated)) for ix, name in enumerate(names_updated): - print_status_bar(ix, len(names_updated)) + print_status_bar(ix+1, len(names_updated)) profile_entry = {} profile_entry['fqu'] = name From 8ef26e58177f9fc816aeb76de734a51149aee2ac Mon Sep 17 00:00:00 2001 From: Valentin Sundermann Date: Tue, 22 Aug 2017 22:01:29 +0200 Subject: [PATCH 5/8] Change path for index template of the API Previously the generated file had to be moved manually from `/tmp/index.html` to `api/templates/index.html`, which the script can handle automatically --- build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_docs.sh b/build_docs.sh index bf7ce2c79..71fc57c9d 100755 --- a/build_docs.sh +++ b/build_docs.sh @@ -4,7 +4,7 @@ set -e COMMAND=$1 if [ "$1" = "public_api" ]; then - aglio -i docs/api-specs.md --theme-template docs/aglio_templates/public.jade -o /tmp/index.html + aglio -i docs/api-specs.md --theme-template docs/aglio_templates/public.jade -o api/templates/index.html elif [ "$1" = "core_api" ]; then aglio -i docs/api-specs.md --theme-template docs/aglio_templates/core.jade -o /tmp/index.html elif [ "$1" = "deploy_gh" ]; then From febc7747b95344fd5a42defbd51bbbf3447671c7 Mon Sep 17 00:00:00 2001 From: Valentin Sundermann Date: Tue, 22 Aug 2017 22:04:44 +0200 Subject: [PATCH 6/8] Add instructions for generating index template --- docs/install-api.md | 2 ++ 1 file
changed, 2 insertions(+) diff --git a/docs/install-api.md b/docs/install-api.md index ac0bc289d..932bc110c 100644 --- a/docs/install-api.md +++ b/docs/install-api.md @@ -11,6 +11,7 @@ Then, setup the API: $ sudo apt-get install -y python-pip memcached rng-tools python-dev libmemcached-dev zlib1g-dev libgmp-dev libffi-dev libssl-dev $ sudo service memcached start $ sudo pip install virtualenv +$ sudo npm -g install aglio $ virtualenv api && source api/bin/activate $ git clone https://github.com/blockstack/blockstack-core.git $ cd blockstack-core/ @@ -19,6 +20,7 @@ $ pip install -r api/requirements.txt $ blockstack setup_wallet $ blockstack api start $ deactivate +$ ./build_docs.sh public_api ``` ### Search Subsystem From 4ec1779b6e60d6370020396f5716d2cc759cce68 Mon Sep 17 00:00:00 2001 From: Valentin Sundermann Date: Tue, 22 Aug 2017 22:08:43 +0200 Subject: [PATCH 7/8] Add Python explicitly to uwsgi config because non-pip versions seem to need that :x --- api/nginx/blockstack_api.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/api/nginx/blockstack_api.ini b/api/nginx/blockstack_api.ini index 65279bd08..e43fb6cfa 100644 --- a/api/nginx/blockstack_api.ini +++ b/api/nginx/blockstack_api.ini @@ -1,4 +1,5 @@ [uwsgi] +plugins = python module = api.server:app master = true processes = 9 From 119728cfb0566c47321b5550be2648f62265261e Mon Sep 17 00:00:00 2001 From: Aaron Blankstein Date: Tue, 22 Aug 2017 16:31:41 -0400 Subject: [PATCH 8/8] add locking so that the full indexer and the updater don't step on one another --- api/config.py | 1 + api/search/fetch_data.py | 74 ++++++++++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/api/config.py b/api/config.py index 1d0ad5c9e..53c5f4e30 100644 --- a/api/config.py +++ b/api/config.py @@ -65,6 +65,7 @@ SEARCH_API_ENDPOINT_ENABLED = True SEARCH_BLOCKCHAIN_DATA_FILE = "/var/blockstack-search/blockchain_data.json" SEARCH_PROFILE_DATA_FILE =
"/var/blockstack-search/profile_data.json" SEARCH_LAST_INDEX_DATA_FILE = "/var/blockstack-search/last_indexed.json" +SEARCH_LOCKFILE = "/var/blockstack-search/indexer_lockfile.json" SEARCH_BULK_INSERT_LIMIT = 1000 SEARCH_DEFAULT_LIMIT = 50 SEARCH_LUCENE_ENABLED = False diff --git a/api/search/fetch_data.py b/api/search/fetch_data.py index af2afce39..8fe8be222 100644 --- a/api/search/fetch_data.py +++ b/api/search/fetch_data.py @@ -23,12 +23,14 @@ This file is part of Blockstack. along with Blockstack. If not, see . """ -import sys, os +import sys, os, time +import tempfile import json +from datetime import datetime from api.config import ( SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE, - SEARCH_LAST_INDEX_DATA_FILE) + SEARCH_LAST_INDEX_DATA_FILE, SEARCH_LOCKFILE) from .utils import validUsername from .utils import get_json, config_log @@ -45,16 +47,10 @@ def fetch_namespace(): Fetch all names in a namespace that should be indexed. Data is saved in data/ directory """ - - info_resp = proxy.getinfo() - last_block_processed = info_resp['last_block_processed'] - resp = proxy.get_all_names() with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout: fout.write(json.dumps(resp)) - with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout: - fout.write(json.dumps(last_block_processed)) def print_status_bar(filled, total): pct = float(filled) / total @@ -67,7 +63,10 @@ def update_profiles(): if not os.path.exists(SEARCH_LAST_INDEX_DATA_FILE): return {'error' : 'No last index, you need to rebuild the whole index.'} with open(SEARCH_LAST_INDEX_DATA_FILE, 'r') as fin: - last_block_processed = json.load(fin) + search_indexer_info = json.load(fin) + + last_block_processed = search_indexer_info['last_block_height'] + last_full_index = search_indexer_info['last_full_index'] info_resp = proxy.getinfo() try: @@ -113,10 +112,19 @@ def update_profiles(): if profile['fqu'] in names_updated: all_profiles[ix] = updated_profiles[profile['fqu']] + + if not obtain_lockfile(): + return {'error' 
: 'Could not obtain lockfile, abandoning my update.'} + with open(SEARCH_LAST_INDEX_DATA_FILE, 'r') as fin: + search_indexer_info = json.load(fin) + if search_indexer_info['last_full_index'] != last_full_index: + return {'error' : 'Full re-index written during our update. Abandoning'} + with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout: json.dump(all_profiles, fout) with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout: - json.dump(new_block_height, fout) + search_indexer_info['last_block_height'] = new_block_height + json.dump(search_indexer_info, fout) return {'status' : True, 'message' : 'Indexed {} profiles'.format(len(names_updated))} @@ -132,6 +140,9 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False): with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin: all_names = json.load(file) + info_resp = proxy.getinfo() + last_block_processed = info_resp['last_block_processed'] + all_profiles = [] if max_to_fetch == None: @@ -158,8 +169,51 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False): except: pass + attempts = 0 + while not obtain_lockfile(): + attempts += 1 + time.sleep(5) + if attempts > 10: + print "ERROR! Could not obtain lockfile" + return + with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout: json.dump(all_profiles, fout) + with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout: + search_index_data = { + 'last_block_height' : last_block_processed, + 'last_full_index' : datetime.now().isoformat() + } + json.dump(search_index_data, fout) + + +def obtain_lockfile(): + if os.path.exists(SEARCH_LOCKFILE): + with open(SEARCH_LOCKFILE, 'r') as fin: + pid = json.load(fin) + try: + os.kill(pid, 0) + return False # lockfile exists, pid still running. + except: + pass + # lockfile stale. 
unlink it + os.unlink(SEARCH_LOCKFILE) + fd, path = tempfile.mkstemp(prefix=".indexer.lock.", dir=os.path.dirname(SEARCH_LOCKFILE)) + try: + with os.fdopen(fd, 'w') as fout: + json.dump(os.getpid(), fout) + os.link( path, SEARCH_LOCKFILE ) + os.unlink( path ) + except: + import traceback as tb; tb.print_exc() + return False + # make sure we got it + with open(SEARCH_LOCKFILE, 'r') as fin: + pid = json.load(fin) + if pid == os.getpid(): + return True + print "Wrong pid : {} != {}".format(pid, os.getpid()) + return False if __name__ == "__main__":