merge in search updates from master

2026-06-02 14:18:32 +08:00 · 2017-08-23 15:22:08 -04:00
parent 73c39b0bb4 8e867a1bee
commit 9fc76c2be4
7 changed files with 232 additions and 165 deletions
--- a/api/search/basic_index.py
+++ b/api/search/basic_index.py
@@ -48,10 +48,8 @@ def fetch_profile_data_from_file():
    """ takes profile data from file and saves in the profile_data DB
    """

-    profile_data_file = open(SEARCH_PROFILE_DATA_FILE, 'r')
-
-    profiles = profile_data_file.read()
-    profiles = json.loads(profiles)
+    with open(SEARCH_PROFILE_DATA_FILE, 'r') as fin:
+        profiles = json.load(fin)

    counter = 0

@@ -59,7 +57,6 @@ def fetch_profile_data_from_file():
    log.debug("Fetching profile data from file")

    for entry in profiles:
-
        new_entry = {}
        new_entry['key'] = entry['fqu']
        new_entry['value'] = entry['profile']
@@ -67,16 +64,14 @@ def fetch_profile_data_from_file():
        try:
            profile_data.save(new_entry)
        except Exception as e:
-            print e
-            print "Exception on entry {}".format(new_entry)
+            log.exception(e)
+            log.error("Exception on entry {}".format(new_entry))

        counter += 1

        if counter % 1000 == 0:
            log.debug("Processed entries: %s" % counter)

-    profile_data_file.close()
-
    profile_data.ensure_index('key')

    return
@@ -160,7 +155,6 @@ def create_search_index():
    log.debug("Creating search index")

    for user in namespace.find():
-
        # the profile/info to be inserted
        search_profile = {}

@@ -179,19 +173,17 @@ def create_search_index():


        hasBazaarId=False
-        # search for openbazaar id in the profile        
+        # search for openbazaar id in the profile
        if 'account' in profile:
            for accounts in profile['account']:
                if  accounts['service'] == 'openbazaar':
                   hasBazaarId = True
                   search_profile['openbazaar']=accounts['identifier']
-                  # pretty_print(search_profile['openbazaar'])  

        if (hasBazaarId == False):
            search_profile['openbazaar'] = None
-        
-        if 'name' in profile:

+        if 'name' in profile:
            try:
                name = profile['name']
            except:
@@ -201,7 +193,6 @@ def create_search_index():
                name = name['formatted'].lower()
            except:
                name = name.lower()
-
            people_names.append(name)
            search_profile['name'] = name

@@ -209,7 +200,6 @@ def create_search_index():
            search_profile['name'] = None

        if 'twitter' in profile:
-        
            twitter_handle = profile['twitter']

            try:
@@ -232,9 +222,7 @@ def create_search_index():
        search_profile['profile'] = profile
        search_profiles.save(search_profile)

-        

-           
    # dedup names
    people_names = list(set(people_names))
    people_names = {'name': people_names}
@@ -246,9 +234,6 @@ def create_search_index():
    usernames = {'username': usernames}

    # save final dedup results to mongodb (using it as a cache)
-
-    
-    
    people_cache.save(people_names)
    twitter_cache.save(twitter_handles)
    username_cache.save(usernames)
@@ -257,7 +242,6 @@ def create_search_index():

    log.debug('Created name/twitter/username search index')

-        
 if __name__ == "__main__":

    if(len(sys.argv) < 2):
--- a/api/search/fetch_data.py
+++ b/api/search/fetch_data.py
@@ -35,7 +35,7 @@ from api.config import (
 from .utils import validUsername
 from .utils import get_json, config_log

-from blockstack_client  import proxy
+from blockstack_client import proxy, subdomains
 from blockstack_client.profile import get_profile
 from api.utils import profile_log
 import logging
@@ -49,8 +49,13 @@ def fetch_namespace():
    """
    resp = proxy.get_all_names()

+    subdomaindb = subdomains.SubdomainDB()
+    subdomain_names = subdomaindb.get_all_subdomains()
+
+    all_names = list(resp) + list(subdomain_names)
+
    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout:
-        fout.write(json.dumps(resp))
+        fout.write(json.dumps(all_names))

 def print_status_bar(filled, total):
    pct = float(filled) / total
@@ -67,6 +72,7 @@ def update_profiles():

    last_block_processed = search_indexer_info['last_block_height']
    last_full_index = search_indexer_info['last_full_index']
+    last_subdomain_seq = search_indexer_info['last_subdomain_seq']

    info_resp = proxy.getinfo()
    try:
@@ -75,28 +81,40 @@ def update_profiles():
        print info_resp
        raise

+
+    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin:
+        existing_names = set(json.load(fin))
+
    if last_block_processed - 1 > new_block_height:
        return {'status' : True, 'message' : 'No new blocks since last indexing'}

+    subdomaindb = subdomains.SubdomainDB()
+    subdomain_names = [ name for name in
+                        subdomaindb.get_all_subdomains(above_seq = last_subdomain_seq)
+                        if name not in existing_names ]
+    last_subdomain_seq = subdomaindb.get_last_index()
+
    # aaron: note, sometimes it may take a little while for
    #  new zonefiles to have propagated to the network, so
    #  we over-fetch a little bit
    zonefiles_resp = proxy.get_zonefiles_by_block(
        last_block_processed - 1, new_block_height)
    zonefiles_updated = zonefiles_resp['zonefile_info']
-    names_updated = set(
-        [ zf_info['name'] for zf_info in zonefiles_updated
-          if 'name' in zf_info ])
+    names_updated = [ zf_info['name'] for zf_info in zonefiles_updated
+                      if 'name' in zf_info ]
+    names_updated += subdomain_names
+    names_to_insert = set([ name for name in names_updated if name not in existing_names ])
+
    updated_profiles = {}
    actually_updated_names = set()
    print "Updating {} entries...".format(len(names_updated))
-    for ix, name in enumerate(names_updated):
-        print_status_bar(ix+1, len(names_updated))
+    for ix, name in enumerate(names_to_insert):
+        print_status_bar(ix+1, len(names_to_insert))
        profile_entry = {}
        profile_entry['fqu'] = name
-
        try:
-            profile_entry['profile'] = get_profile(name, use_legacy = True)['profile']
+            profile_resp = get_profile(name, use_legacy = True)
+            profile_entry['profile'] = profile_resp['profile']
            updated_profiles[name] = (profile_entry)
            actually_updated_names.add(name)
        except KeyboardInterrupt as e:
@@ -105,13 +123,17 @@ def update_profiles():
            import traceback as tb; tb.print_exc()

    names_updated = actually_updated_names
+
+    if len(names_updated) == 0:
+        return {'status' : True, 'message' : 'No new profiles'}
+
    with open(SEARCH_PROFILE_DATA_FILE, 'r') as fin:
        all_profiles = json.load(fin)
-    to_remove = []
-    for ix, profile in enumerate(all_profiles):
-        if profile['fqu'] in names_updated:
-            all_profiles[ix] = updated_profiles[profile['fqu']]
+    existing_names = list(existing_names)

+    for name_to_add in names_updated:
+        all_profiles.append(updated_profiles[name_to_add])
+        existing_names.append(name_to_add)

    if not obtain_lockfile():
        return {'error' : 'Could not obtain lockfile, abandoning my update.'}
@@ -120,10 +142,13 @@ def update_profiles():
    if search_indexer_info['last_full_index'] != last_full_index:
        return {'error' : 'Full re-index written during our update. Abandoning'}

+    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'w') as fout:
+        json.dump(existing_names, fout)
    with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout:
        json.dump(all_profiles, fout)
    with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout:
        search_indexer_info['last_block_height'] = new_block_height
+        search_indexer_info['last_subdomain_seq'] = last_subdomain_seq
        json.dump(search_indexer_info, fout)

    return {'status' : True, 'message' : 'Indexed {} profiles'.format(len(names_updated))}
@@ -138,7 +163,7 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False):
    """

    with open(SEARCH_BLOCKCHAIN_DATA_FILE, 'r') as fin:
-        all_names = json.load(file)
+        all_names = json.load(fin)

    info_resp = proxy.getinfo()
    last_block_processed = info_resp['last_block_processed']
@@ -177,12 +202,16 @@ def fetch_profiles(max_to_fetch = None, just_test_set = False):
            print "ERROR! Could not obtain lockfile"
            return

+    subdomaindb = subdomains.SubdomainDB()
+    last_subdomain_seq = subdomaindb.get_last_index()
+
    with open(SEARCH_PROFILE_DATA_FILE, 'w') as fout:
        json.dump(all_profiles, fout)
    with open(SEARCH_LAST_INDEX_DATA_FILE, 'w') as fout:
        search_index_data = {
            'last_block_height' : last_block_processed,
-            'last_full_index' : datetime.now().isoformat()
+            'last_full_index' : datetime.now().isoformat(),
+            'last_subdomain_seq' : last_subdomain_seq
        }
        json.dump(search_index_data, fout)

--- a/api/search/server.py
+++ b/api/search/server.py
@@ -1,26 +1,26 @@
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 """
-	Search
-	~~~~~
+Search
+~~~~~

-	copyright: (c) 2014-2016 by Halfmoon Labs, Inc.
-	copyright: (c) 2016 by Blockstack.org
+copyright: (c) 2014-2016 by Halfmoon Labs, Inc.
+copyright: (c) 2016 by Blockstack.org

 This file is part of Search.

-	Search is free software: you can redistribute it and/or modify
-	it under the terms of the GNU General Public License as published by
-	the Free Software Foundation, either version 3 of the License, or
-	(at your option) any later version.
+Search is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.

-	Search is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty of
-	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-	GNU General Public License for more details.
+Search is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.

-	You should have received a copy of the GNU General Public License
-	along with Search. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with Search. If not, see <http://www.gnu.org/licenses/>.
 """

 import sys
@@ -48,67 +48,67 @@ from api.utils import get_mc_client
 mc = get_mc_client()

 class QueryThread(threading.Thread):
-	""" for performing multi-threaded search on three search sub-systems
-	"""
-	def __init__(self, query, query_type, limit_results):
-		threading.Thread.__init__(self)
-		self.query = query
-		self.query_type = query_type
-		self.results = []
-		self.limit_results = limit_results
-		self.found_exact_match = False
+    """ for performing multi-threaded search on three search sub-systems
+    """
+    def __init__(self, query, query_type, limit_results):
+        threading.Thread.__init__(self)
+        self.query = query
+        self.query_type = query_type
+        self.results = []
+        self.limit_results = limit_results
+        self.found_exact_match = False

-	def run(self):
-		if(self.query_type == 'people_search'):
-			self.results = query_people_database(self.query, self.limit_results)
-		elif(self.query_type == 'twitter_search'):
-			self.results = query_twitter_database(self.query, self.limit_results)
-		elif(self.query_type == 'username_search'):
-			self.results = query_username_database(self.query, self.limit_results)
-			#self.found_exact_match, self.results = query_company_database(self.query)
-		if(self.query_type == 'lucene_search'):
-			self.results = query_lucene_index(self.query, self.limit_results)
+    def run(self):
+        if(self.query_type == 'people_search'):
+            self.results = query_people_database(self.query, self.limit_results)
+        elif(self.query_type == 'twitter_search'):
+            self.results = query_twitter_database(self.query, self.limit_results)
+        elif(self.query_type == 'username_search'):
+            self.results = query_username_database(self.query, self.limit_results)
+            #self.found_exact_match, self.results = query_company_database(self.query)
+        if(self.query_type == 'lucene_search'):
+            self.results = query_lucene_index(self.query, self.limit_results)


 def error_reply(msg, code=-1):
-	reply = {}
-	reply['status'] = code
-	reply['message'] = "ERROR: " + msg
-	return jsonify(reply)
+    reply = {}
+    reply['status'] = code
+    reply['message'] = "ERROR: " + msg
+    return jsonify(reply)


 def query_people_database(query, limit_results=DEFAULT_LIMIT):

-	name_search_results = search_people_by_name(query, limit_results)
-	return fetch_profiles(name_search_results, search_type="name")
+    name_search_results = search_people_by_name(query, limit_results)
+    return fetch_profiles(name_search_results, search_type="name")


 def query_twitter_database(query, limit_results=DEFAULT_LIMIT):

-	twitter_search_results = search_people_by_twitter(query, limit_results)
-	return fetch_profiles(twitter_search_results, search_type="twitter")
+    twitter_search_results = search_people_by_twitter(query, limit_results)
+    return fetch_profiles(twitter_search_results, search_type="twitter")


 def query_username_database(query, limit_results=DEFAULT_LIMIT):

-	username_search_results = search_people_by_username(query, limit_results)
-	return fetch_profiles(username_search_results, search_type="username")
+    username_search_results = search_people_by_username(query, limit_results)
+    return fetch_profiles(username_search_results, search_type="username")


 def query_lucene_index(query, index, limit_results=DEFAULT_LIMIT):

-	username_search_results = search_people_by_bio(query, limit_results)
-	return fetch_profiles(username_search_results, search_type="username")
+    username_search_results = search_people_by_bio(query, limit_results)
+    return fetch_profiles(username_search_results, search_type="username")


 def test_alphanumeric(query):
-	""" check if query has only alphanumeric characters or not
-	"""
+    """ check if query has only alphanumeric characters or not
+    """

-	import re
-	valid = re.match(r'^\w+[\s\w]*$', query) is not None
+    import re
+    valid = re.match(r'^\w+[\s\w]*$', query) is not None

-	return True
+    return True


@searcher.route('/search', methods = ["GET", "POST"], strict_slashes = False)
@@ -116,110 +116,110 @@ def test_alphanumeric(query):
@cache_control(MEMCACHED_TIMEOUT)
 def search_by_name():

-	query = request.args.get('query')
+    query = request.args.get('query')

-	results_people = []
+    results_people = []

-	if query is None:
-		return error_reply("No query given")
-	elif query == '' or query == ' ':
-		return json.dumps({})
+    if query is None:
+        return error_reply("No query given")
+    elif query == '' or query == ' ':
+        return json.dumps({})

-	if MEMCACHED_ENABLED:
+    if MEMCACHED_ENABLED:

-		cache_key = str('search_cache_' + query.lower())
-		cache_reply = mc.get(cache_key)
+        cache_key = str('search_cache_' + query.lower())
+        cache_reply = mc.get(cache_key)

-		# if a cache hit, respond straight away
-		if(cache_reply is not None):
-			return jsonify(cache_reply)
+        # if a cache hit, respond straight away
+        if(cache_reply is not None):
+            return jsonify(cache_reply)

-	new_limit = DEFAULT_LIMIT
+    new_limit = DEFAULT_LIMIT

-	try:
-		new_limit = int(request.values['limit_results'])
-	except:
-		pass
+    try:
+        new_limit = int(request.values['limit_results'])
+    except:
+        pass

-	if validProofQuery(query):
-		return search_proofs_index(query)
+    if validProofQuery(query):
+        return search_proofs_index(query)

-	elif test_alphanumeric(query) is False:
-		pass
+    elif test_alphanumeric(query) is False:
+        pass

-	else:
+    else:

-		threads = []
+        threads = []

-		t1 = QueryThread(query, 'username_search', new_limit)
-		t2 = QueryThread(query, 'twitter_search', new_limit)
-		t3 = QueryThread(query, 'people_search', new_limit)
+        t1 = QueryThread(query, 'username_search', new_limit)
+        t2 = QueryThread(query, 'twitter_search', new_limit)
+        t3 = QueryThread(query, 'people_search', new_limit)

-		if LUCENE_ENABLED:
-			t4 = QueryThread(query, 'lucene_search', new_limit)
+        if LUCENE_ENABLED:
+            t4 = QueryThread(query, 'lucene_search', new_limit)

-		threads.append(t1)
-		threads.append(t2)
-		threads.append(t3)
+        threads.append(t1)
+        threads.append(t2)
+        threads.append(t3)

-		if LUCENE_ENABLED:
-			threads.append(t4)
+        if LUCENE_ENABLED:
+            threads.append(t4)

-		# start all threads
-		[x.start() for x in threads]
+        # start all threads
+        [x.start() for x in threads]

-		# wait for all of them to finish
-		[x.join() for x in threads]
+        # wait for all of them to finish
+        [x.join() for x in threads]

-		# at this point all threads have finished and all queries have been performed
+        # at this point all threads have finished and all queries have been performed

-		results_username = t1.results
-		results_twitter = t2.results
-		results_people = t3.results
+        results_username = t1.results
+        results_twitter = t2.results
+        results_people = t3.results

-		if LUCENE_ENABLED:
-			results_bio = t4.results
+        if LUCENE_ENABLED:
+            results_bio = t4.results

-		results_people += results_username + results_twitter
-		if LUCENE_ENABLED:
-			results_people += results_bio
+        results_people += results_username + results_twitter
+        if LUCENE_ENABLED:
+            results_people += results_bio

-		# dedup all results before sending out
-		from substring_search import dedup_search_results
-		results_people = dedup_search_results(results_people)
+        # dedup all results before sending out
+        from substring_search import dedup_search_results
+        results_people = dedup_search_results(results_people)

-	results = {}
-	results['results'] = results_people[:new_limit]
+    results = {}
+    results['results'] = results_people[:new_limit]

-	if MEMCACHED_ENABLED:
-		mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
+    if MEMCACHED_ENABLED:
+        mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))

-	return jsonify(results)
+    return jsonify(results)


 def search_proofs_index(query):

-	results = {}
+    results = {}

-	query = request.args.get('query')
+    query = request.args.get('query')

-	if query is None:
-		return error_reply("No query given")
-	elif query == '' or query == ' ':
-		return json.dumps({})
+    if query is None:
+        return error_reply("No query given")
+    elif query == '' or query == ' ':
+        return json.dumps({})

-	if MEMCACHED_ENABLED:
+    if MEMCACHED_ENABLED:

-		cache_key = str('search_cache_' + query.lower())
-		cache_reply = mc.get(cache_key)
+        cache_key = str('search_cache_' + query.lower())
+        cache_reply = mc.get(cache_key)

-		# if a cache hit, respond straight away
-		if(cache_reply is not None):
-			return jsonify(cache_reply)
+        # if a cache hit, respond straight away
+        if(cache_reply is not None):
+            return jsonify(cache_reply)

-	results['results'] = search_proofs(query)
+    results['results'] = search_proofs(query)

-	if MEMCACHED_ENABLED:
-		mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))
+    if MEMCACHED_ENABLED:
+        mc.set(cache_key, results, int(time() + MEMCACHED_TIMEOUT))

-	return jsonify(results)
+    return jsonify(results)
--- a/api/search/utils.py
+++ b/api/search/utils.py
@@ -99,4 +99,8 @@ def validUsername(username):
    if a.match(username):
        return True
    else:
+        parts = username.split(".")
+        if len(parts) == 2:
+            if a.match(parts[0]) and a.match(parts[1]):
+                return True
        return False
--- a/api/tests/search_tests.py
+++ b/api/tests/search_tests.py
@@ -50,7 +50,7 @@ class SearchTestCase(unittest.TestCase):
            self.assertIsNotNone(entry['username'])
            if entry['username'] not in self.test_users:
                continue
-            self.assertIsNotNone(entry['profile'], 
+            self.assertIsNotNone(entry['profile'],
                                 msg="Error in fetching profile of entry: {}".format(entry))

    def test_find_user(self):
@@ -60,3 +60,10 @@ class SearchTestCase(unittest.TestCase):
            self.assertIn(u, data['results'][0]['username'])
            self.assertIn("profile", data['results'][0].keys())

+    def test_find_subdomain(self):
+        for u in ["Thomas Hobbes"]:
+            data = self.do_search(u)
+            self.assertTrue(len(data['results']) > 0)
+
+if __name__ == "__main__":
+    unittest.main()
--- a/blockstack_client/profile.py
+++ b/blockstack_client/profile.py
@@ -36,7 +36,7 @@ from .proxy import (
    json_is_error, get_name_blockchain_history, get_name_blockchain_record,
    get_default_proxy)

-from blockstack_client import storage
+from blockstack_client import storage, subdomains
 from blockstack_client import user as user_db

 from .logger import get_logger
@@ -205,13 +205,13 @@ def get_profile(name, zonefile_storage_drivers=None, profile_storage_drivers=Non
    and then loading the profile from that zonefile's public key.

    Notes on backwards compatibility (activated if use_legacy=True and use_legacy_zonefile=True):
-    
+
    * (use_legacy=True) If the user's zonefile is really a legacy profile from Onename, then
    the profile returned will be the converted legacy profile.  The returned zonefile will still
    be a legacy profile, however.
    The caller can check this and perform the conversion automatically.

-    * (use_legacy_zonefile=True) If the name points to a current zonefile that does not have a 
+    * (use_legacy_zonefile=True) If the name points to a current zonefile that does not have a
    data public key, then the owner address of the name will be used to verify
    the profile's authenticity.

@@ -224,6 +224,15 @@ def get_profile(name, zonefile_storage_drivers=None, profile_storage_drivers=Non

    proxy = get_default_proxy() if proxy is None else proxy

+    res = subdomains.is_address_subdomain(str(name))
+    if res:
+        subdomain, domain = res[1]
+        try:
+            return subdomains.resolve_subdomain(subdomain, domain)
+        except subdomains.SubdomainNotFound as e:
+            log.exception(e)
+            return {'error' : "Failed to find name {}.{}".format(subdomain, domain)}
+
    raw_zonefile = None
    if user_zonefile is None:
        user_zonefile = get_name_zonefile(
--- a/blockstack_client/subdomains.py
+++ b/blockstack_client/subdomains.py
@@ -30,7 +30,7 @@ import virtualchain
 from multiprocessing import Pool

 from itertools import izip
-from blockstack_client import data, storage, config, proxy, schemas, constants
+from blockstack_client import storage, config, proxy, schemas, constants
 from blockstack_client import zonefile as bs_zonefile
 from blockstack_client import user as user_db
 from blockstack_client.backend import safety
@@ -178,6 +178,22 @@ class SubdomainDB(object):

        return Subdomain(domain_name, subdomain_name, str(encoded_pubkey), int(n), str(zonefile_str), sig, txid)

+    def get_all_subdomains(self, above_seq = None):
+        if above_seq:
+            get_cmd = "SELECT fully_qualified_subdomain FROM {} WHERE sequence >= ?"
+        else:
+            get_cmd = "SELECT fully_qualified_subdomain FROM {}"
+        get_cmd = get_cmd.format(self.subdomain_table)
+        cursor = self.conn.cursor()
+        if above_seq:
+            cursor.execute(get_cmd, (above_seq,))
+        else:
+            cursor.execute(get_cmd)
+        try:
+            return [ x[0] for x in cursor.fetchall() ]
+        except:
+            return []
+
    def get_subdomains_owned_by_address(self, owner):
        get_cmd = "SELECT fully_qualified_subdomain FROM {} WHERE owner = ?".format(
            self.subdomain_table)
@@ -308,18 +324,34 @@ class SubdomainDB(object):
            return 0
        return int(last_block)

+    def get_last_index(self):
+        """
+        Returns the last sequence number for the subdomain DB.
+        WARNING: this is specific to *this* instance, and *this* DB,
+        if you use this, it should *only* be as an optimization.
+        """
+        get_cmd = """SELECT sequence FROM {} ORDER BY sequence DESC LIMIT 1""".format(
+            self.subdomain_table)
+        cursor = self.conn.cursor()
+        cursor.execute(get_cmd)
+        try:
+            last_seq = cursor.fetchone()[0]
+        except:
+            return 0
+        return int(last_seq)
+
    def _drop_tables(self):
        drop_cmd = "DROP TABLE IF EXISTS {};"
        cursor = self.conn.cursor()
        cursor.execute(drop_cmd.format(self.subdomain_table))
-        cursor.execute(drop_cmd.format(self.status_table)) 
+        cursor.execute(drop_cmd.format(self.status_table))

    def _create_tables(self):
        create_cmd = """CREATE TABLE IF NOT EXISTS {} (
        fully_qualified_subdomain TEXT PRIMARY KEY,
        sequence INTEGER,
        owner TEXT,
-        zonefile TEXT, 
+        zonefile TEXT,
        signature TEXT,
        last_txid TEXT);
        """.format(self.subdomain_table)
@@ -344,7 +376,8 @@ def parse_zonefile_subdomains(domain, zonefile_json):

 def is_address_subdomain(fqa):
    """
-    Tests whether fqa is a subdomain. 
+    Tests whether fqa is a subdomain.
+    @fqa must be a string
    If it isn't, returns False.
    If it is, returns True and a tuple (subdomain_name, domain)
    """
@@ -448,6 +481,7 @@ def add_subdomains(subdomains, domain_fqa):

 def get_subdomain_info(subdomain, domain_fqa, use_cache = True):
    if not use_cache:
+        from blockstack_client import data
        zonefiles = data.list_zonefile_history(domain_fqa)
        subdomain_db = _build_subdomain_db([domain_fqa for z in zonefiles], zonefiles)
    else: