add fixture data for the search indexer container

This commit is contained in:
Aaron Blankstein
2018-01-18 12:33:52 -05:00
parent 68277d28e3
commit eb593bb622
9 changed files with 1665 additions and 184 deletions

View File

@@ -1,3 +1,3 @@
0 12 * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-reindex
0 0 * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-reindex
0,12,24,36,48 * * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-update
0 12 * * * /src/blockstack/api/search/scripts/search_reindex.sh
0 0 * * * /src/blockstack/api/search/scripts/search_reindex.sh
0,12,24,36,48 * * * * /src/blockstack/api/search/scripts/search_update.sh

View File

@@ -42,5 +42,23 @@ services:
volumes:
- "./data/search-api/:/var/blockstack-search"
search-indexer:
build:
context: ../
dockerfile: api/search/Dockerfile
environment:
- BLOCKSTACK_DEBUG=0
- PUBLIC_NODE=True
- DEFAULT_HOST=0.0.0.0
- MONGODB_URI=mongodb://mongodb
networks:
- api
restart: always
links:
- mongodb
- blockstack-api
volumes:
- "./data/search-api/:/var/blockstack-search"
networks:
api: null

View File

@@ -27,9 +27,7 @@ import sys
import json
import requests
from pymongo import MongoClient
from .utils import validUsername
from .utils import validUsername, get_mongo_client
from .utils import get_json, config_log, pretty_print
from api.config import SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE
@@ -118,7 +116,7 @@ def fetch_namespace_from_file():
def flush_db():
client = MongoClient()
client = get_mongo_client()
# delete any old cache/index
client.drop_database('search_db')

View File

@@ -23,9 +23,9 @@ This file is part of Search.
along with Search. If not, see <http://www.gnu.org/licenses/>.
"""
from pymongo import MongoClient
from .utils import get_mongo_client
client = MongoClient()
client = get_mongo_client()
search_db = client['search_db']
search_cache = client['search_cache']

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,156 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Search
~~~~~
copyright: (c) 2014 by Halfmoon Labs, Inc.
copyright: (c) 2015 by Blockstack.org
This file is part of Search.
Search is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Search is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Search. If not, see <http://www.gnu.org/licenses/>.
"""
""" functions for building the ES/lucene search index and mappings
"""
import sys
import json
# pyes: legacy ElasticSearch client; exposes ES and the Query* classes used below
from pyes import *
# single module-level ES connection shared by all index/search helpers
conn = ES()
from pymongo import MongoClient
# module-level Mongo client (default localhost connection) — reused by readers below
c = MongoClient()
# documented CLI options for this script (see __main__ block)
INPUT_OPTIONS = '--create_index --search'
from config import BULK_INSERT_LIMIT
from common import log
def create_mapping(index_name, index_type):
    """Create a lucene index and register the document-type mapping.

    Drops any pre-existing index of the same name first, then creates a
    fresh index and installs a mapping that boosts and analyzes the
    profile_bio field.

    index_name -- name of the ES index to (re)create
    index_type -- document type the mapping is registered under
    """
    try:
        # delete the old mapping, if it exists; best-effort cleanup
        conn.indices.delete_index(index_name)
    except Exception:
        # was a bare `except:` — narrowed so Ctrl-C / SystemExit still propagate
        pass
    conn.indices.create_index(index_name)
    mapping = {u'profile_bio': {'boost': 3.0,
                                'index': 'analyzed',
                                'store': 'yes',
                                'type': u'string',
                                'term_vector': 'with_positions_offsets'}}
    conn.indices.put_mapping(index_type, {'properties': mapping}, [index_name])
def create_people_index():
""" create a lucene index from exisitng user data in mongodb
"""
create_mapping("onename_people_index", "onename_profiles")
conn.default_indices = ["onename_people_index"]
from pymongo import MongoClient
from bson import json_util
import json
mc = MongoClient()
db = mc['search_db']
counter = 0
profile_bio = ''
for profile in db.profiles.find():
profile_data = profile['profile']
if type(profile_data) is dict:
profile_bio = profile_data.get('bio', None)
if profile_bio:
try:
res = conn.index({'profile_bio': profile_bio,
'username': profile['username'],
'_boost': 3,
},
"onename_people_index", "onename_profiles",
bulk=True)
counter += 1
except Exception as e:
pass
# print e
if(counter % BULK_INSERT_LIMIT == 0):
print '-' * 5
print 'items indexed so far:' + str(counter)
print '-' * 5
conn.indices.refresh(["onename_people_index"])
conn.indices.flush()
def test_query(query, index=['onename_people_index']):
q = QueryStringQuery(query,
search_fields=['profile_bio', 'username'],
default_operator='and')
count = conn.count(query=q)
count = count.count
if(count == 0):
q = QueryStringQuery(query,
search_fields=['profile_bio', 'username'],
default_operator='or')
# q = TermQuery("profile_bio",query)
results = conn.search(query=q, size=20, indices=index)
counter = 0
results_list = []
for i in results:
counter += 1
print 'username: ' + i['username']
print 'bio: ' + i['profile_bio']
print results_list
if __name__ == "__main__":
try:
if(len(sys.argv) < 2):
print "Usage error"
option = sys.argv[1]
if(option == '--create_index'):
create_people_index()
elif(option == '--search_bio'):
test_query(query=sys.argv[2])
else:
print "Usage error"
except Exception as e:
print e

View File

@@ -62,8 +62,6 @@ class QueryThread(threading.Thread):
elif(self.query_type == 'username_search'):
self.results = query_username_database(self.query, self.limit_results)
#self.found_exact_match, self.results = query_company_database(self.query)
if(self.query_type == 'lucene_search'):
self.results = query_lucene_index(self.query, self.limit_results)
def error_reply(msg, code=-1):
@@ -91,12 +89,6 @@ def query_username_database(query, limit_results=DEFAULT_LIMIT):
return fetch_profiles(username_search_results, search_type="username")
def query_lucene_index(query, index, limit_results=DEFAULT_LIMIT):
username_search_results = search_people_by_bio(query, limit_results)
return fetch_profiles(username_search_results, search_type="username")
def test_alphanumeric(query):
""" check if query has only alphanumeric characters or not
"""
@@ -142,16 +134,10 @@ def search_by_name():
t2 = QueryThread(query, 'twitter_search', new_limit)
t3 = QueryThread(query, 'people_search', new_limit)
if LUCENE_ENABLED:
t4 = QueryThread(query, 'lucene_search', new_limit)
threads.append(t1)
threads.append(t2)
threads.append(t3)
if LUCENE_ENABLED:
threads.append(t4)
# start all threads
[x.start() for x in threads]
@@ -164,12 +150,7 @@ def search_by_name():
results_twitter = t2.results
results_people = t3.results
if LUCENE_ENABLED:
results_bio = t4.results
results_people += results_username + results_twitter
if LUCENE_ENABLED:
results_people += results_bio
# dedup all results before sending out
from substring_search import dedup_search_results

View File

@@ -23,6 +23,8 @@ This file is part of Search.
along with Search. If not, see <http://www.gnu.org/licenses/>.
"""
from pymongo import MongoClient
import json
import re
@@ -78,6 +80,9 @@ def error_reply(msg):
reply['message'] = "ERROR: " + msg
return pretty_dump(reply)
def get_mongo_client():
from api.config import MONGODB_URI
return MongoClient(MONGODB_URI)
def get_json(data):