mirror of
https://github.com/alexgo-io/stacks-puppet-node.git
synced 2026-04-08 16:59:35 +08:00
add fixture data for the search indexer container
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
0 12 * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-reindex
|
||||
0 0 * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-reindex
|
||||
0,12,24,36,48 * * * * /usr/bin/docker run -d --network api_api -v /home/ubuntu/blockstack-core/api/data/search-api/:/var/blockstack-search/ api_search-api:latest ./ops search-update
|
||||
0 12 * * * /src/blockstack/api/search/scripts/search_reindex.sh
|
||||
0 0 * * * /src/blockstack/api/search/scripts/search_reindex.sh
|
||||
0,12,24,36,48 * * * * /src/blockstack/api/search/scripts/search_update.sh
|
||||
|
||||
@@ -42,5 +42,23 @@ services:
|
||||
volumes:
|
||||
- "./data/search-api/:/var/blockstack-search"
|
||||
|
||||
search-indexer:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: api/search/Dockerfile
|
||||
environment:
|
||||
- BLOCKSTACK_DEBUG=0
|
||||
- PUBLIC_NODE=True
|
||||
- DEFAULT_HOST=0.0.0.0
|
||||
- MONGODB_URI=mongodb://mongodb
|
||||
networks:
|
||||
- api
|
||||
restart: always
|
||||
links:
|
||||
- mongodb
|
||||
- blockstack-api
|
||||
volumes:
|
||||
- "./data/search-api/:/var/blockstack-search"
|
||||
|
||||
networks:
|
||||
api: null
|
||||
|
||||
@@ -27,9 +27,7 @@ import sys
|
||||
import json
|
||||
import requests
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
from .utils import validUsername
|
||||
from .utils import validUsername, get_mongo_client
|
||||
from .utils import get_json, config_log, pretty_print
|
||||
|
||||
from api.config import SEARCH_BLOCKCHAIN_DATA_FILE, SEARCH_PROFILE_DATA_FILE
|
||||
@@ -118,7 +116,7 @@ def fetch_namespace_from_file():
|
||||
|
||||
def flush_db():
|
||||
|
||||
client = MongoClient()
|
||||
client = get_mongo_client()
|
||||
|
||||
# delete any old cache/index
|
||||
client.drop_database('search_db')
|
||||
|
||||
@@ -23,9 +23,9 @@ This file is part of Search.
|
||||
along with Search. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
from pymongo import MongoClient
|
||||
from .utils import get_mongo_client
|
||||
|
||||
client = MongoClient()
|
||||
client = get_mongo_client()
|
||||
search_db = client['search_db']
|
||||
search_cache = client['search_cache']
|
||||
|
||||
|
||||
1
api/search/fixtures/blockchain_data.json
Normal file
1
api/search/fixtures/blockchain_data.json
Normal file
File diff suppressed because one or more lines are too long
1634
api/search/fixtures/profile_data.slice.json
Normal file
1634
api/search/fixtures/profile_data.slice.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,156 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Search
|
||||
~~~~~
|
||||
|
||||
copyright: (c) 2014 by Halfmoon Labs, Inc.
|
||||
copyright: (c) 2015 by Blockstack.org
|
||||
|
||||
This file is part of Search.
|
||||
|
||||
Search is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Search is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Search. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
""" functions for building the ES/lucene search index and mappings
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from pyes import *
|
||||
# module-wide search connection; ES comes from the pyes star-import and is
# called here without arguments
conn = ES()

from pymongo import MongoClient
# module-wide MongoDB client, created without arguments
c = MongoClient()

# NOTE(review): the __main__ handler in this file checks for '--search_bio',
# not '--search' -- confirm which spelling is intended
INPUT_OPTIONS = '--create_index --search'
|
||||
|
||||
from config import BULK_INSERT_LIMIT
|
||||
from common import log
|
||||
|
||||
|
||||
def create_mapping(index_name, index_type):
    """ create lucene index and add/specify document type

        index_name: name of the index to (re)create
        index_type: document type the 'profile_bio' mapping is registered for

        Any index already called index_name is deleted first, then the index
        is created again and the mapping is put on it.
    """

    try:
        # delete the old mapping, if exists
        conn.indices.delete_index(index_name)
    except Exception:
        # best-effort delete: presumably this fails when the index does not
        # exist yet. Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    conn.indices.create_index(index_name)

    mapping = {u'profile_bio': {'boost': 3.0,
                                'index': 'analyzed',
                                'store': 'yes',
                                'type': u'string',
                                'term_vector': 'with_positions_offsets'}}

    conn.indices.put_mapping(index_type, {'properties': mapping}, [index_name])
|
||||
|
||||
|
||||
def create_people_index():
    """ create a lucene index from existing user data in mongodb

        Recreates "onename_people_index", then walks every document of the
        'profiles' collection in 'search_db' and indexes the ones whose
        profile is a dict with a non-empty 'bio' as "onename_profiles"
        documents. A progress banner is printed every BULK_INSERT_LIMIT
        successfully indexed items.
    """

    create_mapping("onename_people_index", "onename_profiles")
    conn.default_indices = ["onename_people_index"]

    mc = MongoClient()
    db = mc['search_db']

    counter = 0

    for profile in db.profiles.find():
        profile_data = profile['profile']

        # start from a clean value for every profile so that a profile whose
        # data is not a dict is never indexed with the previous user's bio
        profile_bio = None
        if isinstance(profile_data, dict):
            profile_bio = profile_data.get('bio', None)

        if not profile_bio:
            continue

        try:
            conn.index({'profile_bio': profile_bio,
                        'username': profile['username'],
                        '_boost': 3,
                        },
                       "onename_people_index", "onename_profiles",
                       bulk=True)
        except Exception:
            # deliberate best-effort: one profile that cannot be indexed
            # must not abort the whole run, so it is skipped
            continue

        counter += 1

        # only report after a successful insert, so the banner is not
        # printed at zero or repeated while the counter is unchanged
        if counter % BULK_INSERT_LIMIT == 0:
            print('-' * 5)
            print('items indexed so far:' + str(counter))
            print('-' * 5)

    conn.indices.refresh(["onename_people_index"])
    conn.indices.flush()
|
||||
|
||||
|
||||
def test_query(query, index=None):
    """ run a query-string search on bios/usernames and print the hits

        query: the query string to search for
        index: list of index names to search; defaults to
               ['onename_people_index']

        The query is first counted with the 'and' operator; when nothing
        matches, the search falls back to the 'or' operator. Up to 20 hits
        are requested. Each hit is printed, followed by the list of all
        collected hits. Returns nothing.
    """

    # avoid a shared mutable default argument
    if index is None:
        index = ['onename_people_index']

    search_fields = ['profile_bio', 'username']

    q = QueryStringQuery(query,
                         search_fields=search_fields,
                         default_operator='and')

    count = conn.count(query=q).count

    if count == 0:
        # nothing matches all terms, so relax to matching any term
        q = QueryStringQuery(query,
                             search_fields=search_fields,
                             default_operator='or')

    # q = TermQuery("profile_bio",query)
    results = conn.search(query=q, size=20, indices=index)

    results_list = []

    for hit in results:
        print('username: ' + hit['username'])
        print('bio: ' + hit['profile_bio'])

        # collect the hit so the summary printed below is not always empty
        results_list.append({'username': hit['username'],
                             'profile_bio': hit['profile_bio']})

    print(results_list)
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # command-line entry point:
    #   --create_index        rebuild the people index
    #   --search_bio <query>  run a test query and print the hits
    try:

        if len(sys.argv) < 2:
            # stop here: reading sys.argv[1] below would raise IndexError
            print("Usage error")
        else:
            option = sys.argv[1]

            if option == '--create_index':
                create_people_index()
            elif option == '--search_bio' and len(sys.argv) > 2:
                test_query(query=sys.argv[2])
            else:
                # unknown option, or --search_bio without a query string
                print("Usage error")

    except Exception as e:
        print(e)
|
||||
@@ -62,8 +62,6 @@ class QueryThread(threading.Thread):
|
||||
elif(self.query_type == 'username_search'):
|
||||
self.results = query_username_database(self.query, self.limit_results)
|
||||
#self.found_exact_match, self.results = query_company_database(self.query)
|
||||
if(self.query_type == 'lucene_search'):
|
||||
self.results = query_lucene_index(self.query, self.limit_results)
|
||||
|
||||
|
||||
def error_reply(msg, code=-1):
|
||||
@@ -91,12 +89,6 @@ def query_username_database(query, limit_results=DEFAULT_LIMIT):
|
||||
return fetch_profiles(username_search_results, search_type="username")
|
||||
|
||||
|
||||
def query_lucene_index(query, index, limit_results=DEFAULT_LIMIT):
    """ search people by bio and return the result of fetch_profiles

        query: search string, handed to search_people_by_bio
        index: not used inside this function
        limit_results: handed to search_people_by_bio as second argument

        NOTE(review): the visible caller invokes this as
        query_lucene_index(self.query, self.limit_results), which binds the
        limit to `index` and leaves limit_results at DEFAULT_LIMIT -- confirm
    """

    username_search_results = search_people_by_bio(query, limit_results)
    return fetch_profiles(username_search_results, search_type="username")
|
||||
|
||||
|
||||
def test_alphanumeric(query):
|
||||
""" check if query has only alphanumeric characters or not
|
||||
"""
|
||||
@@ -142,16 +134,10 @@ def search_by_name():
|
||||
t2 = QueryThread(query, 'twitter_search', new_limit)
|
||||
t3 = QueryThread(query, 'people_search', new_limit)
|
||||
|
||||
if LUCENE_ENABLED:
|
||||
t4 = QueryThread(query, 'lucene_search', new_limit)
|
||||
|
||||
threads.append(t1)
|
||||
threads.append(t2)
|
||||
threads.append(t3)
|
||||
|
||||
if LUCENE_ENABLED:
|
||||
threads.append(t4)
|
||||
|
||||
# start all threads
|
||||
[x.start() for x in threads]
|
||||
|
||||
@@ -164,12 +150,7 @@ def search_by_name():
|
||||
results_twitter = t2.results
|
||||
results_people = t3.results
|
||||
|
||||
if LUCENE_ENABLED:
|
||||
results_bio = t4.results
|
||||
|
||||
results_people += results_username + results_twitter
|
||||
if LUCENE_ENABLED:
|
||||
results_people += results_bio
|
||||
|
||||
# dedup all results before sending out
|
||||
from substring_search import dedup_search_results
|
||||
|
||||
@@ -23,6 +23,8 @@ This file is part of Search.
|
||||
along with Search. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -78,6 +80,9 @@ def error_reply(msg):
|
||||
reply['message'] = "ERROR: " + msg
|
||||
return pretty_dump(reply)
|
||||
|
||||
def get_mongo_client():
    """ build a MongoClient for the MONGODB_URI defined in api.config
    """
    # the config value is looked up inside the function, at call time
    from api.config import MONGODB_URI as mongo_uri

    mongo_client = MongoClient(mongo_uri)
    return mongo_client
|
||||
|
||||
def get_json(data):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user