mirror of
https://github.com/alexgo-io/stacks-puppet-node.git
synced 2026-04-24 03:45:38 +08:00
157 lines
4.1 KiB
Python
157 lines
4.1 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Search
|
|
~~~~~
|
|
|
|
copyright: (c) 2014 by Halfmoon Labs, Inc.
|
|
copyright: (c) 2015 by Blockstack.org
|
|
|
|
This file is part of Search.
|
|
|
|
Search is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Search is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Search. If not, see <http://www.gnu.org/licenses/>.
|
|
"""
|
|
|
|
""" functions for building the ES/lucene search index and mappings
|
|
"""
|
|
|
|
import sys
import json

from pyes import *
from pymongo import MongoClient

from config import BULK_INSERT_LIMIT
from common import log

# Elasticsearch connection shared by every function in this module.
conn = ES()

# Module-level mongodb client.
c = MongoClient()

# Command line options accepted by the __main__ block of this file.
# (Previously listed '--search', which the entry point never accepted.)
INPUT_OPTIONS = '--create_index --search_bio'
|
|
|
|
|
|
def create_mapping(index_name, index_type):
    """ create lucene index and add/specify document type

        Any existing index called index_name is deleted first and then
        created again, so this always starts from a fresh index.

        index_name -- name of the index to (re)create
        index_type -- document type the 'profile_bio' mapping is put on
    """

    try:
        # delete the old mapping, if exists
        conn.indices.delete_index(index_name)
    except Exception:
        # Best effort: the delete is allowed to fail (e.g. nothing to
        # delete yet).  Catching Exception instead of using a bare except
        # lets KeyboardInterrupt / SystemExit propagate.
        pass

    conn.indices.create_index(index_name)

    mapping = {
        u'profile_bio': {
            'boost': 3.0,
            'index': 'analyzed',
            'store': 'yes',
            'type': u'string',
            'term_vector': 'with_positions_offsets',
        },
    }

    conn.indices.put_mapping(index_type, {'properties': mapping}, [index_name])
|
|
|
|
|
|
def create_people_index():
    """ create a lucene index from existing user data in mongodb

        Recreates 'onename_people_index' through create_mapping() and then
        indexes the bio and username of every document in the
        'search_db.profiles' collection that has a non-empty bio.

        A progress banner is printed every BULK_INSERT_LIMIT successfully
        indexed profiles.  Profiles that fail to index are skipped; their
        total is reported on stderr once the loop is done.
    """

    create_mapping("onename_people_index", "onename_profiles")
    conn.default_indices = ["onename_people_index"]

    db = MongoClient()['search_db']

    counter = 0
    failed = 0

    for profile in db.profiles.find():
        profile_data = profile.get('profile')

        # Reset for every profile.  Without this a profile whose data is
        # not a dict would be indexed with the previous profile's bio.
        profile_bio = None
        if isinstance(profile_data, dict):
            profile_bio = profile_data.get('bio')

        if not profile_bio:
            continue

        try:
            conn.index({'profile_bio': profile_bio,
                        'username': profile['username'],
                        '_boost': 3,
                        },
                       "onename_people_index", "onename_profiles",
                       bulk=True)
        except Exception:
            # Keep going on a bad profile, but remember that it happened
            # instead of dropping the error without a trace.
            failed += 1
            continue

        counter += 1

        # Only report progress right after the counter actually advanced.
        if counter % BULK_INSERT_LIMIT == 0:
            print('-' * 5)
            print('items indexed so far:' + str(counter))
            print('-' * 5)

    if failed:
        sys.stderr.write('profiles that failed to index: %d\n' % failed)

    conn.indices.refresh(["onename_people_index"])
    conn.indices.flush()
|
|
|
|
|
|
def test_query(query, index=['onename_people_index']):
    """ search the 'profile_bio' and 'username' fields and print the hits

        The query is first run with the 'and' operator; if that matches
        nothing it is retried with 'or'.  At most 20 hits are requested.

        query -- query string to search for
        index -- list of indices to count and search in (only read,
                 never modified)

        Returns the hits as a list of dicts with the keys 'username' and
        'profile_bio'; the same list is printed after the individual hits.
    """

    search_fields = ['profile_bio', 'username']

    q = QueryStringQuery(query,
                         search_fields=search_fields,
                         default_operator='and')

    # Count in the same indices that are searched below, so the and/or
    # fallback decision is based on the data actually being queried.
    count = conn.count(query=q, indices=index).count

    if count == 0:
        q = QueryStringQuery(query,
                             search_fields=search_fields,
                             default_operator='or')

    results = conn.search(query=q, size=20, indices=index)

    results_list = []

    for hit in results:
        print('username: ' + hit['username'])
        print('bio: ' + hit['profile_bio'])
        results_list.append({'username': hit['username'],
                             'profile_bio': hit['profile_bio']})

    print(results_list)

    return results_list
|
|
|
|
|
|
def main(argv):
    """ dispatch the command line options

        argv -- full argument list, program name first (like sys.argv)

        Supported invocations:
            --create_index          rebuild the people index
            --search_bio <query>    run a test query and print the hits

        Returns 0 on success and 1 on a usage error or when the selected
        command raised an exception (the exception is printed).
    """

    # Stop here on missing arguments; indexing argv[1] below would raise.
    if len(argv) < 2:
        print("Usage error")
        return 1

    option = argv[1]

    try:
        if option == '--create_index':
            create_people_index()
        elif option == '--search_bio' and len(argv) > 2:
            test_query(query=argv[2])
        else:
            # unknown option, or --search_bio without a query string
            print("Usage error")
            return 1
    except Exception as e:
        print(e)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|