Commit
·
23cefb2
1
Parent(s):
07df3b7
update app
Browse files
app.py
CHANGED
|
@@ -9,13 +9,14 @@ sys.path.append(str(path_root))
|
|
| 9 |
|
| 10 |
|
| 11 |
encoder_index_map = {
|
| 12 |
-
'uniCOIL': ('UniCoil', 'index-unicoil'),
|
| 13 |
-
'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'index-splade-pp-ed'),
|
| 14 |
-
'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'index-splade-pp-sd')
|
| 15 |
}
|
| 16 |
|
| 17 |
index = 'index-splade-pp-ed'
|
| 18 |
encoder = 'SpladePlusPlusEnsembleDistil'
|
|
|
|
| 19 |
|
| 20 |
st.set_page_config(page_title="Pyserini with ONNX Runtime",
|
| 21 |
page_icon='๐ธ', layout="centered")
|
|
@@ -24,14 +25,33 @@ cola, colb, colc = st.columns([5, 4, 5])
|
|
| 24 |
with colb:
|
| 25 |
st.image("logo.jpeg")
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
colaa, colbb, colcc = st.columns([1, 8, 1])
|
| 28 |
with colbb:
|
| 29 |
encoder = st.select_slider(
|
| 30 |
-
'Select a query encoder
|
| 31 |
options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
|
| 32 |
st.write('Now Running Encoder: ', encoder)
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
col1, col2 = st.columns([9, 1])
|
| 37 |
with col1:
|
|
@@ -41,8 +61,6 @@ with col2:
|
|
| 41 |
st.write('#')
|
| 42 |
button_clicked = st.button("๐")
|
| 43 |
|
| 44 |
-
searcher = LuceneImpactSearcher(
|
| 45 |
-
f'indexes/{index}', f'{encoder}', encoder_type='onnx')
|
| 46 |
|
| 47 |
if search_query or button_clicked:
|
| 48 |
num_results = None
|
|
@@ -55,10 +73,13 @@ if search_query or button_clicked:
|
|
| 55 |
for i, result in enumerate(search_results[:10]):
|
| 56 |
result_score = result.score
|
| 57 |
result_id = result.docid
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
try:
|
| 61 |
st.write(output, unsafe_allow_html=True)
|
|
|
|
|
|
|
| 62 |
|
| 63 |
except:
|
| 64 |
pass
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
encoder_index_map = {
|
| 12 |
+
'uniCOIL': ('UniCoil', 'castorini/unicoil-noexp-msmarco-passage', 'index-unicoil'),
|
| 13 |
+
'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-ed'),
|
| 14 |
+
'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-sd')
|
| 15 |
}
|
| 16 |
|
| 17 |
index = 'index-splade-pp-ed'
|
| 18 |
encoder = 'SpladePlusPlusEnsembleDistil'
|
| 19 |
+
encoder_index = 0
|
| 20 |
|
| 21 |
st.set_page_config(page_title="Pyserini with ONNX Runtime",
|
| 22 |
page_icon='๐ธ', layout="centered")
|
|
|
|
| 25 |
with colb:
|
| 26 |
st.image("logo.jpeg")
|
| 27 |
|
| 28 |
+
|
| 29 |
+
colaa, colbb, colcc = st.columns([1, 8, 1])
|
| 30 |
+
with colbb:
|
| 31 |
+
runtime = st.select_slider(
|
| 32 |
+
'Select a runtime type',
|
| 33 |
+
options=['PyTorch', 'ONNX Runtime'])
|
| 34 |
+
st.write('Now using: ', runtime)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
colaa, colbb, colcc = st.columns([1, 8, 1])
|
| 38 |
with colbb:
|
| 39 |
encoder = st.select_slider(
|
| 40 |
+
'Select a query encoder',
|
| 41 |
options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
|
| 42 |
st.write('Now Running Encoder: ', encoder)
|
| 43 |
|
| 44 |
+
if runtime == 'PyTorch':
|
| 45 |
+
runtime = 'pytorch'
|
| 46 |
+
runtime_index = 1
|
| 47 |
+
else:
|
| 48 |
+
runtime = 'onnx'
|
| 49 |
+
runtime_index = 0
|
| 50 |
+
|
| 51 |
+
encoder, index = encoder_index_map[encoder][runtime_index], encoder_index_map[encoder][2]
|
| 52 |
+
|
| 53 |
+
searcher = LuceneImpactSearcher(
|
| 54 |
+
f'indexes/{index}', f'{encoder}', encoder_type=f'{runtime}')
|
| 55 |
|
| 56 |
col1, col2 = st.columns([9, 1])
|
| 57 |
with col1:
|
|
|
|
| 61 |
st.write('#')
|
| 62 |
button_clicked = st.button("๐")
|
| 63 |
|
|
|
|
|
|
|
| 64 |
|
| 65 |
if search_query or button_clicked:
|
| 66 |
num_results = None
|
|
|
|
| 73 |
for i, result in enumerate(search_results[:10]):
|
| 74 |
result_score = result.score
|
| 75 |
result_id = result.docid
|
| 76 |
+
contents = json.loads(result.raw)["contents"]
|
| 77 |
+
output = f'<div class="row"> <b>Rank</b>: {i+1} | <b>Document ID</b>: {result_id} | <b>Score</b>:{result_score:.2f}</div>'
|
| 78 |
|
| 79 |
try:
|
| 80 |
st.write(output, unsafe_allow_html=True)
|
| 81 |
+
st.write(
|
| 82 |
+
f'<div class="row">{contents}</div>', unsafe_allow_html=True)
|
| 83 |
|
| 84 |
except:
|
| 85 |
pass
|
pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc
CHANGED
|
Binary files a/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc and b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc differ
|
|
|
pyserini/search/lucene/_impact_searcher.py
CHANGED
|
@@ -142,7 +142,7 @@ class LuceneImpactSearcher:
|
|
| 142 |
|
| 143 |
encoded_query = self.encode(q)
|
| 144 |
|
| 145 |
-
jquery =
|
| 146 |
if self.encoder_type == 'pytorch':
|
| 147 |
for (token, weight) in encoded_query.items():
|
| 148 |
if token in self.idf and self.idf[token] > self.min_idf:
|
|
|
|
| 142 |
|
| 143 |
encoded_query = self.encode(q)
|
| 144 |
|
| 145 |
+
jquery = JHashMap()
|
| 146 |
if self.encoder_type == 'pytorch':
|
| 147 |
for (token, weight) in encoded_query.items():
|
| 148 |
if token in self.idf and self.idf[token] > self.min_idf:
|