Spaces: Running on CPU Upgrade
feat-add-reranker-tab-0607 #21
by nan - opened

app.py CHANGED
@@ -11,10 +11,10 @@ from src.about import (
 from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
     DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
 from src.display.css_html_js import custom_css
-from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL
+from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
 from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
 from src.read_evals import get_raw_eval_results, get_leaderboard_df
-from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank
+from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank, remove_html
 from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, \
     get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table, get_noreranking_dropdown
 from src.display.gradio_listener import set_listeners
@@ -108,9 +108,8 @@ with demo:
                 show_anonymous = get_anonymous_checkbox()
             with gr.Row():
                 show_revision_and_timestamp = get_revision_and_ts_checkbox()
-
             with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
-                with gr.TabItem("
+                with gr.TabItem("Retrieval + Reranking", id=10):
                     with gr.Row():
                         # search retrieval models
                         with gr.Column():
@@ -149,17 +148,19 @@ with demo:
                         leaderboard_table,
                         queue=True
                     )
-                with gr.TabItem("
-                    with gr.
-
-
+                with gr.TabItem("Retrieval Only", id=11):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            search_bar_retriever = get_search_bar()
+                        with gr.Column(scale=1):
+                            selected_noreranker = get_noreranking_dropdown()
                     lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
                     lb_df_retriever = reset_rank(lb_df_retriever)
-                    hidden_lb_db_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
-                    hidden_lb_db_retriever = reset_rank(hidden_lb_db_retriever)
                     lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
                     # Dummy leaderboard for handling the case when the user uses backspace key
-
+                    hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
+                    hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
+                    hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)

                     set_listeners(
                         "qa",
@@ -188,7 +189,48 @@ with demo:
                         lb_table_retriever,
                         queue=True
                     )
+                with gr.TabItem("Reranking Only", id=12):
+                    lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    lb_df_reranker = reset_rank(lb_df_reranker)
+                    reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
+                        with gr.Column(scale=1):
+                            search_bar_reranker = gr.Textbox(show_label=False, visible=False)
+                    lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
+                    hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
+                    hidden_lb_table_reranker = get_leaderboard_table(
+                        hidden_lb_df_reranker, types_qa, visible=False
+                    )

+                    set_listeners(
+                        "qa",
+                        lb_table_reranker,
+                        hidden_lb_table_reranker,
+                        search_bar_reranker,
+                        selected_domains,
+                        selected_langs,
+                        selected_rerankings_reranker,
+                        show_anonymous,
+                        show_revision_and_timestamp,
+                    )
+                    # set metric listener
+                    selected_metric.change(
+                        update_metric_qa,
+                        [
+                            selected_metric,
+                            selected_domains,
+                            selected_langs,
+                            selected_rerankings_reranker,
+                            search_bar_reranker,
+                            show_anonymous,
+                            show_revision_and_timestamp,
+                        ],
+                        lb_table_reranker,
+                        queue=True
+                    )
         with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
             with gr.Row():
                 with gr.Column(min_width=320):
@@ -211,7 +253,7 @@ with demo:
             with gr.Row():
                 show_revision_and_timestamp = get_revision_and_ts_checkbox()
             with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
-                with gr.TabItem("
+                with gr.TabItem("Retrieval + Reranking", id=20):
                     with gr.Row():
                         with gr.Column():
                             search_bar = get_search_bar()
@@ -255,11 +297,12 @@ with demo:
                         lb_table,
                         queue=True
                     )
-
-
-
-
-
+                with gr.TabItem("Retrieval Only", id=21):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            search_bar_retriever = get_search_bar()
+                        with gr.Column(scale=1):
+                            selected_noreranker = get_noreranking_dropdown()
                     lb_df_retriever_long_doc = leaderboard_df_long_doc[
                         leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
                     ]
@@ -300,6 +343,49 @@ with demo:
                         lb_table_retriever_long_doc,
                         queue=True
                     )
+                with gr.TabItem("Reranking Only", id=22):
+                    lb_df_reranker_ldoc = leaderboard_df_long_doc[
+                        leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"
+                    ]
+                    lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
+                    reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            selected_rerankings_reranker_ldoc = get_reranking_dropdown(reranking_models_reranker_ldoc)
+                        with gr.Column(scale=1):
+                            search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
+                    lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, types_long_doc)
+                    hidden_lb_df_reranker_ldoc = original_df_long_doc[original_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
+                    hidden_lb_table_reranker_ldoc = get_leaderboard_table(
+                        hidden_lb_df_reranker_ldoc, types_long_doc, visible=False
+                    )
+
+                    set_listeners(
+                        "long-doc",
+                        lb_table_reranker_ldoc,
+                        hidden_lb_table_reranker_ldoc,
+                        search_bar_reranker_ldoc,
+                        selected_domains,
+                        selected_langs,
+                        selected_rerankings_reranker_ldoc,
+                        show_anonymous,
+                        show_revision_and_timestamp,
+                    )
+                    selected_metric.change(
+                        update_metric_long_doc,
+                        [
+                            selected_metric,
+                            selected_domains,
+                            selected_langs,
+                            selected_rerankings_reranker_ldoc,
+                            search_bar_reranker_ldoc,
+                            show_anonymous,
+                            show_revision_and_timestamp,
+                        ],
+                        lb_table_reranker_ldoc,
+                        queue=True
+                    )

         with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
             with gr.Column():
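The new "Reranking Only" and "Retrieval Only" tabs lean on two helpers imported from src.utils, reset_rank and remove_html, whose definitions are not part of this diff. A minimal sketch of what they presumably do, assuming the rank column is simply re-numbered after filtering and the reranking-model column stores HTML links (both assumptions, not the project's actual code):

import re
import pandas as pd

# Hypothetical stand-ins for the src.utils helpers used in the diff; the real
# implementations live outside this change.
def reset_rank(df: pd.DataFrame, rank_col: str = "Rank") -> pd.DataFrame:
    # Re-number ranks 1..N after rows have been filtered out (assumed column name).
    df = df.copy()
    df[rank_col] = range(1, len(df) + 1)
    return df

def remove_html(text: str) -> str:
    # Strip markup such as "<a href=...>model</a>" down to the plain model name.
    return re.sub(r"<[^>]+>", "", str(text))

The wiring pattern the new tabs rely on (a control whose .change event re-filters a DataFrame and refreshes the table, as set_listeners and selected_metric.change do above) can be reproduced in a few lines. The following is a self-contained toy example with made-up rows, not the leaderboard's actual set_listeners implementation:

import gradio as gr
import pandas as pd

# Toy leaderboard rows, purely illustrative.
df = pd.DataFrame({
    "Reranking Model": ["some-reranker", "NoReranker"],
    "Retrieval Model": ["BM25", "BM25"],
    "Score": [0.7, 0.5],
})

def filter_table(selected):
    # Keep only the chosen reranking models; show everything when nothing is selected.
    out = df[df["Reranking Model"].isin(selected)] if selected else df
    return out.reset_index(drop=True)

with gr.Blocks() as demo:
    picker = gr.Dropdown(choices=df["Reranking Model"].tolist(),
                         multiselect=True, label="Reranking model")
    table = gr.Dataframe(value=df)
    # Same idea as the listeners registered in the diff: control change -> table update.
    picker.change(filter_table, inputs=picker, outputs=table, queue=True)

if __name__ == "__main__":
    demo.launch()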