Spaces: Running on CPU Upgrade
fix: fix the bug in selecting reranking models
Browse files
- src/utils.py (+11 -6)
src/utils.py
CHANGED
|
@@ -13,9 +13,17 @@ from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC,
|
|
| 13 |
from src.envs import API, SEARCH_RESULTS_REPO
|
| 14 |
from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
| 18 |
-
return df.loc[df[
|
| 19 |
|
| 20 |
|
| 21 |
def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -99,7 +107,6 @@ def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, t
|
|
| 99 |
selected_cols.append(c)
|
| 100 |
# We use COLS to maintain sorting
|
| 101 |
filtered_df = df[FIXED_COLS + selected_cols]
|
| 102 |
-
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].mean(axis=1, numeric_only=True).round(decimals=2)
|
| 103 |
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
|
| 104 |
filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
|
| 105 |
filtered_df.reset_index(inplace=True, drop=True)
|
|
@@ -116,14 +123,12 @@ def update_table(
|
|
| 116 |
query: str,
|
| 117 |
show_anonymous: bool
|
| 118 |
):
|
| 119 |
-
filtered_df = hidden_df
|
| 120 |
if not show_anonymous:
|
| 121 |
-
filtered_df = hidden_df.copy()
|
| 122 |
filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
|
| 123 |
filtered_df = filter_models(filtered_df, reranking_query)
|
| 124 |
filtered_df = filter_queries(query, filtered_df)
|
| 125 |
-
|
| 126 |
-
return df
|
| 127 |
|
| 128 |
|
| 129 |
def update_table_long_doc(
|
|
|
|
| 13 |
from src.envs import API, SEARCH_RESULTS_REPO
|
| 14 |
from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
|
| 15 |
|
| 16 |
+
import re
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def remove_html(input_str):
|
| 20 |
+
# Regular expression for finding HTML tags
|
| 21 |
+
clean = re.sub(r'<.*?>', '', input_str)
|
| 22 |
+
return clean
|
| 23 |
+
|
| 24 |
|
| 25 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
| 26 |
+
return df.loc[df[COL_NAME_RERANKING_MODEL].apply(remove_html).isin(reranking_query)]
|
| 27 |
|
| 28 |
|
| 29 |
def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 107 |
selected_cols.append(c)
|
| 108 |
# We use COLS to maintain sorting
|
| 109 |
filtered_df = df[FIXED_COLS + selected_cols]
|
|
|
|
| 110 |
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
|
| 111 |
filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
|
| 112 |
filtered_df.reset_index(inplace=True, drop=True)
|
|
|
|
| 123 |
query: str,
|
| 124 |
show_anonymous: bool
|
| 125 |
):
|
| 126 |
+
filtered_df = hidden_df.copy()
|
| 127 |
if not show_anonymous:
|
|
|
|
| 128 |
filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
|
| 129 |
filtered_df = filter_models(filtered_df, reranking_query)
|
| 130 |
filtered_df = filter_queries(query, filtered_df)
|
| 131 |
+
return select_columns(filtered_df, domains, langs, task='qa')
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
def update_table_long_doc(
|