Spaces: Running on CPU Upgrade
refactor: remove the unnecessary variables
Browse files
- src/display/utils.py +0 -2
- src/read_evals.py +5 -9
- tests/src/display/test_utils.py +1 -3
src/display/utils.py
CHANGED
|
@@ -80,6 +80,4 @@ TYPES_QA = [c.type for c in fields(AutoEvalColumnQA) if not c.hidden]
|
|
| 80 |
TYPES_LONG_DOC = [c.type for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
|
| 81 |
COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default and not c.hidden]
|
| 82 |
|
| 83 |
-
QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
|
| 84 |
|
| 85 |
-
LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
|
|
|
|
| 80 |
TYPES_LONG_DOC = [c.type for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
|
| 81 |
COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default and not c.hidden]
|
| 82 |
|
|
|
|
| 83 |
|
|
|
src/read_evals.py
CHANGED
|
@@ -6,13 +6,8 @@ from typing import List
|
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
|
| 9 |
-
from src.benchmarks import get_safe_name
|
| 10 |
-
from src.display.utils import (
|
| 11 |
-
COLS_QA,
|
| 12 |
-
QA_BENCHMARK_COLS,
|
| 13 |
-
COLS_LONG_DOC,
|
| 14 |
-
LONG_DOC_BENCHMARK_COLS
|
| 15 |
-
)
|
| 16 |
from src.display.column_names import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, \
|
| 17 |
COL_NAME_RETRIEVAL_MODEL_LINK, COL_NAME_RERANKING_MODEL_LINK, COL_NAME_RANK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, \
|
| 18 |
COL_NAME_IS_ANONYMOUS
|
|
@@ -21,6 +16,7 @@ from src.display.formatting import make_clickable_model
|
|
| 21 |
|
| 22 |
pd.options.mode.copy_on_write = True
|
| 23 |
|
|
|
|
| 24 |
def calculate_mean(row):
|
| 25 |
if pd.isna(row).any():
|
| 26 |
return -1
|
|
@@ -189,10 +185,10 @@ def get_leaderboard_df(raw_data: List[FullEvalResult], task: str, metric: str) -
|
|
| 189 |
cols = [COL_NAME_IS_ANONYMOUS, ]
|
| 190 |
if task == "qa":
|
| 191 |
cols += COLS_QA
|
| 192 |
-
benchmark_cols = QA_BENCHMARK_COLS
|
| 193 |
elif task == "long-doc":
|
| 194 |
cols += COLS_LONG_DOC
|
| 195 |
-
benchmark_cols = LONG_DOC_BENCHMARK_COLS
|
| 196 |
else:
|
| 197 |
raise NotImplemented
|
| 198 |
all_data_json = []
|
|
|
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
|
| 9 |
+
from src.benchmarks import get_safe_name, BenchmarksQA, BenchmarksLongDoc
|
| 10 |
+
from src.display.utils import COLS_QA, COLS_LONG_DOC
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from src.display.column_names import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, \
|
| 12 |
COL_NAME_RETRIEVAL_MODEL_LINK, COL_NAME_RERANKING_MODEL_LINK, COL_NAME_RANK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, \
|
| 13 |
COL_NAME_IS_ANONYMOUS
|
|
|
|
| 16 |
|
| 17 |
pd.options.mode.copy_on_write = True
|
| 18 |
|
| 19 |
+
|
| 20 |
def calculate_mean(row):
|
| 21 |
if pd.isna(row).any():
|
| 22 |
return -1
|
|
|
|
| 185 |
cols = [COL_NAME_IS_ANONYMOUS, ]
|
| 186 |
if task == "qa":
|
| 187 |
cols += COLS_QA
|
| 188 |
+
benchmark_cols = [t.value.col_name for t in BenchmarksQA]
|
| 189 |
elif task == "long-doc":
|
| 190 |
cols += COLS_LONG_DOC
|
| 191 |
+
benchmark_cols = [t.value.col_name for t in BenchmarksLongDoc]
|
| 192 |
else:
|
| 193 |
raise NotImplemented
|
| 194 |
all_data_json = []
|
tests/src/display/test_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from src.display.utils import fields, AutoEvalColumnQA, COLS_QA, COLS_LONG_DOC, COLS_LITE, TYPES_QA, TYPES_LONG_DOC, QA_BENCHMARK_COLS, LONG_DOC_BENCHMARK_COLS, get_default_auto_eval_column_dict
|
| 3 |
|
| 4 |
|
| 5 |
def test_fields():
|
|
@@ -13,8 +13,6 @@ def test_macro_variables():
|
|
| 13 |
print(f'COLS_LITE: {COLS_LITE}')
|
| 14 |
print(f'TYPES_QA: {TYPES_QA}')
|
| 15 |
print(f'TYPES_LONG_DOC: {TYPES_LONG_DOC}')
|
| 16 |
-
print(f'QA_BENCHMARK_COLS: {QA_BENCHMARK_COLS}')
|
| 17 |
-
print(f'LONG_DOC_BENCHMARK_COLS: {LONG_DOC_BENCHMARK_COLS}')
|
| 18 |
|
| 19 |
|
| 20 |
def test_get_default_auto_eval_column_dict():
|
|
|
|
| 1 |
import pytest
|
| 2 |
+
from src.display.utils import fields, AutoEvalColumnQA, COLS_QA, COLS_LONG_DOC, COLS_LITE, TYPES_QA, TYPES_LONG_DOC, get_default_auto_eval_column_dict
|
| 3 |
|
| 4 |
|
| 5 |
def test_fields():
|
|
|
|
| 13 |
print(f'COLS_LITE: {COLS_LITE}')
|
| 14 |
print(f'TYPES_QA: {TYPES_QA}')
|
| 15 |
print(f'TYPES_LONG_DOC: {TYPES_LONG_DOC}')
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def test_get_default_auto_eval_column_dict():
|