yangzhitao committed on
Commit
fe8ec74
·
1 Parent(s): df06dc3

refactor: add environment configuration and refactor settings management

Browse files
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ HF_TOKEN=changethis
.vscode/cspell.json CHANGED
@@ -1,5 +1,7 @@
1
  {
2
  "words": [
 
 
3
  "modelcard",
4
  "sentencepiece"
5
  ]
 
1
  {
2
  "words": [
3
+ "changethis",
4
+ "initialisation",
5
  "modelcard",
6
  "sentencepiece"
7
  ]
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "[python]": {
4
+ "editor.formatOnSave": true,
5
+ "editor.defaultFormatter": "charliermarsh.ruff",
6
+ "editor.codeActionsOnSave": {
7
+ "source.fixAll.ruff": "always",
8
+ "source.organizeImports.ruff": "always"
9
+ }
10
+ }
11
+ }
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
5
  from huggingface_hub import snapshot_download
@@ -24,49 +23,54 @@ from src.display.utils import (
24
  WeightType,
25
  fields,
26
  )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
  from src.submission.submit import add_new_eval
30
 
31
 
32
  def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
 
35
 
36
  # Space initialisation
37
  try:
38
- print(EVAL_REQUESTS_PATH)
39
  snapshot_download(
40
- repo_id=QUEUE_REPO,
41
- local_dir=EVAL_REQUESTS_PATH,
42
  repo_type="dataset",
43
  tqdm_class=None,
44
  etag_timeout=30,
45
- token=TOKEN,
46
  )
47
  except Exception:
48
  restart_space()
49
  try:
50
- print(EVAL_RESULTS_PATH)
51
  snapshot_download(
52
- repo_id=RESULTS_REPO,
53
- local_dir=EVAL_RESULTS_PATH,
54
  repo_type="dataset",
55
  tqdm_class=None,
56
  etag_timeout=30,
57
- token=TOKEN,
58
  )
59
  except Exception:
60
  restart_space()
61
 
62
 
63
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
 
 
 
 
64
 
65
  (
66
  finished_eval_queue_df,
67
  running_eval_queue_df,
68
  pending_eval_queue_df,
69
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
70
 
71
 
72
  def init_leaderboard(dataframe):
 
1
  import gradio as gr
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
4
  from huggingface_hub import snapshot_download
 
23
  WeightType,
24
  fields,
25
  )
26
+ from src.envs import API, settings
27
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
28
  from src.submission.submit import add_new_eval
29
 
30
 
31
  def restart_space():
32
+ API.restart_space(repo_id=settings.REPO_ID)
33
 
34
 
35
  # Space initialisation
36
  try:
37
+ print(settings.EVAL_REQUESTS_PATH)
38
  snapshot_download(
39
+ repo_id=settings.QUEUE_REPO,
40
+ local_dir=settings.EVAL_REQUESTS_PATH,
41
  repo_type="dataset",
42
  tqdm_class=None,
43
  etag_timeout=30,
44
+ token=settings.TOKEN,
45
  )
46
  except Exception:
47
  restart_space()
48
  try:
49
+ print(settings.EVAL_RESULTS_PATH)
50
  snapshot_download(
51
+ repo_id=settings.RESULTS_REPO,
52
+ local_dir=settings.EVAL_RESULTS_PATH,
53
  repo_type="dataset",
54
  tqdm_class=None,
55
  etag_timeout=30,
56
+ token=settings.TOKEN,
57
  )
58
  except Exception:
59
  restart_space()
60
 
61
 
62
+ LEADERBOARD_DF = get_leaderboard_df(
63
+ settings.EVAL_RESULTS_PATH,
64
+ settings.EVAL_REQUESTS_PATH,
65
+ COLS,
66
+ BENCHMARK_COLS,
67
+ )
68
 
69
  (
70
  finished_eval_queue_df,
71
  running_eval_queue_df,
72
  pending_eval_queue_df,
73
+ ) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)
74
 
75
 
76
  def init_leaderboard(dataframe):
pyproject.toml CHANGED
@@ -21,6 +21,8 @@ dependencies = [
21
  "transformers",
22
  "tokenizers>=0.15.0",
23
  "sentencepiece",
 
 
24
  ]
25
 
26
  [dependency-groups]
 
21
  "transformers",
22
  "tokenizers>=0.15.0",
23
  "sentencepiece",
24
+ "python-dotenv>=1.2.1",
25
+ "pydantic-settings>=2.11.0",
26
  ]
27
 
28
  [dependency-groups]
src/display/utils.py CHANGED
@@ -1,8 +1,6 @@
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
4
- import pandas as pd
5
-
6
  from src.about import Tasks
7
 
8
 
 
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
 
 
4
  from src.about import Tasks
5
 
6
 
src/envs.py CHANGED
@@ -1,25 +1,67 @@
1
- import os
 
 
2
 
3
  from huggingface_hub import HfApi
 
 
4
 
 
5
  # Info to change for your repository
6
  # ----------------------------------
7
- TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
- OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
- # ----------------------------------
11
 
12
- REPO_ID = f"{OWNER}/leaderboard"
13
- QUEUE_REPO = f"{OWNER}/requests"
14
- RESULTS_REPO = f"{OWNER}/results"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # If you setup a cache later, just change HF_HOME
17
- CACHE_PATH = os.getenv("HF_HOME", ".")
 
 
18
 
19
- # Local caches
20
- EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
21
- EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
22
- EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
23
- EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
24
 
25
- API = HfApi(token=TOKEN)
 
 
1
+ from functools import cached_property
2
+ from pathlib import Path
3
+ from typing import Annotated
4
 
5
  from huggingface_hub import HfApi
6
+ from pydantic import Field, computed_field
7
+ from pydantic_settings import BaseSettings, SettingsConfigDict
8
 
9
+ # ----------------------------------
10
  # Info to change for your repository
11
  # ----------------------------------
 
12
 
 
 
13
 
14
+ class Settings(BaseSettings):
15
+ model_config = SettingsConfigDict(env_file=".env")
16
+
17
+ TOKEN: Annotated[str, Field(..., alias="HF_TOKEN", description="A read/write token for your org")]
18
+
19
+ # Change to your org - don't forget to create a results and request dataset, with the correct format!
20
+ OWNER: Annotated[
21
+ str,
22
+ Field("y-playground-backend"),
23
+ ]
24
+
25
+ @computed_field
26
+ def REPO_ID(self) -> str:
27
+ return (Path(self.OWNER) / "leaderboard").as_posix()
28
+
29
+ @computed_field
30
+ def QUEUE_REPO(self) -> str:
31
+ return (Path(self.OWNER) / "requests").as_posix()
32
+
33
+ @computed_field
34
+ def RESULTS_REPO(self) -> str:
35
+ return (Path(self.OWNER) / "results").as_posix()
36
+
37
+ CACHE_PATH: Annotated[
38
+ str,
39
+ Field(".", alias="HF_HOME", description="If you setup a cache later, just change `HF_HOME`"),
40
+ ]
41
+
42
+ # Local caches
43
+
44
+ @computed_field
45
+ def EVAL_REQUESTS_PATH(self) -> str:
46
+ return (Path(self.CACHE_PATH) / "eval-queue").as_posix()
47
+
48
+ @computed_field
49
+ def EVAL_RESULTS_PATH(self) -> str:
50
+ return (Path(self.CACHE_PATH) / "eval-results").as_posix()
51
+
52
+ @computed_field
53
+ def EVAL_REQUESTS_PATH_BACKEND(self) -> str:
54
+ return (Path(self.CACHE_PATH) / "eval-queue-bk").as_posix()
55
+
56
+ @computed_field
57
+ def EVAL_RESULTS_PATH_BACKEND(self) -> str:
58
+ return (Path(self.CACHE_PATH) / "eval-results-bk").as_posix()
59
 
60
+ @computed_field
61
+ @cached_property
62
+ def API(self) -> HfApi:
63
+ return HfApi(token=self.TOKEN)
64
 
 
 
 
 
 
65
 
66
+ settings = Settings()
67
+ API = settings.API
src/leaderboard/read_evals.py CHANGED
@@ -1,6 +1,5 @@
1
  import glob
2
  import json
3
- import math
4
  import os
5
  from dataclasses import dataclass
6
 
 
1
  import glob
2
  import json
 
3
  import os
4
  from dataclasses import dataclass
5
 
src/submission/check_validity.py CHANGED
@@ -1,8 +1,6 @@
1
  import json
2
  import os
3
- import re
4
  from collections import defaultdict
5
- from datetime import datetime, timedelta, timezone
6
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
@@ -43,12 +41,12 @@ def is_model_on_hub(
43
  )
44
  if test_tokenizer:
45
  try:
46
- tk = AutoTokenizer.from_pretrained(
47
  model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
48
  )
49
  except ValueError as e:
50
  return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
51
- except Exception as e:
52
  return (
53
  False,
54
  "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
@@ -63,7 +61,7 @@ def is_model_on_hub(
63
  None,
64
  )
65
 
66
- except Exception as e:
67
  return False, "was not found on hub!", None
68
 
69
 
 
1
  import json
2
  import os
 
3
  from collections import defaultdict
 
4
 
5
  import huggingface_hub
6
  from huggingface_hub import ModelCard
 
41
  )
42
  if test_tokenizer:
43
  try:
44
+ _tk = AutoTokenizer.from_pretrained(
45
  model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
46
  )
47
  except ValueError as e:
48
  return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
49
+ except Exception:
50
  return (
51
  False,
52
  "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
 
61
  None,
62
  )
63
 
64
+ except Exception:
65
  return False, "was not found on hub!", None
66
 
67
 
src/submission/submit.py CHANGED
@@ -4,7 +4,7 @@ import sys
4
  from datetime import datetime, timezone
5
 
6
  from src.display.formatting import styled_error, styled_message, styled_warning
7
- from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
8
  from src.submission.check_validity import (
9
  already_submitted_models,
10
  check_model_card,
@@ -32,7 +32,7 @@ def add_new_eval(
32
  global REQUESTED_MODELS
33
  global USERS_TO_SUBMISSION_DATES
34
  if not REQUESTED_MODELS:
35
- REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
36
 
37
  user_name = ""
38
  model_path = model
@@ -53,13 +53,13 @@ def add_new_eval(
53
  # Is the model on the hub?
54
  if weight_type in ["Delta", "Adapter"]:
55
  base_model_on_hub, error, _ = is_model_on_hub(
56
- model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
57
  )
58
  if not base_model_on_hub:
59
  return styled_error(f'Base model "{base_model}" {error}')
60
 
61
  if not weight_type == "Adapter":
62
- model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
63
  if not model_on_hub:
64
  return styled_error(f'Model "{model}" {error}')
65
 
@@ -104,7 +104,7 @@ def add_new_eval(
104
  return styled_warning("This model has been already submitted.")
105
 
106
  print("Creating eval file")
107
- OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
108
  os.makedirs(OUT_DIR, exist_ok=True)
109
  out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
110
 
@@ -115,7 +115,7 @@ def add_new_eval(
115
  API.upload_file(
116
  path_or_fileobj=out_path,
117
  path_in_repo=out_path.split("eval-queue/")[1],
118
- repo_id=QUEUE_REPO,
119
  repo_type="dataset",
120
  commit_message=f"Add {model} to eval queue",
121
  )
 
4
  from datetime import datetime, timezone
5
 
6
  from src.display.formatting import styled_error, styled_message, styled_warning
7
+ from src.envs import API, settings
8
  from src.submission.check_validity import (
9
  already_submitted_models,
10
  check_model_card,
 
32
  global REQUESTED_MODELS
33
  global USERS_TO_SUBMISSION_DATES
34
  if not REQUESTED_MODELS:
35
+ REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(settings.EVAL_REQUESTS_PATH)
36
 
37
  user_name = ""
38
  model_path = model
 
53
  # Is the model on the hub?
54
  if weight_type in ["Delta", "Adapter"]:
55
  base_model_on_hub, error, _ = is_model_on_hub(
56
+ model_name=base_model, revision=revision, token=settings.TOKEN, test_tokenizer=True
57
  )
58
  if not base_model_on_hub:
59
  return styled_error(f'Base model "{base_model}" {error}')
60
 
61
  if not weight_type == "Adapter":
62
+ model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=settings.TOKEN, test_tokenizer=True)
63
  if not model_on_hub:
64
  return styled_error(f'Model "{model}" {error}')
65
 
 
104
  return styled_warning("This model has been already submitted.")
105
 
106
  print("Creating eval file")
107
+ OUT_DIR = f"{settings.EVAL_REQUESTS_PATH}/{user_name}"
108
  os.makedirs(OUT_DIR, exist_ok=True)
109
  out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
110
 
 
115
  API.upload_file(
116
  path_or_fileobj=out_path,
117
  path_in_repo=out_path.split("eval-queue/")[1],
118
+ repo_id=settings.QUEUE_REPO,
119
  repo_type="dataset",
120
  commit_message=f"Add {model} to eval queue",
121
  )
uv.lock CHANGED
@@ -681,7 +681,9 @@ dependencies = [
681
  { name = "matplotlib" },
682
  { name = "numpy" },
683
  { name = "pandas" },
 
684
  { name = "python-dateutil" },
 
685
  { name = "sentencepiece" },
686
  { name = "tokenizers" },
687
  { name = "tqdm" },
@@ -705,7 +707,9 @@ requires-dist = [
705
  { name = "matplotlib" },
706
  { name = "numpy" },
707
  { name = "pandas" },
 
708
  { name = "python-dateutil" },
 
709
  { name = "sentencepiece" },
710
  { name = "tokenizers", specifier = ">=0.15.0" },
711
  { name = "tqdm" },
@@ -1024,6 +1028,20 @@ wheels = [
1024
  { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" },
1025
  ]
1026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  [[package]]
1028
  name = "pydub"
1029
  version = "0.25.1"
@@ -1063,6 +1081,15 @@ wheels = [
1063
  { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
1064
  ]
1065
 
 
 
 
 
 
 
 
 
 
1066
  [[package]]
1067
  name = "python-multipart"
1068
  version = "0.0.20"
 
681
  { name = "matplotlib" },
682
  { name = "numpy" },
683
  { name = "pandas" },
684
+ { name = "pydantic-settings" },
685
  { name = "python-dateutil" },
686
+ { name = "python-dotenv" },
687
  { name = "sentencepiece" },
688
  { name = "tokenizers" },
689
  { name = "tqdm" },
 
707
  { name = "matplotlib" },
708
  { name = "numpy" },
709
  { name = "pandas" },
710
+ { name = "pydantic-settings", specifier = ">=2.11.0" },
711
  { name = "python-dateutil" },
712
+ { name = "python-dotenv", specifier = ">=1.2.1" },
713
  { name = "sentencepiece" },
714
  { name = "tokenizers", specifier = ">=0.15.0" },
715
  { name = "tqdm" },
 
1028
  { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" },
1029
  ]
1030
 
1031
+ [[package]]
1032
+ name = "pydantic-settings"
1033
+ version = "2.11.0"
1034
+ source = { registry = "https://pypi.org/simple" }
1035
+ dependencies = [
1036
+ { name = "pydantic" },
1037
+ { name = "python-dotenv" },
1038
+ { name = "typing-inspection" },
1039
+ ]
1040
+ sdist = { url = "https://files.pythonhosted.org/packages/20/c5/dbbc27b814c71676593d1c3f718e6cd7d4f00652cefa24b75f7aa3efb25e/pydantic_settings-2.11.0.tar.gz", hash = "sha256:d0e87a1c7d33593beb7194adb8470fc426e95ba02af83a0f23474a04c9a08180", size = 188394, upload-time = "2025-09-24T14:19:11.764Z" }
1041
+ wheels = [
1042
+ { url = "https://files.pythonhosted.org/packages/83/d6/887a1ff844e64aa823fb4905978d882a633cfe295c32eacad582b78a7d8b/pydantic_settings-2.11.0-py3-none-any.whl", hash = "sha256:fe2cea3413b9530d10f3a5875adffb17ada5c1e1bab0b2885546d7310415207c", size = 48608, upload-time = "2025-09-24T14:19:10.015Z" },
1043
+ ]
1044
+
1045
  [[package]]
1046
  name = "pydub"
1047
  version = "0.25.1"
 
1081
  { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
1082
  ]
1083
 
1084
+ [[package]]
1085
+ name = "python-dotenv"
1086
+ version = "1.2.1"
1087
+ source = { registry = "https://pypi.org/simple" }
1088
+ sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" }
1089
+ wheels = [
1090
+ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
1091
+ ]
1092
+
1093
  [[package]]
1094
  name = "python-multipart"
1095
  version = "0.0.20"