Rename prompts for LS, SU, NER, and REL
- app.py +102 -25
- src/tasks.py +8 -8
app.py
CHANGED
@@ -47,6 +47,9 @@ def mean_of_max_per_field(df):
 
 
 def boxplot_per_task(dataframe=None, baselines=None):
+
+    print(dataframe.columns)
+
     tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
 
     if dataframe is None:
@@ -56,7 +59,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
         for task in tasks
     })
 
-    # baseline for each task (if none is passed, use a random value between 50 and 70)
     if baselines is None:
         baselines = {task: np.random.randint(50, 70) for task in tasks}
 
@@ -73,27 +75,26 @@ def boxplot_per_task(dataframe=None, baselines=None):
         fig.add_trace(go.Box(
             y=y_data,
             name=task,
-
-
-            line=dict(color=
+            marker=dict(color=colors[i]),
+            # Change: draw the box outline in a color different from the fill
+            line=dict(color="black", width=2),
             fillcolor=colors[i],
             opacity=0.7,
-            hovertemplate=
-            width=0.6
+            hovertemplate="<b>"+task+"</b><br>Accuracy: %{y:.2f}%<extra></extra>",
+            width=0.6,
+            whiskerwidth=0.2,
+            quartilemethod="linear"
         ))
 
-        # baseline
+        # baseline
         if task in baselines and baselines[task] is not None:
-            # baseline as a horizontal line
             fig.add_shape(
                 type="line",
-                x0=i-0.3, x1=i+0.3,
+                x0=i-0.3, x1=i+0.3,
                 y0=baselines[task], y1=baselines[task],
                 line=dict(color="black", width=2, dash="dash"),
                 xref="x", yref="y"
             )
-
-            # label with the baseline value
             fig.add_annotation(
                 x=i, y=baselines[task],
                 text=f"{baselines[task]}%",
@@ -103,19 +104,19 @@ def boxplot_per_task(dataframe=None, baselines=None):
             )
 
     fig.update_layout(
-        title="Distribution of Model Accuracy by Task
+        title="Distribution of Model Accuracy by Task",
         xaxis_title="Task",
         yaxis_title="Accuracy (%)",
         template="plotly_white",
        boxmode="group",
        dragmode=False,
        font=dict(family="Arial", size=13),
-        margin=dict(b=
-        annotations
+        margin=dict(b=140),
+        annotations=[
            dict(
                text=(
-                    "Boxplots show LLM accuracy in zero/few-shot settings. <br>"
-                    "
+                    "Boxplots show LLM accuracy in zero/few-shot settings. Black dashed lines<br>"
+                    "indicate best-performing supervised models evaluated on EVALITA."
                ),
                xref="paper", yref="paper",
                x=0.5, y=-0.33,
@@ -124,7 +125,6 @@ def boxplot_per_task(dataframe=None, baselines=None):
            )
        ]
    )
-    #fig.update_yaxes(fixedrange=True)
     fig.update_yaxes(range=[0, 100], fixedrange=True)
 
     return fig
@@ -137,6 +137,74 @@ BASELINES = {
 }
 
 
+def boxplot_prompts_per_task(dataframe, tasks=None):
+    if tasks is None:
+        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+
+    fig = go.Figure()
+
+    # Lists so that Average and Best each get a single legend entry
+    avg_x, avg_y = [], []
+    best_x, best_y, best_text = [], [], []
+
+    for task in tasks:
+        avg_col = f"{task} Prompt Average"
+        best_col = f"{task} Best Prompt"
+        best_id_col = f"{task} Best Prompt Id"
+
+        if all(col in dataframe.columns for col in [avg_col, best_col, best_id_col]):
+            avg_value = dataframe[avg_col].mean()
+            avg_x.append(task)
+            avg_y.append(avg_value)
+
+            best_value = dataframe[best_col].mean()
+            best_x.append(task)
+            best_y.append(best_value)
+            best_id = dataframe[best_id_col].mode()[0]  # most frequent best prompt id
+            best_text.append(f"P:{best_id}")
+
+    # Average Accuracy bars (blue)
+    fig.add_trace(go.Bar(
+        x=avg_x,
+        y=avg_y,
+        name="Average Accuracy",
+        marker_color="#1f77b4",
+        #hovertemplate="%{y:.2f}%<extra></extra>"
+        #hovertemplate="<b>" + task + "</b><br>Accuracy: %{y:.2f}%<extra></extra>",
+    ))
+
+    # Best Prompt bars (red)
+    fig.add_trace(go.Bar(
+        x=best_x,
+        y=best_y,
+        name="Best Prompt",
+        marker_color="#d62728",
+        #hovertemplate="%{y:.2f}%<extra></extra>"
+        #hovertemplate = "<b>" + task + "</b><br>Accuracy: %{y:.2f}%<extra></extra>",
+    ))
+
+    # Text above the Best Prompt bars with the prompt ID
+    for x, y, text in zip(best_x, best_y, best_text):
+        fig.add_annotation(
+            x=x,
+            y=y + 1,  # slightly above the bar
+            text=text,
+            showarrow=False,
+            font=dict(size=12, color="black")
+        )
+
+    fig.update_layout(
+        title="Comparison of Average Prompt Accuracy vs Best Prompt Accuracy per Task",
+        xaxis_title="Task",
+        yaxis_title="Accuracy (%)",
+        barmode='group',
+        template="plotly_white",
+        font=dict(family="Arial", size=13),
+        yaxis=dict(range=[0, 100], fixedrange=True)
+    )
+
+    return fig
+
 
 
 def line_chart(dataframe):
@@ -255,11 +323,11 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
 
     for _, row in sorted_dataframe.iterrows():
         if row['IS_FS']: # 5-Few-Shot
-            if row["#Params (B)"] >
-                new_model_column.append(f"{row['Model']}
+            if row["#Params (B)"] > 50 and not large_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🏆")
                 large_medal_fs_assigned = True
-            elif 10 < row["#Params (B)"] <=
-                new_model_column.append(f"{row['Model']}
+            elif 10 < row["#Params (B)"] <= 50 and not medium_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🏆")
                 medium_medal_fs_assigned = True
             elif row["#Params (B)"] <= 10 and not small_medal_fs_assigned:
                 new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🏆")
@@ -267,11 +335,11 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
             else:
                 new_model_column.append(row["Model"])
         else: # 0-Shot
-            if row["#Params (B)"] >
-                new_model_column.append(f"{row['Model']}
+            if row["#Params (B)"] > 50 and not large_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🎖️")
                 large_medal_0shot_assigned = True
-            elif 10 < row["#Params (B)"] <=
-                new_model_column.append(f"{row['Model']}
+            elif 10 < row["#Params (B)"] <= 50 and not medium_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🎖️")
                 medium_medal_0shot_assigned = True
             elif row["#Params (B)"] <= 10 and not small_medal_0shot_assigned:
                 new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🎖️")
@@ -279,6 +347,14 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
             else:
                 new_model_column.append(row["Model"])
 
+
+    # List of columns to update
+    cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
+    # Apply the transformation
+    for col in cols_to_update:
+        dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
+
+
     # Update the Model column
     sorted_dataframe["Model"] = new_model_column
 
@@ -503,6 +579,7 @@ with demo:
            #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
            gr.Plot(value=line_chart(LEADERBOARD_DF))
            gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES))
+            gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF))
 
        # About tab
        with gr.TabItem("📝 About"):
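For readers who want to sanity-check the new chart without running the Space, here is a minimal, self-contained sketch of the two data-handling steps this file adds. The column names (`<TASK> Prompt Average`, `<TASK> Best Prompt`, `<TASK> Best Prompt Id`) are taken from the diff above; the numeric values are invented toy data, not leaderboard results.

```python
import pandas as pd

# Toy leaderboard slice: one row per model (values invented for illustration only).
toy = pd.DataFrame({
    "LS Prompt Average": [41.2, 38.7, 40.0],   # F1 averaged over the two LS prompts
    "LS Best Prompt":    [45.0, 42.3, 44.1],   # F1 of the better LS prompt
    "LS Best Prompt Id": [1, 2, 1],            # legacy prompt ids before the rename
})

# 1) Id shift added in init_leaderboard: legacy ids 1/2 become 7/8,
#    matching the renamed prompts documented in src/tasks.py below.
toy["LS Best Prompt Id"] = toy["LS Best Prompt Id"].replace({1: 7, 2: 8})

# 2) Per-task aggregation performed by boxplot_prompts_per_task:
avg_bar = toy["LS Prompt Average"].mean()      # height of the blue "Average Accuracy" bar
best_bar = toy["LS Best Prompt"].mean()        # height of the red "Best Prompt" bar
best_id = toy["LS Best Prompt Id"].mode()[0]   # most frequent winner -> "P:7" annotation

print(f"LS: avg={avg_bar:.2f}, best={best_bar:.2f}, label=P:{best_id}")
```

In the app itself these aggregates feed the grouped bar chart registered via `gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF))` in the hunk above.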
src/tasks.py
CHANGED
@@ -125,8 +125,8 @@ LS_DESCRIPTION = """### Lexical Substitution (LS) --- *Generative task*
 
 | # | Prompt |
 |-----|--------------------------------------------------------------------------------|
-
-
+| 7 | Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
+| 8 | Devi risolvere un compito di sostituzione lessicale. Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
 
 <small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
 
@@ -137,8 +137,8 @@ SU_DESCRIPTION = """### Summarization (SUM) --- *Generative task*
 
 | # | Prompt |
 |-----|--------------------------------------------------------------------------------|
-
-
+| 7 | Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto: |
+| 8 | Devi risolvere un compito di sintesi automatica del testo. Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto: |
 
 <small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
 
@@ -149,8 +149,8 @@ NER_DESCRIPTION = """### Named Entity Recognition (NER) --- *Generative task*
 
 | # | Prompt |
 |-----|--------------------------------------------------------------------------------|
-
-
+| 7 | Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\\nTesto: '{{text}}'\\nEntità: |
+| 8 | Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\\nTesto: '{{text}}'\\nEntità: |
 
 <small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
 
@@ -161,8 +161,8 @@ REL_DESCRIPTION = """### Relation Extraction (REL) --- *Generative task*
 
 | # | Prompt |
 |-----|--------------------------------------------------------------------------------|
-
-
+| 7 | Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\\nTesto: '{{text}}'\\nRelazioni: |
+| 8 | Devi svolgere un compito di estrazione di relazioni da documenti medici. Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\\nTesto: '{{text}}'\\nRelazioni: |
 
 <small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
 
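As a quick sanity check on the Combined Performance formula repeated in each task description above, here is a small worked example with hypothetical F1 scores (not taken from the leaderboard):

```python
# Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt
best_prompt = 45.0      # hypothetical F1 of the better of the two prompts
prompt_average = 41.0   # hypothetical F1 averaged over the two prompts

combined = (1 - (best_prompt - prompt_average) / 100) * best_prompt
print(round(combined, 2))  # 43.2: a 4-point gap between prompts trims 4% off the best score
```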