Update app.py
Browse files
app.py
CHANGED
|
@@ -164,23 +164,23 @@ def get_cn_table(model_table_df):
|
|
| 164 |
values.append(row)
|
| 165 |
return values
|
| 166 |
|
| 167 |
-
def build_leaderboard_tab(
|
| 168 |
if leaderboard_table_file:
|
| 169 |
-
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
md_head = f"""
|
| 175 |
# 🏆 OCRBench v2 Leaderboard
|
| 176 |
-
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) |
|
| 177 |
"""
|
| 178 |
gr.Markdown(md_head, elem_id="leaderboard_markdown")
|
| 179 |
with gr.Tabs() as tabs:
|
| 180 |
# arena table
|
| 181 |
-
with gr.Tab("OCRBench v2", id=0):
|
| 182 |
-
arena_table_vals = get_arena_table(
|
| 183 |
-
md = "OCRBench v2 is a
|
| 184 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
| 185 |
gr.Dataframe(
|
| 186 |
headers=[
|
|
@@ -213,12 +213,12 @@ def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show
|
|
| 213 |
],
|
| 214 |
value=arena_table_vals,
|
| 215 |
elem_id="arena_leaderboard_dataframe",
|
| 216 |
-
column_widths=[90, 150, 120,
|
| 217 |
wrap=True,
|
| 218 |
)
|
| 219 |
-
with gr.Tab("OCRBench v2
|
| 220 |
-
arena_table_vals = get_cn_table(
|
| 221 |
-
md = "OCRBench is a
|
| 222 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
| 223 |
gr.Dataframe(
|
| 224 |
headers=[
|
|
@@ -253,11 +253,11 @@ def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show
|
|
| 253 |
pass
|
| 254 |
md_tail = f"""
|
| 255 |
# Notice
|
| 256 |
-
Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response.
|
| 257 |
-
If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR)
|
| 258 |
gr.Markdown(md_tail, elem_id="leaderboard_markdown")
|
| 259 |
|
| 260 |
-
def build_demo(
|
| 261 |
text_size = gr.themes.sizes.text_lg
|
| 262 |
|
| 263 |
with gr.Blocks(
|
|
@@ -266,16 +266,16 @@ def build_demo(leaderboard_table_file, leaderboard_table_file_2):
|
|
| 266 |
css=block_css,
|
| 267 |
) as demo:
|
| 268 |
leader_components = build_leaderboard_tab(
|
| 269 |
-
|
| 270 |
)
|
| 271 |
return demo
|
| 272 |
|
| 273 |
if __name__ == "__main__":
|
| 274 |
parser = argparse.ArgumentParser()
|
| 275 |
parser.add_argument("--share", action="store_true")
|
| 276 |
-
parser.add_argument("--
|
| 277 |
-
parser.add_argument("--
|
| 278 |
args = parser.parse_args()
|
| 279 |
|
| 280 |
-
demo = build_demo(args.
|
| 281 |
demo.launch()
|
|
|
|
| 164 |
values.append(row)
|
| 165 |
return values
|
| 166 |
|
| 167 |
+
def build_leaderboard_tab(leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=False):
|
| 168 |
if leaderboard_table_file:
|
| 169 |
+
data_en = load_leaderboard_table_csv(leaderboard_table_file_en)
|
| 170 |
+
data_cn = load_leaderboard_table_csv(leaderboard_table_file_cn)
|
| 171 |
|
| 172 |
+
model_table_df_en = pd.DataFrame(data_en)
|
| 173 |
+
model_table_df_cn = pd.DataFrame(data_cn)
|
| 174 |
md_head = f"""
|
| 175 |
# 🏆 OCRBench v2 Leaderboard
|
| 176 |
+
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) |
|
| 177 |
"""
|
| 178 |
gr.Markdown(md_head, elem_id="leaderboard_markdown")
|
| 179 |
with gr.Tabs() as tabs:
|
| 180 |
# arena table
|
| 181 |
+
with gr.Tab("OCRBench v2 English subset", id=0):
|
| 182 |
+
arena_table_vals = get_arena_table(model_table_df_en)
|
| 183 |
+
md = "OCRBench v2 is a large-scale bilingual text-centric benchmark with currently the most comprehensive set of tasks (4× more tasks than the previous multi-scene benchmark OCRBench), the widest coverage of scenarios (31 diverse scenarios including street scene, receipt, formula, diagram, and so on), and thorough evaluation metrics, with a total of 10,000 human-verified question-answering pairs and a high proportion of difficult samples."
|
| 184 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
| 185 |
gr.Dataframe(
|
| 186 |
headers=[
|
|
|
|
| 213 |
],
|
| 214 |
value=arena_table_vals,
|
| 215 |
elem_id="arena_leaderboard_dataframe",
|
| 216 |
+
column_widths=[90, 150, 120, 170, 150, 150, 150, 150, 170, 170, 150, 150],
|
| 217 |
wrap=True,
|
| 218 |
)
|
| 219 |
+
with gr.Tab("OCRBench v2 Chinese subsets", id=1):
|
| 220 |
+
arena_table_vals = get_cn_table(model_table_df_cn)
|
| 221 |
+
md = "OCRBench v2 is a large-scale bilingual text-centric benchmark with currently the most comprehensive set of tasks (4× more tasks than the previous multi-scene benchmark OCRBench), the widest coverage of scenarios (31 diverse scenarios including street scene, receipt, formula, diagram, and so on), and thorough evaluation metrics, with a total of 10,000 human-verified question-answering pairs and a high proportion of difficult samples."
|
| 222 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
| 223 |
gr.Dataframe(
|
| 224 |
headers=[
|
|
|
|
| 253 |
pass
|
| 254 |
md_tail = f"""
|
| 255 |
# Notice
|
| 256 |
+
Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response.
|
| 257 |
+
If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) and feel free to contact us via email at [email protected]. We will update the leaderboard in time."""
|
| 258 |
gr.Markdown(md_tail, elem_id="leaderboard_markdown")
|
| 259 |
|
| 260 |
+
def build_demo(leaderboard_table_file_en, leaderboard_table_file_cn):
|
| 261 |
text_size = gr.themes.sizes.text_lg
|
| 262 |
|
| 263 |
with gr.Blocks(
|
|
|
|
| 266 |
css=block_css,
|
| 267 |
) as demo:
|
| 268 |
leader_components = build_leaderboard_tab(
|
| 269 |
+
leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=True
|
| 270 |
)
|
| 271 |
return demo
|
| 272 |
|
| 273 |
if __name__ == "__main__":
|
| 274 |
parser = argparse.ArgumentParser()
|
| 275 |
parser.add_argument("--share", action="store_true")
|
| 276 |
+
parser.add_argument("--OCRBench_file_en", type=str, default="./OCRBench_en.csv")
|
| 277 |
+
parser.add_argument("--OCRBench_file_cn", type=str, default="./OCRBench_cn.csv")
|
| 278 |
args = parser.parse_args()
|
| 279 |
|
| 280 |
+
demo = build_demo(args.OCRBench_file_en, args.OCRBench_file_cn)
|
| 281 |
demo.launch()
|