alx-d committed
Commit 71fcefe (verified) · Parent: b256930

Upload folder using huggingface_hub
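The commit message indicates the folder was pushed with huggingface_hub. As a rough, hedged illustration only (the repo id, folder path, and repo type below are placeholders, not taken from this commit), such an upload typically looks like:

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_folder(
    folder_path=".",            # local folder holding the CSV results and advanced_rag.py
    repo_id="user/repo",        # placeholder; replace with the actual repo id
    repo_type="space",          # assumption; use "model" or "dataset" as appropriate
    commit_message="Upload folder using huggingface_hub",
)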

MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv ADDED
@@ -0,0 +1,31 @@
+ Run,Output,Input Tokens,Output Tokens,Tokens,Error Retries,Time (s)
+ 1,"<think>
+
+ </think>
+
+ Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+ 3,5,4,2,2,5,4,3,2,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4",Input tokens: 880,Output tokens: 212,212,0,12.842
+ 2,"<think>
+
+ </think>
+
+ Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+ 3,5,4,3,3,5,4,4,3,3,5,5,4,3,3,2,4,3,3,3,5,4,3,3,5,3,3,3,3,3,3,3,3,3,3,3,3,3,3",Input tokens: 880,Output tokens: 212,212,0,7.747
+ 3,"<think>
+
+ </think>
+
+ Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+ 4, 3, 2, 1, 2, 4, 3, 1, 2, 1, 4, 3, 2, 1, 2, 3, 4, 3, 2, 1, 3, 2, 3, 1, 2, 3, 2, 1, 2, 3, 4, 3, 2, 1, 2, 3, 4, 3, 2",Input tokens: 880,Output tokens: 212,212,0,8.148
+ 4,"<think>
+
+ </think>
+
+ Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+ 4,4,3,2,2,5,4,2,2,2,5,4,2,3,1,2,4,3,2,1,5,4,3,2,1,2,3,2,2,1,5,2,3,4,2,3,3,4,2",Input tokens: 880,Output tokens: 212,212,0,9.508
+ 5,"<think>
+
+ </think>
+
+ Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+ 3,5,4,2,1,5,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2",Input tokens: 880,Output tokens: 212,212,0,7.261
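Each row of the added CSV stores one run of the questionnaire: the Run index, the model's raw Output (an empty <think> block followed by a header row and 39 comma-separated ratings), token counts, the retry count, and wall-clock time. A hedged sketch for pulling the ratings back out; the filename is the one added above, and treating the last line of Output as the rating vector is an assumption based on the rows shown:

import csv

path = "MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv"
with open(path, newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        # Output is a quoted multi-line field; its final line carries the 39 ratings.
        rating_line = row["Output"].strip().splitlines()[-1]
        ratings = [int(x) for x in rating_line.split(",") if x.strip()]
        print(f"Run {row['Run']}: {len(ratings)} ratings, "
              f"{row['Time (s)']} s, retries: {row['Error Retries']}")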
advanced_rag.py CHANGED
@@ -570,6 +570,7 @@ def cleanup_old_jobs():
    debug_print(f"Cleaned up {len(to_delete)} old jobs. {len(jobs)} jobs remaining.")
    return f"Cleaned up {len(to_delete)} old jobs", "", ""

+
# Improve the truncate_prompt function to be more aggressive with limiting context
def truncate_prompt(prompt: str, max_tokens: int = 4096) -> str:
    """Truncate prompt to fit within token limit, preserving the most recent/relevant parts."""
@@ -1444,6 +1445,39 @@ def periodic_update(is_checked):
def get_interval(is_checked):
    return 2 if is_checked else None

+ # CSV file management functions (copied exactly from psyllm.py)
+ def list_all_csv_files():
+     csv_files = sorted(glob.glob("*.csv"), key=os.path.getmtime, reverse=True)
+     zip_files = sorted(glob.glob("*.zip"), key=os.path.getmtime, reverse=True)
+     all_files = csv_files + zip_files
+     if not all_files:
+         return "No CSV or ZIP files found.", [], [], []
+     # Gather file info: name, date/time, size
+     file_infos = []
+     for f in all_files:
+         stat = os.stat(f)
+         dt = datetime.datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
+         size_kb = stat.st_size / 1024
+         file_infos.append({
+             "name": os.path.basename(f),
+             "path": os.path.abspath(f),
+             "datetime": dt,
+             "size_kb": f"{size_kb:.1f} KB"
+         })
+     # HTML table with columns: Name, Date/Time, Size
+     html_links = '<table><thead><tr><th>File</th><th>Date/Time</th><th>Size</th></tr></thead><tbody>'
+     for info in file_infos:
+         html_links += f'<tr><td><a href="/file={info["path"]}" download target="_blank">{info["name"]}</a></td>' \
+                       f'<td>{info["datetime"]}</td><td>{info["size_kb"]}</td></tr>'
+     html_links += '</tbody></table>'
+     # For gradio File, also return a DataFrame-like list for display
+     gradio_table = [[info["name"], info["datetime"], info["size_kb"]] for info in file_infos]
+     return html_links, all_files, [os.path.abspath(f) for f in all_files], gradio_table
+
+ def refresh_csv_files():
+     html_links, csv_files, abs_paths, gradio_table = list_all_csv_files()
+     return html_links, abs_paths, gradio_table
+
# Update the Gradio interface to include job status checking
with gr.Blocks(css=custom_css, js="""
document.addEventListener('DOMContentLoaded', function() {
@@ -1905,6 +1939,11 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
with gr.Row():
    batch_status_tokens1 = gr.Markdown("")
    batch_status_tokens2 = gr.Markdown("")
+
+     # --- CSV Refresh and Download ---
+     refresh_csv_button_batch = gr.Button("Refresh CSV Files")
+     csv_download_html_batch = gr.HTML(label="All CSV Download Links")
+     csv_download_file_batch = gr.File(label="All CSV Files", file_types=[".csv"], interactive=True, file_count="multiple")

with gr.TabItem("App Management"):
    with gr.Row():
@@ -1929,6 +1968,9 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
# Add initialization info display
init_info = gr.Markdown("")

+ # Add a DataFrame to show CSV file info (name, date/time, size)
+ csv_file_info_df_batch = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], label="CSV File Info", interactive=False)
+
# Update load_button click to include embedding model
load_button.click(
    lambda file_links, bm25_weight, embedding_model: load_pdfs_async(file_links, default_prompt, bm25_weight, embedding_model),
@@ -2123,6 +2165,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
    every=2
)

+ # Add CSV refresh functionality
+ refresh_csv_button_batch.click(
+     fn=refresh_csv_files,
+     inputs=[],
+     outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch]
+ )
+
def create_csv_from_batch_results(results: List[Dict], job_id: str) -> str:
    """Create a CSV file from batch query results and return the file path"""
    # Create a temporary directory for CSV files if it doesn't exist
@@ -2204,6 +2253,7 @@ def format_batch_result_files(results: List[Dict], job_id: str) -> Tuple[str, str

    return formatted_results, csv_path

+
if __name__ == "__main__":
    debug_print("Launching Gradio interface.")
    app.queue().launch(share=False, allowed_paths=[os.path.join(tempfile.gettempdir(), "rag_batch_results")])
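The advanced_rag.py changes boil down to one pattern: a module-level helper that lists *.csv / *.zip files, plus a Refresh button wired to three output components (HTML download links, a File component, and a DataFrame). Below is a minimal, self-contained sketch of that wiring, reusing the component names from the diff but none of the surrounding app; a recent Gradio 4.x API is assumed, so treat it as an illustration rather than the project's actual layout.

import datetime
import glob
import os

import gradio as gr

def list_all_csv_files():
    # List CSV/ZIP files in the working directory, newest first.
    files = sorted(glob.glob("*.csv") + glob.glob("*.zip"), key=os.path.getmtime, reverse=True)
    if not files:
        return "No CSV or ZIP files found.", [], []
    rows, links = [], "<ul>"
    for f in files:
        stat = os.stat(f)
        dt = datetime.datetime.fromtimestamp(stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S")
        rows.append([os.path.basename(f), dt, f"{stat.st_size / 1024:.1f} KB"])
        # The /file= route is how Gradio serves local files, as in the diff's HTML table.
        links += f'<li><a href="/file={os.path.abspath(f)}" download>{os.path.basename(f)}</a></li>'
    links += "</ul>"
    return links, [os.path.abspath(f) for f in files], rows

with gr.Blocks() as demo:
    refresh_csv_button_batch = gr.Button("Refresh CSV Files")
    csv_download_html_batch = gr.HTML(label="All CSV Download Links")
    csv_download_file_batch = gr.File(label="All CSV Files", file_count="multiple")
    csv_file_info_df_batch = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], interactive=False)

    # One click refreshes all three views, mirroring refresh_csv_button_batch.click in the diff.
    refresh_csv_button_batch.click(
        fn=list_all_csv_files,
        inputs=[],
        outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch],
    )

if __name__ == "__main__":
    # allowed_paths lets Gradio serve the listed files for download, as the diff does for its results dir.
    demo.launch(allowed_paths=[os.getcwd()])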