Upload folder using huggingface_hub
- MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv +31 -0
- advanced_rag.py +50 -0
MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv
ADDED
@@ -0,0 +1,31 @@
+Run,Output,Input Tokens,Output Tokens,Tokens,Error Retries,Time (s)
+1,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,2,2,5,4,3,2,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4",Input tokens: 880,Output tokens: 212,212,0,12.842
+2,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,3,3,5,4,4,3,3,5,5,4,3,3,2,4,3,3,3,5,4,3,3,5,3,3,3,3,3,3,3,3,3,3,3,3,3,3",Input tokens: 880,Output tokens: 212,212,0,7.747
+3,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+4, 3, 2, 1, 2, 4, 3, 1, 2, 1, 4, 3, 2, 1, 2, 3, 4, 3, 2, 1, 3, 2, 3, 1, 2, 3, 2, 1, 2, 3, 4, 3, 2, 1, 2, 3, 4, 3, 2",Input tokens: 880,Output tokens: 212,212,0,8.148
+4,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+4,4,3,2,2,5,4,2,2,2,5,4,2,3,1,2,4,3,2,1,5,4,3,2,1,2,3,2,2,1,5,2,3,4,2,3,3,4,2",Input tokens: 880,Output tokens: 212,212,0,9.508
+5,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,2,1,5,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2",Input tokens: 880,Output tokens: 212,212,0,7.261
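The added file has one row per run: the model's raw reply (an empty <think> block followed by the 39 item scores, S1-S39) is stored as a single quoted, multi-line Output field, alongside token counts, retry count, and elapsed time. Below is a minimal sketch of reading it back, assuming only the filename and column layout visible in the diff above; the parsing code itself is illustrative and not part of this commit.

```python
import csv

# Filename taken from the commit; how to consume the file is an assumption.
path = "MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv"

with open(path, newline="", encoding="utf-8") as fh:
    for row in csv.DictReader(fh):
        # "Output" is a quoted multi-line field; its last non-empty line holds
        # the comma-separated item scores (S1..S39).
        lines = [ln for ln in row["Output"].splitlines() if ln.strip()]
        scores = [int(tok) for tok in lines[-1].split(",") if tok.strip().isdigit()]
        print(f"Run {row['Run']}: {len(scores)} scores, {row['Time (s)']} s")
```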
advanced_rag.py
CHANGED
@@ -570,6 +570,7 @@ def cleanup_old_jobs():
     debug_print(f"Cleaned up {len(to_delete)} old jobs. {len(jobs)} jobs remaining.")
     return f"Cleaned up {len(to_delete)} old jobs", "", ""
 
+
 # Improve the truncate_prompt function to be more aggressive with limiting context
 def truncate_prompt(prompt: str, max_tokens: int = 4096) -> str:
     """Truncate prompt to fit within token limit, preserving the most recent/relevant parts."""
@@ -1444,6 +1445,39 @@ def periodic_update(is_checked):
 def get_interval(is_checked):
     return 2 if is_checked else None
 
+# CSV file management functions (copied exactly from psyllm.py)
+def list_all_csv_files():
+    csv_files = sorted(glob.glob("*.csv"), key=os.path.getmtime, reverse=True)
+    zip_files = sorted(glob.glob("*.zip"), key=os.path.getmtime, reverse=True)
+    all_files = csv_files + zip_files
+    if not all_files:
+        return "No CSV or ZIP files found.", [], [], []
+    # Gather file info: name, date/time, size
+    file_infos = []
+    for f in all_files:
+        stat = os.stat(f)
+        dt = datetime.datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
+        size_kb = stat.st_size / 1024
+        file_infos.append({
+            "name": os.path.basename(f),
+            "path": os.path.abspath(f),
+            "datetime": dt,
+            "size_kb": f"{size_kb:.1f} KB"
+        })
+    # HTML table with columns: Name, Date/Time, Size
+    html_links = '<table><thead><tr><th>File</th><th>Date/Time</th><th>Size</th></tr></thead><tbody>'
+    for info in file_infos:
+        html_links += f'<tr><td><a href="/file={info["path"]}" download target="_blank">{info["name"]}</a></td>' \
+                      f'<td>{info["datetime"]}</td><td>{info["size_kb"]}</td></tr>'
+    html_links += '</tbody></table>'
+    # For gradio File, also return a DataFrame-like list for display
+    gradio_table = [[info["name"], info["datetime"], info["size_kb"]] for info in file_infos]
+    return html_links, all_files, [os.path.abspath(f) for f in all_files], gradio_table
+
+def refresh_csv_files():
+    html_links, csv_files, abs_paths, gradio_table = list_all_csv_files()
+    return html_links, abs_paths, gradio_table
+
 # Update the Gradio interface to include job status checking
 with gr.Blocks(css=custom_css, js="""
 document.addEventListener('DOMContentLoaded', function() {
@@ -1905,6 +1939,11 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         with gr.Row():
             batch_status_tokens1 = gr.Markdown("")
             batch_status_tokens2 = gr.Markdown("")
+
+        # --- CSV Refresh and Download ---
+        refresh_csv_button_batch = gr.Button("Refresh CSV Files")
+        csv_download_html_batch = gr.HTML(label="All CSV Download Links")
+        csv_download_file_batch = gr.File(label="All CSV Files", file_types=[".csv"], interactive=True, file_count="multiple")
 
     with gr.TabItem("App Management"):
         with gr.Row():
@@ -1929,6 +1968,9 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         # Add initialization info display
         init_info = gr.Markdown("")
 
+        # Add a DataFrame to show CSV file info (name, date/time, size)
+        csv_file_info_df_batch = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], label="CSV File Info", interactive=False)
+
         # Update load_button click to include embedding model
         load_button.click(
             lambda file_links, bm25_weight, embedding_model: load_pdfs_async(file_links, default_prompt, bm25_weight, embedding_model),
@@ -2123,6 +2165,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         every=2
     )
 
+    # Add CSV refresh functionality
+    refresh_csv_button_batch.click(
+        fn=refresh_csv_files,
+        inputs=[],
+        outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch]
+    )
+
 def create_csv_from_batch_results(results: List[Dict], job_id: str) -> str:
     """Create a CSV file from batch query results and return the file path"""
     # Create a temporary directory for CSV files if it doesn't exist
@@ -2204,6 +2253,7 @@ def format_batch_result_files(results: List[Dict], job_id: str) -> Tuple[str, st
 
     return formatted_results, csv_path
 
+
 if __name__ == "__main__":
     debug_print("Launching Gradio interface.")
     app.queue().launch(share=False, allowed_paths=[os.path.join(tempfile.gettempdir(), "rag_batch_results")])
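Taken together, the advanced_rag.py changes add two module-level helpers (list_all_csv_files and refresh_csv_files), a refresh button plus HTML and file download widgets in the batch results area, a CSV-info DataFrame further down, and a click handler that wires them together. Below is a minimal sketch of exercising the helpers outside the UI; it assumes advanced_rag.py imports cleanly as a module (app.queue().launch() sits behind the __main__ guard) and that glob, os, and datetime are already imported there, as the diff implies.

```python
# Illustrative only: assumes advanced_rag.py is on the import path and that
# building its gr.Blocks UI at import time is acceptable in your environment.
import advanced_rag as ar

html_links, all_files, abs_paths, table_rows = ar.list_all_csv_files()
print(f"{len(all_files)} CSV/ZIP files found")
for name, when, size in table_rows:
    print(f"{name}\t{when}\t{size}")

# refresh_csv_files() drops the raw file list so its three return values match
# the click() outputs: csv_download_html_batch, csv_download_file_batch,
# csv_file_info_df_batch.
html, paths, rows = ar.refresh_csv_files()
```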