Upload folder using huggingface_hub
- MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv +31 -0
- advanced_rag.py +50 -0
MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv
ADDED
@@ -0,0 +1,31 @@
+Run,Output,Input Tokens,Output Tokens,Tokens,Error Retries,Time (s)
+1,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,2,2,5,4,3,2,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4,2,5,5,3,2,2,2,5,4,4",Input tokens: 880,Output tokens: 212,212,0,12.842
+2,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,3,3,5,4,4,3,3,5,5,4,3,3,2,4,3,3,3,5,4,3,3,5,3,3,3,3,3,3,3,3,3,3,3,3,3,3",Input tokens: 880,Output tokens: 212,212,0,7.747
+3,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+4, 3, 2, 1, 2, 4, 3, 1, 2, 1, 4, 3, 2, 1, 2, 3, 4, 3, 2, 1, 3, 2, 3, 1, 2, 3, 2, 1, 2, 3, 4, 3, 2, 1, 2, 3, 4, 3, 2",Input tokens: 880,Output tokens: 212,212,0,8.148
+4,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+4,4,3,2,2,5,4,2,2,2,5,4,2,3,1,2,4,3,2,1,5,4,3,2,1,2,3,2,2,1,5,2,3,4,2,3,3,4,2",Input tokens: 880,Output tokens: 212,212,0,9.508
+5,"<think>
+
+</think>
+
+Part 1/Part 2, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39
+3,5,4,2,1,5,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2,1,5,4,3,2,1,0,4,3,2",Input tokens: 880,Output tokens: 212,212,0,7.261
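The added file has one row per run: the model's raw reply (an empty <think> block followed by the 39 item scores, S1-S39) is stored as a single quoted, multi-line Output field, alongside token counts, retry count, and elapsed time. Below is a minimal sketch of reading it back, assuming only the filename and column layout visible in the diff above; the parsing code itself is illustrative and not part of this commit.

```python
import csv

# Filename taken from the commit; how to consume the file is an assumption.
path = "MFQ-5-_Qwen3-32B_Nebius_20250719_110710.csv"

with open(path, newline="", encoding="utf-8") as fh:
    for row in csv.DictReader(fh):
        # "Output" is a quoted multi-line field; its last non-empty line holds
        # the comma-separated item scores (S1..S39).
        lines = [ln for ln in row["Output"].splitlines() if ln.strip()]
        scores = [int(tok) for tok in lines[-1].split(",") if tok.strip().isdigit()]
        print(f"Run {row['Run']}: {len(scores)} scores, {row['Time (s)']} s")
```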
advanced_rag.py
CHANGED
@@ -570,6 +570,7 @@ def cleanup_old_jobs():
     debug_print(f"Cleaned up {len(to_delete)} old jobs. {len(jobs)} jobs remaining.")
     return f"Cleaned up {len(to_delete)} old jobs", "", ""
 
+
 # Improve the truncate_prompt function to be more aggressive with limiting context
 def truncate_prompt(prompt: str, max_tokens: int = 4096) -> str:
     """Truncate prompt to fit within token limit, preserving the most recent/relevant parts."""
@@ -1444,6 +1445,39 @@ def periodic_update(is_checked):
 def get_interval(is_checked):
     return 2 if is_checked else None
 
+# CSV file management functions (copied exactly from psyllm.py)
+def list_all_csv_files():
+    csv_files = sorted(glob.glob("*.csv"), key=os.path.getmtime, reverse=True)
+    zip_files = sorted(glob.glob("*.zip"), key=os.path.getmtime, reverse=True)
+    all_files = csv_files + zip_files
+    if not all_files:
+        return "No CSV or ZIP files found.", [], [], []
+    # Gather file info: name, date/time, size
+    file_infos = []
+    for f in all_files:
+        stat = os.stat(f)
+        dt = datetime.datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
+        size_kb = stat.st_size / 1024
+        file_infos.append({
+            "name": os.path.basename(f),
+            "path": os.path.abspath(f),
+            "datetime": dt,
+            "size_kb": f"{size_kb:.1f} KB"
+        })
+    # HTML table with columns: Name, Date/Time, Size
+    html_links = '<table><thead><tr><th>File</th><th>Date/Time</th><th>Size</th></tr></thead><tbody>'
+    for info in file_infos:
+        html_links += f'<tr><td><a href="/file={info["path"]}" download target="_blank">{info["name"]}</a></td>' \
+                      f'<td>{info["datetime"]}</td><td>{info["size_kb"]}</td></tr>'
+    html_links += '</tbody></table>'
+    # For gradio File, also return a DataFrame-like list for display
+    gradio_table = [[info["name"], info["datetime"], info["size_kb"]] for info in file_infos]
+    return html_links, all_files, [os.path.abspath(f) for f in all_files], gradio_table
+
+def refresh_csv_files():
+    html_links, csv_files, abs_paths, gradio_table = list_all_csv_files()
+    return html_links, abs_paths, gradio_table
+
 # Update the Gradio interface to include job status checking
 with gr.Blocks(css=custom_css, js="""
 document.addEventListener('DOMContentLoaded', function() {
@@ -1905,6 +1939,11 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         with gr.Row():
             batch_status_tokens1 = gr.Markdown("")
             batch_status_tokens2 = gr.Markdown("")
+
+        # --- CSV Refresh and Download ---
+        refresh_csv_button_batch = gr.Button("Refresh CSV Files")
+        csv_download_html_batch = gr.HTML(label="All CSV Download Links")
+        csv_download_file_batch = gr.File(label="All CSV Files", file_types=[".csv"], interactive=True, file_count="multiple")
 
     with gr.TabItem("App Management"):
         with gr.Row():
@@ -1929,6 +1968,9 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         # Add initialization info display
         init_info = gr.Markdown("")
 
+        # Add a DataFrame to show CSV file info (name, date/time, size)
+        csv_file_info_df_batch = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], label="CSV File Info", interactive=False)
+
         # Update load_button click to include embedding model
         load_button.click(
             lambda file_links, bm25_weight, embedding_model: load_pdfs_async(file_links, default_prompt, bm25_weight, embedding_model),
@@ -2123,6 +2165,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         every=2
     )
 
+    # Add CSV refresh functionality
+    refresh_csv_button_batch.click(
+        fn=refresh_csv_files,
+        inputs=[],
+        outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch]
+    )
+
 def create_csv_from_batch_results(results: List[Dict], job_id: str) -> str:
     """Create a CSV file from batch query results and return the file path"""
     # Create a temporary directory for CSV files if it doesn't exist
@@ -2204,6 +2253,7 @@ def format_batch_result_files(results: List[Dict], job_id: str) -> Tuple[str, st
 
     return formatted_results, csv_path
 
+
 if __name__ == "__main__":
     debug_print("Launching Gradio interface.")
     app.queue().launch(share=False, allowed_paths=[os.path.join(tempfile.gettempdir(), "rag_batch_results")])
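Taken together, the advanced_rag.py changes add two module-level helpers (list_all_csv_files and refresh_csv_files), a refresh button plus HTML and file download widgets in the batch results area, a CSV-info DataFrame further down, and a click handler that wires them together. Below is a minimal sketch of exercising the helpers outside the UI; it assumes advanced_rag.py imports cleanly as a module (app.queue().launch() sits behind the __main__ guard) and that glob, os, and datetime are already imported there, as the diff implies.

```python
# Illustrative only: assumes advanced_rag.py is on the import path and that
# building its gr.Blocks UI at import time is acceptable in your environment.
import advanced_rag as ar

html_links, all_files, abs_paths, table_rows = ar.list_all_csv_files()
print(f"{len(all_files)} CSV/ZIP files found")
for name, when, size in table_rows:
    print(f"{name}\t{when}\t{size}")

# refresh_csv_files() drops the raw file list so its three return values match
# the click() outputs: csv_download_html_batch, csv_download_file_batch,
# csv_file_info_df_batch.
html, paths, rows = ar.refresh_csv_files()
```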