Spaces:
Running
on
Zero
Running
on
Zero
Bellok
committed on
Commit
·
1635a41
1
Parent(s):
81c2b00
feat(rag): enable additional datasets and add hybrid search fallback
Browse files
- Uncommented and enabled 'arxiv', 'edustories', 'manuals', 'enterprise', and 'prompt-report' dataset packs for broader knowledge coverage
- Added hybrid fallback mechanism in query_warbler() to perform pure semantic search if fractalstat hybrid yields no results, improving retrieval reliability
- Updated document count in Gradio UI from '2.6M+' to '100k+' for accuracy
- Removed auto-sharing from demo launch for controlled access
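This expands the RAG system's data sources, makes queries more robust by falling back to pure semantic search when hybrid retrieval returns nothing, corrects the overstated document count shown in the UI, and disables auto-sharing when the demo is launched.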
app.py
CHANGED
|
@@ -82,14 +82,12 @@ if len(documents) == 0:
|
|
| 82 |
|
| 83 |
# Enable all available HF dataset packs for maximum knowledge diversity
|
| 84 |
datasets_to_download = [
|
| 85 |
-
|
| 86 |
-
|
| 87 |
"novels", # Fiction literature
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
"
|
| 91 |
-
#"portuguese-edu", # Portuguese educational content
|
| 92 |
-
#"prompt-report" # AI prompt engineering reports
|
| 93 |
]
|
| 94 |
|
| 95 |
total_docs = 0
|
|
@@ -225,6 +223,27 @@ def query_warbler(query_text: str, max_results: int = 5, use_hybrid: bool = True
|
|
| 225 |
else:
|
| 226 |
print(f"DEBUG: No results above threshold: {query.confidence_threshold}")
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
# Format results
|
| 229 |
output = f"## Query Results\n\n"
|
| 230 |
output += f"**Query:** {query_text}\n\n"
|
|
@@ -267,7 +286,7 @@ with gr.Blocks(title="Warbler CDA - FractalStat RAG") as demo:
|
|
| 267 |
Semantic retrieval with 8D FractalStat multi-dimensional addressing.
|
| 268 |
|
| 269 |
**Features:**
|
| 270 |
-
-
|
| 271 |
- Hybrid semantic + FractalStat scoring
|
| 272 |
- Bob the Skeptic bias detection
|
| 273 |
- Narrative coherence analysis
|
|
@@ -344,4 +363,4 @@ with gr.Blocks(title="Warbler CDA - FractalStat RAG") as demo:
|
|
| 344 |
""")
|
| 345 |
|
| 346 |
if __name__ == "__main__":
|
| 347 |
-
demo.launch(
|
|
|
|
| 82 |
|
| 83 |
# Enable all available HF dataset packs for maximum knowledge diversity
|
| 84 |
datasets_to_download = [
|
| 85 |
+
"arxiv", # Physics and mathematics papers
|
| 86 |
+
"edustories", # Educational narratives and stories
|
| 87 |
"novels", # Fiction literature
|
| 88 |
+
"manuals", # Technical documentation
|
| 89 |
+
"enterprise", # Business and corporate content
|
| 90 |
+
"prompt-report" # AI prompt engineering reports
|
|
|
|
|
|
|
| 91 |
]
|
| 92 |
|
| 93 |
total_docs = 0
|
|
|
|
| 223 |
else:
|
| 224 |
print(f"DEBUG: No results above threshold: {query.confidence_threshold}")
|
| 225 |
|
| 226 |
+
# Hybrid Fallback: If hybrid mode and no results, fall back to pure semantic search
|
| 227 |
+
if use_hybrid and len(assembly.results) == 0:
|
| 228 |
+
print(f"DEBUG: Hybrid returned 0 results, falling back to pure semantic search")
|
| 229 |
+
# Reset timer for fallback query
|
| 230 |
+
fallback_start = time.time()
|
| 231 |
+
query.confidence_threshold = 0.2 # Lower threshold for semantic fallback
|
| 232 |
+
query.fractalstat_hybrid = False # Disable hybrid for this query
|
| 233 |
+
|
| 234 |
+
# Re-execute query with semantic-only mode
|
| 235 |
+
assembly = api.retrieve_context(query)
|
| 236 |
+
fallback_ms = (time.time() - fallback_start) * 1000
|
| 237 |
+
elapsed_ms = (time.time() - start_time) * 1000 # Update total time
|
| 238 |
+
|
| 239 |
+
print(f"DEBUG: Semantic fallback completed in {fallback_ms:.0f}ms, found {len(assembly.results)} results")
|
| 240 |
+
if assembly.results:
|
| 241 |
+
print(f"DEBUG: Top 3 relevance scores from semantic: {[r.relevance_score for r in assembly.results[:3]]}")
|
| 242 |
+
print(f"DEBUG: Hybrid fallback successful - results returned via semantic search")
|
| 243 |
+
|
| 244 |
+
hybrid_fallback_used = use_hybrid and len(assembly.results) > 0 and not query.fractalstat_hybrid
|
| 245 |
+
query_mode_display = f"{query_mode} (+ Semantic Fallback)" if hybrid_fallback_used else query_mode
|
| 246 |
+
|
| 247 |
# Format results
|
| 248 |
output = f"## Query Results\n\n"
|
| 249 |
output += f"**Query:** {query_text}\n\n"
|
|
|
|
| 286 |
Semantic retrieval with 8D FractalStat multi-dimensional addressing.
|
| 287 |
|
| 288 |
**Features:**
|
| 289 |
+
- 100k+ documents from arXiv, education, fiction, and more
|
| 290 |
- Hybrid semantic + FractalStat scoring
|
| 291 |
- Bob the Skeptic bias detection
|
| 292 |
- Narrative coherence analysis
|
|
|
|
| 363 |
""")
|
| 364 |
|
| 365 |
if __name__ == "__main__":
|
| 366 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|