Spaces:
Runtime error
Runtime error
| """ | |
| Check RAG System Status - Verify all vector stores | |
| Checks all 7 specialized ChromaDB databases | |
| """ | |
| from pathlib import Path | |
| import sys | |
| # Add parent directory to path | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
# Registry of the vector stores this script verifies, keyed by store id.
# Every entry provides:
#   name          - label shown in the printed report
#   path          - directory of the ChromaDB store on disk
#   expected_size - approximate size in MB (printed for reference)
#   test_query    - sample text used for the smoke-test query
VECTOR_STORES = {
    'medical_diseases': dict(
        name='ViMedical Diseases',
        path='rag/vector_store/medical_diseases',
        expected_size=50,
        test_query='đau đầu triệu chứng',
    ),
    'mental_health': dict(
        name='Mental Health',
        path='rag/vector_store/mental_health',
        expected_size=80,
        test_query='stress anxiety depression',
    ),
    'nutrition': dict(
        name='Nutrition Plans',
        path='rag/vector_store/nutrition',
        expected_size=20,
        test_query='diet meal plan calories',
    ),
    'vietnamese_nutrition': dict(
        name='Vietnamese Food',
        path='rag/vector_store/vietnamese_nutrition',
        expected_size=5,
        test_query='phở cơm nutrition',
    ),
    'fitness': dict(
        name='Fitness Exercises',
        path='rag/vector_store/fitness',
        expected_size=10,
        test_query='gym workout exercise',
    ),
    'symptom_qa': dict(
        name='Medical Q&A',
        path='rag/vector_store/symptom_qa',
        expected_size=8,
        test_query='triệu chứng bệnh',
    ),
    'general_health_qa': dict(
        name='General Health Q&A',
        path='rag/vector_store/general_health_qa',
        expected_size=7,
        test_query='sức khỏe tổng quát',
    ),
}
def check_vector_store(store_info):
    """Check a single vector store and print a report for it.

    The checks run in order and stop at the first failure:

    1. the store directory exists,
    2. the files below it total at least 0.1 MB,
    3. ChromaDB can open the directory and lists at least one collection,
    4. the first listed collection contains documents,
    5. a test query against that collection returns a document.

    Only the first collection returned by ChromaDB is inspected.

    Args:
        store_info: Mapping with the keys ``name`` (label for the report),
            ``path`` (store directory, used as given, so a relative path is
            resolved against the current working directory),
            ``expected_size`` (approximate size in MB, printed only) and
            ``test_query`` (text used for the test query).

    Returns:
        dict: ``{'status': bool, 'reason': str or None}``. ``reason`` is
        ``None`` when every check passed, otherwise a short description of
        the first failed check.
    """
    print(f"\n📦 {store_info['name']}")
    print("-" * 50)

    store_path = Path(store_info['path'])

    # Check existence
    if not store_path.exists():
        print(f"❌ Not found: {store_info['path']}")
        print(" Reason: Directory does not exist")
        return {'status': False, 'reason': 'Directory not found'}
    print(f"✅ Exists: {store_info['path']}")

    # Check size: sum of all regular files anywhere below the directory
    total_size = sum(
        f.stat().st_size for f in store_path.rglob('*') if f.is_file()
    )
    size_mb = total_size / (1024 * 1024)
    expected = store_info['expected_size']
    print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)")
    if size_mb < 0.1:
        print("⚠️ Database seems empty")
        print(" Reason: Database size < 0.1 MB (likely not built)")
        return {'status': False, 'reason': 'Database empty or not built'}

    # Try to load and query
    try:
        # Imported lazily so a missing package is reported as a check
        # failure instead of crashing the whole script at import time.
        import chromadb

        client = chromadb.PersistentClient(path=str(store_path))
        collections = client.list_collections()
        if not collections:
            print("⚠️ No collections found")
            print(" Reason: ChromaDB has no collections")
            return {'status': False, 'reason': 'No collections in database'}

        collection = collections[0]
        # chromadb 0.6.x returns plain collection names from
        # list_collections() instead of Collection objects. Resolve a name
        # to the real collection so count()/query() work on every version.
        if isinstance(collection, str):
            collection = client.get_collection(collection)

        count = collection.count()
        print(f"📚 Documents: {count:,} chunks")
        if count == 0:
            print("⚠️ Collection is empty")
            print(" Reason: Collection exists but has 0 documents")
            return {
                'status': False,
                'reason': 'Collection is empty (0 documents)',
            }

        # Test query
        try:
            results = collection.query(
                query_texts=[store_info['test_query']],
                n_results=1
            )
            if results and results['documents'] and results['documents'][0]:
                print("✅ Query test passed")
                return {'status': True, 'reason': None}
            print("⚠️ Query returned no results")
            print(" Reason: Query executed but found no matching documents")
            return {'status': False, 'reason': 'Query returned no results'}
        except Exception as e:
            # Diagnostic boundary: any query failure becomes a report entry.
            print(f"⚠️ Query test failed: {e}")
            print(f" Reason: {e}")
            return {'status': False, 'reason': f'Query failed: {e}'}
    except ImportError:
        print("⚠️ ChromaDB not installed")
        print(" Reason: pip install chromadb")
        return {'status': False, 'reason': 'ChromaDB package not installed'}
    except Exception as e:
        # Diagnostic boundary: report the problem and keep checking the
        # remaining stores rather than aborting the run.
        print(f"⚠️ Error: {e}")
        print(f" Reason: {e}")
        return {'status': False, 'reason': f'Error loading database: {e}'}
def check_rag_status():
    """Run the health check for every configured vector store.

    Prints a banner, verifies that the ``rag/vector_store`` base directory
    exists, runs ``check_vector_store`` on each entry of ``VECTOR_STORES``
    and prints a summary. When stores fail, the summary lists the rebuild
    command for each of them.

    Returns:
        bool: ``True`` when every store passed; ``False`` when the base
        directory is missing or at least one store failed.
    """
    rule = "=" * 60

    print(rule)
    print("🔍 RAG System Status Check")
    print(rule)

    # Without the base directory there is nothing to inspect.
    store_root = Path('rag/vector_store')
    if not store_root.exists():
        print("\n❌ Vector store directory not found!")
        print(f" Expected: {store_root}")
        print("\n💡 Solution:")
        print(" bash scripts/setup_rag.sh")
        return False

    print(f"\n✅ Base directory exists: {store_root}")

    # Check the stores in registry order, keeping each outcome by store id.
    outcomes = {
        store_id: check_vector_store(store_info)
        for store_id, store_info in VECTOR_STORES.items()
    }

    print(f"\n{rule}")
    print("📊 Summary")
    print(rule)

    store_count = len(outcomes)
    ok_count = 0
    for store_id, outcome in outcomes.items():
        display_name = VECTOR_STORES[store_id]['name']
        if outcome['status']:
            ok_count += 1
            print(f"✅ {display_name}")
            continue
        print(f"❌ {display_name}")
        if outcome['reason']:
            print(f" └─ {outcome['reason']}")

    print(f"\n{rule}")
    print(f"Result: {ok_count}/{store_count} databases OK")

    if ok_count != store_count:
        print("\n⚠️ Some databases are missing or have issues")
        print("\n💡 Solutions:")
        print("\n1️⃣ Quick fix (rebuild all):")
        print(" bash scripts/setup_rag.sh")
        print("\n2️⃣ Rebuild specific databases:")

        # Command that rebuilds each store, looked up by store id.
        rebuild_commands = {
            'medical_diseases': 'python data_mining/mining_vimedical.py',
            'mental_health': 'python data_mining/mining_mentalchat.py',
            'nutrition': 'python data_mining/mining_nutrition.py',
            'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py',
            'fitness': 'python data_mining/mining_fitness.py',
            'symptom_qa': 'python data_mining/mining_medical_qa.py',
            'general_health_qa': 'python data_mining/mining_medical_qa.py',
        }

        for store_id, outcome in outcomes.items():
            if outcome['status']:
                continue
            display_name = VECTOR_STORES[store_id]['name']
            command = rebuild_commands.get(store_id, 'Unknown')
            print(f"\n ❌ {display_name}:")
            print(f" Reason: {outcome['reason']}")
            print(f" Fix: {command}")

        print(f"\n{rule}")
        return False

    print("\n🎉 All vector stores are ready!")
    print("\nNext steps:")
    print(" python app.py")
    print(" Open http://localhost:7860")
    print(rule)
    return True
if __name__ == '__main__':
    # Report the outcome through the process exit status: 0 when every
    # store passed, 1 otherwise. sys.exit is used instead of the bare
    # exit() helper, which the site module injects for interactive use and
    # which is absent when the interpreter runs without site (python -S).
    success = check_rag_status()
    sys.exit(0 if success else 1)