import os

# Install pinned packages at runtime (Hugging Face Spaces workaround).
# NOTE: this must run before the third-party imports below, or the pinned
# versions will not be the ones actually imported; listing these in
# requirements.txt is the more reliable approach on Spaces.
os.system("pip install transformers==4.35.2 accelerate==0.24.1 sentencepiece==0.1.99 torch==2.1.2 gradio==4.13.0")

import json
import re
from typing import List, Dict, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

print("PyTorch version:", torch.__version__)
class SwahiliLiteratureRAG:
    def __init__(self, knowledge_base_path: str = None, model_name: str = None):
        """Initialize the RAG system."""
        self.knowledge_base = self.load_knowledge_base(knowledge_base_path)
        self.model = self.load_model(model_name)
        self.similarity_threshold = 0.15
        self.swahili_stop_words = {
            'ni', 'nini', 'na', 'ya', 'za', 'wa', 'la', 'kwa', 'katika', 'au', 'je',
            'hii', 'ile', 'hilo', 'hiyo', 'hayo', 'hao', 'yule', 'huyu', 'huu',
            'kuhusu', 'juu', 'chini', 'mbele', 'nyuma', 'ndani', 'nje', 'karibu',
            'mbali', 'hapa', 'pale', 'kule', 'sasa', 'jana', 'kesho', 'leo'
        }
    def load_knowledge_base(self, path: str = None) -> List[Dict]:
        """Load the knowledge base from a JSON file, or fall back to sample data."""
        if path and os.path.exists(path):
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Error loading knowledge base: {e}")
        # Default sample data (same instruction/input/output schema as combined.json)
        return [
            {
                "instruction": "Eleza kuhusu Fasihi Simulizi.",
                "input": "",
                "output": "Fasihi simulizi ni sanaa inayotumia lugha kuwasilisha ujumbe unaomhusu binadamu..."
            },
            {
                "instruction": "Tofautisha Fasihi na Sanaa Nyingine.",
                "input": "",
                "output": "Fasihi hutumia lugha na wahusika kuwasilisha maudhui..."
            },
            {
                "instruction": "Tofautisha Fasihi Simulizi na Fasihi Andishi.",
                "input": "",
                "output": "Fasihi simulizi huwasilishwa kwa mdomo... Fasihi andishi huwasilishwa kwa maandishi..."
            },
            {
                "instruction": "Eleza vipengele vya Fasihi Simulizi.",
                "input": "",
                "output": "Vipengele vya fasihi simulizi ni pamoja na lugha, mandhari, wahusika, maudhui, na mtindo."
            }
        ]
    def load_model(self, model_name: str = None):
        """Load the fine-tuned Gemma model from the Hugging Face Hub."""
        if not model_name:
            print("No model name provided. Using knowledge base only.")
            return None
        try:
            print(f"Loading model from Hugging Face: {model_name}")
            # Load the base tokenizer
            base_tokenizer = AutoTokenizer.from_pretrained(
                "CraneAILabs/swahili-gemma-1b",
                trust_remote_code=True
            )
            # Register the chat special tokens. NOTE: tokens that are new to the
            # vocabulary must already have matching embeddings in the fine-tuned
            # checkpoint; otherwise the embedding matrix would need resizing
            # (model.resize_token_embeddings).
            special_tokens = {
                "additional_special_tokens": ["<|user|>", "<|assistant|>"],
                "bos_token": "<start_of_turn>",
                "eos_token": "<end_of_turn>"
            }
            base_tokenizer.add_special_tokens(special_tokens)
            base_tokenizer.pad_token = base_tokenizer.eos_token
            # Load the model from the Hugging Face Hub (CPU, full precision)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                use_cache=True
            )
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=base_tokenizer,
                torch_dtype=torch.float32
            )
            print("✅ Model loaded successfully from Hugging Face Hub!")
            return pipe
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            print("Continuing with knowledge base only...")
            return None
    # [KEEP ALL YOUR EXISTING METHODS - normalize_text, extract_key_terms,
    #  preprocess_query, calculate_similarity, retrieve_documents,
    #  generate_with_model, answer_query]
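
    # --- Minimal illustrative sketches of some of the methods named above. ---
    # These are NOT the original implementations (which are elided here); they
    # are hedged stand-ins showing one plausible keyword-overlap retrieval
    # scheme consistent with the self.similarity_threshold and
    # self.swahili_stop_words attributes set in __init__.

    def normalize_text(self, text: str) -> str:
        """Lowercase and strip punctuation so term matching is consistent."""
        return re.sub(r"[^\w\s]", " ", text.lower()).strip()

    def extract_key_terms(self, text: str) -> set:
        """Tokenize and drop Swahili stop words."""
        tokens = self.normalize_text(text).split()
        return {t for t in tokens if t not in self.swahili_stop_words}

    def calculate_similarity(self, query_terms: set, doc_terms: set) -> float:
        """Jaccard overlap between query terms and document terms."""
        if not query_terms or not doc_terms:
            return 0.0
        return len(query_terms & doc_terms) / len(query_terms | doc_terms)

    def retrieve_documents(self, query: str, top_k: int = 3) -> List[Dict]:
        """Rank knowledge-base entries by term overlap with the query."""
        query_terms = self.extract_key_terms(query)
        scored = []
        for doc in self.knowledge_base:
            doc_terms = self.extract_key_terms(doc["instruction"] + " " + doc["output"])
            score = self.calculate_similarity(query_terms, doc_terms)
            if score >= self.similarity_threshold:
                scored.append((score, doc))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [doc for _, doc in scored[:top_k]]

    def answer_query(self, query: str) -> str:
        """Answer from retrieved context, falling back to the raw passages."""
        docs = self.retrieve_documents(query)
        if not docs:
            return "Samahani, sina taarifa za kutosha kuhusu swali hilo."  # "Sorry, I don't have enough information."
        if self.model is None:
            return docs[0]["output"]
        context = "\n".join(d["output"] for d in docs)
        prompt = (f"<start_of_turn>user\nMuktadha:\n{context}\n\nSwali: {query}<end_of_turn>\n"
                  f"<start_of_turn>model\n")
        result = self.model(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
        text = result[0]["generated_text"]
        return text.split("<start_of_turn>model\n")[-1].replace("<end_of_turn>", "").strip()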
class GeneralChatbot:
    def __init__(self, model_name: str = None):
        """Initialize a general chatbot that uses the model without RAG."""
        self.model = self.load_model(model_name)

    def load_model(self, model_name: str = None):
        """Load the model for general conversation from the Hugging Face Hub."""
        if not model_name:
            print("No model name provided for general chat. Using basic responses.")
            return None
        try:
            print(f"Loading general chat model from Hugging Face: {model_name}")
            # Load the base tokenizer
            base_tokenizer = AutoTokenizer.from_pretrained(
                "CraneAILabs/swahili-gemma-1b",
                trust_remote_code=True
            )
            # Register the same chat special tokens as the RAG loader above
            special_tokens = {
                "additional_special_tokens": ["<|user|>", "<|assistant|>"],
                "bos_token": "<start_of_turn>",
                "eos_token": "<end_of_turn>"
            }
            base_tokenizer.add_special_tokens(special_tokens)
            base_tokenizer.pad_token = base_tokenizer.eos_token
            # Load the model from the Hugging Face Hub (CPU, full precision)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                use_cache=True
            )
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=base_tokenizer,
                torch_dtype=torch.float32
            )
            pipe.tokenizer = base_tokenizer
            print("✅ General chat model loaded successfully!")
            return pipe
        except Exception as e:
            print(f"❌ Error loading general chat model: {e}")
            return None
    def generate_response(self, query: str) -> str:
        """Generate a response using the model's general knowledge only."""
        if not query.strip():
            return "Hello! How can I help you today?"
        if self.model is None:
            # Basic keyword-matched fallback responses when no model is loaded
            fallback_responses = {
                "hello": "Hello! How are you doing today?",
                "hi": "Hi there! What would you like to talk about?",
                "how are you": "I'm doing well, thank you for asking! How about you?",
                "what is your name": "I'm a general conversation assistant. What's your name?",
                "bye": "Goodbye! Have a great day!",
                "thank you": "You're welcome! Is there anything else I can help you with?",
            }
            query_lower = query.lower().strip()
            for key, response in fallback_responses.items():
                if key in query_lower:
                    return response
            return "I understand you want to chat, but I don't have access to my full capabilities right now. What would you like to talk about?"
        try:
            # Use the Gemma chat format the model was trained with
            formatted_prompt = f"<start_of_turn>user\n{query}<end_of_turn>\n<start_of_turn>model\n"
            response = self.model(
                formatted_prompt,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                repetition_penalty=1.1,
                pad_token_id=self.model.tokenizer.eos_token_id,
                eos_token_id=self.model.tokenizer.eos_token_id,
                use_cache=True
            )
            generated_text = response[0]['generated_text']
            # Extract only the assistant's turn from the full generation
            if "<start_of_turn>model\n" in generated_text:
                answer = generated_text.split("<start_of_turn>model\n")[-1]
                answer = answer.replace("<end_of_turn>", "").strip()
            else:
                answer = generated_text.replace(formatted_prompt, "").strip()
            return answer if answer else "I'm not sure how to respond to that. Could you try rephrasing?"
        except Exception as e:
            print(f"Error generating general response: {e}")
            return "I'm having trouble generating a response right now. What else would you like to talk about?"
# Initialize both systems
def initialize_systems():
    """Initialize both the RAG and general chat systems."""
    # Fine-tuned model on the Hugging Face Hub
    model_name = "BrianGithaiga/swahili-gemma-finetuned"
    # Knowledge base file (upload this file to the Space if needed)
    knowledge_base_path = "combined.json"
    print("🚀 Initializing systems with model:", model_name)
    rag_system = SwahiliLiteratureRAG(knowledge_base_path, model_name)
    general_chat = GeneralChatbot(model_name)
    return rag_system, general_chat


# Create both system instances at import time
print("Initializing both systems...")
rag_system, general_chat = initialize_systems()
print("Both systems initialized successfully!")
# [KEEP YOUR EXISTING create_app() FUNCTION EXACTLY AS IS]
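
# --- Hedged placeholder: a minimal create_app() sketch so this file runs
# standalone. The Space's original create_app() is elided above; this stand-in
# only assumes the two entry points used elsewhere in this file,
# rag_system.answer_query(query) and general_chat.generate_response(query),
# and is not the original UI.
def create_app():
    with gr.Blocks(title="Swahili Literature Assistant") as demo:
        gr.Markdown("# Swahili Literature Assistant")
        with gr.Tab("Fasihi (RAG)"):
            rag_in = gr.Textbox(label="Swali / Question")
            rag_out = gr.Textbox(label="Jibu / Answer")
            gr.Button("Uliza / Ask").click(
                lambda q: rag_system.answer_query(q), inputs=rag_in, outputs=rag_out
            )
        with gr.Tab("Mazungumzo / General Chat"):
            chat_in = gr.Textbox(label="Ujumbe / Message")
            chat_out = gr.Textbox(label="Jibu / Reply")
            gr.Button("Tuma / Send").click(
                lambda q: general_chat.generate_response(q), inputs=chat_in, outputs=chat_out
            )
    return demo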
# For Hugging Face Spaces deployment
if __name__ == "__main__":
    # Quiet torch/tokenizers logging for better performance on Spaces
    os.environ['TORCH_LOGS'] = ''
    os.environ['TORCHDYNAMO_VERBOSE'] = '0'
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'

    print("Creating Gradio app...")
    app = create_app()

    # Spaces expects the server on 0.0.0.0:7860
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )