import os import gradio as gr import json import numpy as np import pandas as pd from typing import List, Dict, Tuple import re # Install required packages (for Hugging Face Spaces compatibility) os.system("pip install transformers==4.35.2 accelerate==0.24.1 sentencepiece==0.1.99 torch==2.1.2 gradio==4.13.0") import torch from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer print("PyTorch version:", torch.__version__) class SwahiliLiteratureRAG: def __init__(self, knowledge_base_path: str = None, model_name: str = None): """ Initialize the RAG system """ self.knowledge_base = self.load_knowledge_base(knowledge_base_path) self.model = self.load_model(model_name) self.similarity_threshold = 0.15 self.swahili_stop_words = { 'ni', 'nini', 'na', 'ya', 'za', 'wa', 'la', 'kwa', 'katika', 'au', 'je', 'hii', 'ile', 'hilo', 'hiyo', 'hayo', 'hao', 'yule', 'huyu', 'huu', 'kuhusu', 'juu', 'chini', 'mbele', 'nyuma', 'ndani', 'nje', 'karibu', 'mbali', 'hapa', 'pale', 'kule', 'sasa', 'jana', 'kesho', 'leo' } def load_knowledge_base(self, path: str = None) -> List[Dict]: """Load your knowledge base from JSON file""" if path and os.path.exists(path): try: with open(path, 'r', encoding='utf-8') as f: return json.load(f) except Exception as e: print(f"Error loading knowledge base: {e}") # Default sample data return [ { "instruction": "Eleza kuhusu Fasihi Simulizi.", "input": "", "output": "Fasihi simulizi ni sanaa inayotumia lugha kuwasilisha ujumbe unaomhusu binadamu..." }, { "instruction": "Tofautisha Fasihi na Sanaa Nyingine.", "input": "", "output": "Fasihi hutumia lugha na wahusika kuwasilisha maudhui..." }, { "instruction": "Tofautisha Fasihi Simulizi na Fasihi Andishi.", "input": "", "output": "Fasihi simulizi huwasilishwa kwa mdomo... Fasihi andishi huwasilishwa kwa maandishi..." }, { "instruction": "Eleza vipengele vya Fasihi Simulizi.", "input": "", "output": "Vipengele vya fasihi simulizi ni pamoja na lugha, mandhari, wahusika, maudhui, na mtindo." } ] def load_model(self, model_name: str = None): """Load your fine-tuned Gemma model from Hugging Face Hub""" if not model_name: print("No model name provided. Using knowledge base only.") return None try: print(f"Loading model from Hugging Face: {model_name}") # Load base tokenizer with special tokens base_tokenizer = AutoTokenizer.from_pretrained( "CraneAILabs/swahili-gemma-1b", trust_remote_code=True ) # Add special tokens special_tokens = { "additional_special_tokens": ["<|user|>", "<|assistant|>"], "bos_token": "", "eos_token": "" } base_tokenizer.add_special_tokens(special_tokens) base_tokenizer.pad_token = base_tokenizer.eos_token # Load model from Hugging Face Hub model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True, use_cache=True ) pipe = pipeline( "text-generation", model=model, tokenizer=base_tokenizer, torch_dtype=torch.float32 ) print("✅ Model loaded successfully from Hugging Face Hub!") return pipe except Exception as e: print(f"❌ Error loading model: {e}") print("Continuing with knowledge base only...") return None # [KEEP ALL YOUR EXISTING METHODS - normalize_text, extract_key_terms, preprocess_query, calculate_similarity, retrieve_documents, generate_with_model, answer_query] class GeneralChatbot: def __init__(self, model_name: str = None): """ Initialize general chatbot that uses model without RAG """ self.model = self.load_model(model_name) def load_model(self, model_name: str = None): """Load the model for general conversation from Hugging Face Hub""" if not model_name: print("No model name provided for general chat. Using basic responses.") return None try: print(f"Loading general chat model from Hugging Face: {model_name}") # Load base tokenizer base_tokenizer = AutoTokenizer.from_pretrained( "CraneAILabs/swahili-gemma-1b", trust_remote_code=True ) # Add special tokens special_tokens = { "additional_special_tokens": ["<|user|>", "<|assistant|>"], "bos_token": "", "eos_token": "" } base_tokenizer.add_special_tokens(special_tokens) base_tokenizer.pad_token = base_tokenizer.eos_token # Load model from Hugging Face Hub model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True, use_cache=True ) pipe = pipeline( "text-generation", model=model, tokenizer=base_tokenizer, torch_dtype=torch.float32 ) pipe.tokenizer = base_tokenizer print("✅ General chat model loaded successfully!") return pipe except Exception as e: print(f"❌ Error loading general chat model: {e}") return None def generate_response(self, query: str) -> str: """Generate response using model's general knowledge only""" if not query.strip(): return "Hello! How can I help you today?" if self.model is None: # Basic fallback responses fallback_responses = { "hello": "Hello! How are you doing today?", "hi": "Hi there! What would you like to talk about?", "how are you": "I'm doing well, thank you for asking! How about you?", "what is your name": "I'm a general conversation assistant. What's your name?", "bye": "Goodbye! Have a great day!", "thank you": "You're welcome! Is there anything else I can help you with?", } query_lower = query.lower().strip() for key, response in fallback_responses.items(): if key in query_lower: return response return "I understand you want to chat, but I don't have access to my full capabilities right now. What would you like to talk about?" try: # Use the chat format that the model was trained with formatted_prompt = f"user\n{query}\nmodel\n" response = self.model( formatted_prompt, max_new_tokens=150, temperature=0.7, do_sample=True, repetition_penalty=1.1, pad_token_id=self.model.tokenizer.eos_token_id, eos_token_id=self.model.tokenizer.eos_token_id, use_cache=True ) generated_text = response[0]['generated_text'] # Extract assistant response if "model\n" in generated_text: answer = generated_text.split("model\n")[-1] answer = answer.replace("", "").strip() else: answer = generated_text.replace(formatted_prompt, "").strip() return answer if answer else "I'm not sure how to respond to that. Could you try rephrasing?" except Exception as e: print(f"Error generating general response: {e}") return "I'm having trouble generating a response right now. What else would you like to talk about?" # Initialize both systems def initialize_systems(): """Initialize both RAG and general chat systems""" # Use your Hugging Face model model_name = "BrianGithaiga/swahili-gemma-finetuned" # For knowledge base knowledge_base_path = "combined.json" # Upload this file to your Space if needed print("🚀 Initializing systems with model:", model_name) # Initialize both systems rag_system = SwahiliLiteratureRAG(knowledge_base_path, model_name) general_chat = GeneralChatbot(model_name) return rag_system, general_chat # Create both system instances print("Initializing both systems...") rag_system, general_chat = initialize_systems() print("Both systems initialized successfully!") # [KEEP YOUR EXISTING create_app() FUNCTION EXACTLY AS IS] # For Hugging Face Spaces deployment if __name__ == "__main__": # Set environment variables for better performance os.environ['TORCH_LOGS'] = '' os.environ['TORCHDYNAMO_VERBOSE'] = '0' os.environ['TOKENIZERS_PARALLELISM'] = 'false' print("Creating Gradio app...") app = create_app() # For Hugging Face Spaces app.launch( server_name="0.0.0.0", server_port=7860, share=False )