import os
import gradio as gr
import json
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
import re

# Install required packages (for Hugging Face Spaces compatibility)
os.system("pip install transformers==4.35.2 accelerate==0.24.1 sentencepiece==0.1.99 torch==2.1.2 gradio==4.13.0")

import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

print("PyTorch version:", torch.__version__)

class SwahiliLiteratureRAG:
    def __init__(self, knowledge_base_path: str = None, model_name: str = None):
        """
        Initialize the RAG system
        """
        self.knowledge_base = self.load_knowledge_base(knowledge_base_path)
        self.model = self.load_model(model_name)
        self.similarity_threshold = 0.15
        self.swahili_stop_words = {
            'ni', 'nini', 'na', 'ya', 'za', 'wa', 'la', 'kwa', 'katika', 'au', 'je', 
            'hii', 'ile', 'hilo', 'hiyo', 'hayo', 'hao', 'yule', 'huyu', 'huu', 
            'kuhusu', 'juu', 'chini', 'mbele', 'nyuma', 'ndani', 'nje', 'karibu', 
            'mbali', 'hapa', 'pale', 'kule', 'sasa', 'jana', 'kesho', 'leo'
        }
    
    def load_knowledge_base(self, path: str = None) -> List[Dict]:
        """Load your knowledge base from JSON file"""
        if path and os.path.exists(path):
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Error loading knowledge base: {e}")
        
        # Default sample data
        return [
            {
                "instruction": "Eleza kuhusu Fasihi Simulizi.",
                "input": "",
                "output": "Fasihi simulizi ni sanaa inayotumia lugha kuwasilisha ujumbe unaomhusu binadamu..."
            },
            {
                "instruction": "Tofautisha Fasihi na Sanaa Nyingine.",
                "input": "",
                "output": "Fasihi hutumia lugha na wahusika kuwasilisha maudhui..."
            },
            {
                "instruction": "Tofautisha Fasihi Simulizi na Fasihi Andishi.",
                "input": "",
                "output": "Fasihi simulizi huwasilishwa kwa mdomo... Fasihi andishi huwasilishwa kwa maandishi..."
            },
            {
                "instruction": "Eleza vipengele vya Fasihi Simulizi.",
                "input": "",
                "output": "Vipengele vya fasihi simulizi ni pamoja na lugha, mandhari, wahusika, maudhui, na mtindo."
            }
        ]
    
    def load_model(self, model_name: str = None):
        """Load your fine-tuned Gemma model from Hugging Face Hub"""
        if not model_name:
            print("No model name provided. Using knowledge base only.")
            return None
        
        try:
            print(f"Loading model from Hugging Face: {model_name}")
            
            # Load base tokenizer with special tokens
            base_tokenizer = AutoTokenizer.from_pretrained(
                "CraneAILabs/swahili-gemma-1b",
                trust_remote_code=True
            )
            
            # Add special tokens
            special_tokens = {
                "additional_special_tokens": ["<|user|>", "<|assistant|>"],
                "bos_token": "<start_of_turn>",
                "eos_token": "<end_of_turn>"
            }
            base_tokenizer.add_special_tokens(special_tokens)
            base_tokenizer.pad_token = base_tokenizer.eos_token
            
            # Load model from Hugging Face Hub
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                use_cache=True
            )
            
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=base_tokenizer,
                torch_dtype=torch.float32
            )
            
            print("✅ Model loaded successfully from Hugging Face Hub!")
            return pipe
            
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            print("Continuing with knowledge base only...")
            return None

    # [KEEP ALL YOUR EXISTING METHODS - normalize_text, extract_key_terms, preprocess_query, calculate_similarity, retrieve_documents, generate_with_model, answer_query]

class GeneralChatbot:
    def __init__(self, model_name: str = None):
        """
        Initialize general chatbot that uses model without RAG
        """
        self.model = self.load_model(model_name)
    
    def load_model(self, model_name: str = None):
        """Load the model for general conversation from Hugging Face Hub"""
        if not model_name:
            print("No model name provided for general chat. Using basic responses.")
            return None
        
        try:
            print(f"Loading general chat model from Hugging Face: {model_name}")
            
            # Load base tokenizer
            base_tokenizer = AutoTokenizer.from_pretrained(
                "CraneAILabs/swahili-gemma-1b",
                trust_remote_code=True
            )
            
            # Add special tokens
            special_tokens = {
                "additional_special_tokens": ["<|user|>", "<|assistant|>"],
                "bos_token": "<start_of_turn>",
                "eos_token": "<end_of_turn>"
            }
            base_tokenizer.add_special_tokens(special_tokens)
            base_tokenizer.pad_token = base_tokenizer.eos_token
            
            # Load model from Hugging Face Hub
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                use_cache=True
            )
            
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=base_tokenizer,
                torch_dtype=torch.float32
            )
            
            pipe.tokenizer = base_tokenizer
            
            print("✅ General chat model loaded successfully!")
            return pipe
            
        except Exception as e:
            print(f"❌ Error loading general chat model: {e}")
            return None

    def generate_response(self, query: str) -> str:
        """Generate response using model's general knowledge only"""
        if not query.strip():
            return "Hello! How can I help you today?"
        
        if self.model is None:
            # Basic fallback responses
            fallback_responses = {
                "hello": "Hello! How are you doing today?",
                "hi": "Hi there! What would you like to talk about?",
                "how are you": "I'm doing well, thank you for asking! How about you?",
                "what is your name": "I'm a general conversation assistant. What's your name?",
                "bye": "Goodbye! Have a great day!",
                "thank you": "You're welcome! Is there anything else I can help you with?",
            }
            
            query_lower = query.lower().strip()
            for key, response in fallback_responses.items():
                if key in query_lower:
                    return response
            
            return "I understand you want to chat, but I don't have access to my full capabilities right now. What would you like to talk about?"
        
        try:
            # Use the chat format that the model was trained with
            formatted_prompt = f"<start_of_turn>user\n{query}<end_of_turn>\n<start_of_turn>model\n"
            
            response = self.model(
                formatted_prompt,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                repetition_penalty=1.1,
                pad_token_id=self.model.tokenizer.eos_token_id,
                eos_token_id=self.model.tokenizer.eos_token_id,
                use_cache=True
            )
            
            generated_text = response[0]['generated_text']
            
            # Extract assistant response
            if "<start_of_turn>model\n" in generated_text:
                answer = generated_text.split("<start_of_turn>model\n")[-1]
                answer = answer.replace("<end_of_turn>", "").strip()
            else:
                answer = generated_text.replace(formatted_prompt, "").strip()
            
            return answer if answer else "I'm not sure how to respond to that. Could you try rephrasing?"
                
        except Exception as e:
            print(f"Error generating general response: {e}")
            return "I'm having trouble generating a response right now. What else would you like to talk about?"

# Initialize both systems
def initialize_systems():
    """Initialize both RAG and general chat systems"""
    # Use your Hugging Face model
    model_name = "BrianGithaiga/swahili-gemma-finetuned"
    
    # For knowledge base
    knowledge_base_path = "combined.json"  # Upload this file to your Space if needed
    
    print("🚀 Initializing systems with model:", model_name)
    
    # Initialize both systems
    rag_system = SwahiliLiteratureRAG(knowledge_base_path, model_name)
    general_chat = GeneralChatbot(model_name)
    
    return rag_system, general_chat

# Create both system instances
print("Initializing both systems...")
rag_system, general_chat = initialize_systems()
print("Both systems initialized successfully!")

# [KEEP YOUR EXISTING create_app() FUNCTION EXACTLY AS IS]

# For Hugging Face Spaces deployment
if __name__ == "__main__":
    # Set environment variables for better performance
    os.environ['TORCH_LOGS'] = ''
    os.environ['TORCHDYNAMO_VERBOSE'] = '0'
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    
    print("Creating Gradio app...")
    app = create_app()
    
    # For Hugging Face Spaces
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )