import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
import spaces

# Model configuration
MODEL_ID = "numind/NuMarkdown-8B-reasoning"

# Load processor
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    min_pixels=100 * 28 * 28,
    max_pixels=5000 * 28 * 28,
)

# Load model
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
)


@spaces.GPU
def process_image(image):
    """
    Process an image using the NuMarkdown-8B-reasoning model.

    Args:
        image: PIL Image object

    Returns:
        tuple: (reasoning, answer) extracted from the model output
    """
    if image is None:
        return "Please upload an image.", ""

    try:
        # Convert image to RGB if needed
        img = image.convert("RGB")

        # Prepare messages for the model
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
            ],
        }]

        # Apply chat template
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Process inputs
        model_input = processor(
            text=prompt,
            images=[img],
            return_tensors="pt",
        ).to(model.device)

        # Generate output (do_sample=True is required for temperature to take effect)
        with torch.no_grad():
            model_output = model.generate(
                **model_input,
                do_sample=True,
                temperature=0.7,
                max_new_tokens=5000,
            )

        # Decode result
        result = processor.decode(model_output[0])

        # Extract reasoning and answer: the model wraps its chain of thought in
        # <think>...</think> and the final result in <answer>...</answer>
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
        except IndexError:
            reasoning = "No reasoning found in output."

        try:
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            answer = "No answer found in output."

        return reasoning.strip(), answer.strip()

    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        return error_msg, error_msg
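# A minimal sketch of the tag parsing used in process_image, factored out for
# illustration. The <think>/<answer> delimiters are an assumption based on the
# model's reasoning-trace format; verify them against the model card if the
# parsing above comes back empty.
def extract_between(text: str, start_tag: str, end_tag: str) -> str:
    """Return the substring between start_tag and end_tag, or "" if either is missing.

    Example:
        >>> extract_between("<think>scan layout</think><answer>| A |</answer>",
        ...                 "<answer>", "</answer>")
        '| A |'
    """
    try:
        return text.split(start_tag, 1)[1].split(end_tag, 1)[0]
    except IndexError:
        return ""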
def create_interface():
    """Create and configure the Gradio interface."""
    with gr.Blocks(
        title="NuMarkdown-8B Reasoning Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { max-width: 1200px !important; }
        .image-container, .output-container { height: 600px !important; }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🤖 NuMarkdown-8B Reasoning Demo

            Upload an image and let the NuMarkdown-8B model analyze it with detailed reasoning.
            The model will show both its thinking process and its final answer.
            """
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown("### 📸 Upload Your Image")
                image_input = gr.Image(
                    type="pil",
                    label="Input Image",
                    height=600,
                    container=True,
                )
                process_btn = gr.Button(
                    "🔍 Analyze Image",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                gr.Markdown("### 🧠 Model Reasoning")
                reasoning_output = gr.Textbox(
                    label="Thinking Process",
                    lines=15,
                    max_lines=20,
                    placeholder="The model's reasoning will appear here...",
                    container=True,
                    show_copy_button=True,
                )

                gr.Markdown("### 💡 Final Answer")
                answer_output = gr.Textbox(
                    label="Answer",
                    lines=10,
                    max_lines=15,
                    placeholder="The model's answer will appear here...",
                    container=True,
                    show_copy_button=True,
                )

        # Event handlers (show_progress expects a string in Gradio 4.x)
        process_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress="full",
        )

        # Also trigger on image upload
        image_input.change(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress="full",
        )

        gr.Markdown(
            """
            ---
            ### 📋 How to Use:
            1. **Upload an image** using the file uploader on the left
            2. **Click "Analyze Image"** or wait for automatic processing
            3. **View the results** on the right:
               - **Reasoning**: See how the model thinks through the problem
               - **Answer**: Get the final conclusion or analysis

            ### 🔧 Model Details:
            - **Model**: numind/NuMarkdown-8B-reasoning
            - **Type**: Vision-Language Model with reasoning capabilities
            - **Features**: Detailed thinking process + final answer

            *This demo runs on HuggingFace Zero GPU Spaces for fast inference.*
            """
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    # These settings apply to local runs; Spaces supplies its own server config
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
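# For a quick smoke test without launching the UI, process_image can be called
# directly (the filename below is illustrative; any local image works):
#
#   from PIL import Image
#   reasoning, answer = process_image(Image.open("sample_page.png"))
#   print(answer)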