import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
import spaces

# Model configuration
MODEL_ID = "numind/NuMarkdown-8B-reasoning"

# Load processor
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    min_pixels=100 * 28 * 28,
    max_pixels=5000 * 28 * 28,
)

# Load model
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
)


@spaces.GPU
def process_image(image):
    """
    Process an image using the NuMarkdown-8B-reasoning model.

    Args:
        image: PIL Image object

    Returns:
        tuple: (reasoning, answer) extracted from the model output
    """
    if image is None:
        return "Please upload an image.", ""

    try:
        # Convert image to RGB if needed
        img = image.convert("RGB")

        # Prepare messages for the model
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
            ],
        }]

        # Apply chat template
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Process inputs
        model_input = processor(
            text=prompt,
            images=[img],
            return_tensors="pt",
        ).to(model.device)

        # Generate output (do_sample=True is required for temperature to take effect)
        with torch.no_grad():
            model_output = model.generate(
                **model_input,
                do_sample=True,
                temperature=0.7,
                max_new_tokens=5000,
            )

        # Decode result
        result = processor.decode(model_output[0])

        # Extract reasoning and answer: the model wraps its chain of thought in
        # <think>...</think> and the final result in <answer>...</answer>
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
        except IndexError:
            reasoning = "No reasoning found in output."

        try:
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            answer = "No answer found in output."

        return reasoning.strip(), answer.strip()

    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        return error_msg, error_msg
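# A minimal sketch of the tag parsing used in process_image, factored out for
# illustration. The <think>/<answer> delimiters are an assumption based on the
# model's reasoning-trace format; verify them against the model card if the
# parsing above comes back empty.
def extract_between(text: str, start_tag: str, end_tag: str) -> str:
    """Return the substring between start_tag and end_tag, or "" if either is missing.

    Example:
        >>> extract_between("<think>scan layout</think><answer>| A |</answer>",
        ...                 "<answer>", "</answer>")
        '| A |'
    """
    try:
        return text.split(start_tag, 1)[1].split(end_tag, 1)[0]
    except IndexError:
        return ""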
def create_interface():
    """Create and configure the Gradio interface."""
    with gr.Blocks(
        title="NuMarkdown-8B Reasoning Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { max-width: 1200px !important; }
        .image-container, .output-container { height: 600px !important; }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🤖 NuMarkdown-8B Reasoning Demo

            Upload an image and let the NuMarkdown-8B model analyze it with detailed reasoning.
            The model will show both its thinking process and its final answer.
            """
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown("### 📸 Upload Your Image")
                image_input = gr.Image(
                    type="pil",
                    label="Input Image",
                    height=600,
                    container=True,
                )
                process_btn = gr.Button(
                    "🔍 Analyze Image",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                gr.Markdown("### 🧠 Model Reasoning")
                reasoning_output = gr.Textbox(
                    label="Thinking Process",
                    lines=15,
                    max_lines=20,
                    placeholder="The model's reasoning will appear here...",
                    container=True,
                    show_copy_button=True,
                )

                gr.Markdown("### 💡 Final Answer")
                answer_output = gr.Textbox(
                    label="Answer",
                    lines=10,
                    max_lines=15,
                    placeholder="The model's answer will appear here...",
                    container=True,
                    show_copy_button=True,
                )

        # Event handlers (show_progress expects a string in Gradio 4.x)
        process_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress="full",
        )

        # Also trigger on image upload
        image_input.change(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress="full",
        )

        gr.Markdown(
            """
            ---
            ### 📋 How to Use:
            1. **Upload an image** using the file uploader on the left
            2. **Click "Analyze Image"** or wait for automatic processing
            3. **View the results** on the right:
               - **Reasoning**: See how the model thinks through the problem
               - **Answer**: Get the final conclusion or analysis

            ### 🔧 Model Details:
            - **Model**: numind/NuMarkdown-8B-reasoning
            - **Type**: Vision-Language Model with reasoning capabilities
            - **Features**: Detailed thinking process + final answer

            *This demo runs on HuggingFace Zero GPU Spaces for fast inference.*
            """
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    # These settings apply to local runs; Spaces supplies its own server config
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
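# For a quick smoke test without launching the UI, process_image can be called
# directly (the filename below is illustrative; any local image works):
#
#   from PIL import Image
#   reasoning, answer = process_image(Image.open("sample_page.png"))
#   print(answer)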