"""Spec Kit Copilot: a Gradio UI + FastAPI backend around the WhiteRabbitNeo
code-generation model (Qwen-2.5-Coder-7B fine-tune)."""

import gradio as gr
import torch
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Model Setup ---
model_path = "WhiteRabbitNeo/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",  # requires accelerate
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


# --- Backend function ---
def generate_code(user_prompt, temperature=0.75, top_p=1.0, max_tokens=2048, top_k=50):
    """Generate code for *user_prompt* and return it in a Markdown code fence.

    Args:
        user_prompt: Natural-language description of the code to generate.
        temperature: Sampling temperature; 0 switches to greedy decoding
            (``do_sample=True`` with ``temperature=0`` raises in transformers).
        top_p: Nucleus-sampling cutoff.
        max_tokens: Maximum NEW tokens to generate (prompt length excluded).
        top_k: Top-k sampling cutoff.

    Returns:
        The generated text wrapped in a ```python fenced block for Markdown
        rendering in the Gradio preview.
    """
    # Place the prompt on the model's device rather than a hard-coded
    # "cuda"/"cpu" string — with device_map="auto" the model chooses.
    tokens = tokenizer.encode(user_prompt, return_tensors="pt").to(model.device)

    # temperature == 0.0 is a valid slider position but invalid for sampling;
    # fall back to greedy decoding in that case.
    do_sample = temperature > 0

    with torch.no_grad():
        output = model.generate(
            input_ids=tokens,
            # max_new_tokens is the clearer equivalent of
            # max_length = prompt_len + max_tokens.
            max_new_tokens=int(max_tokens),  # Gradio sliders deliver floats
            do_sample=do_sample,
            temperature=temperature if do_sample else 1.0,
            top_p=top_p,
            top_k=int(top_k),  # transformers requires an int here
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt tokens so only newly generated text is returned.
    generated_tokens = output[0][tokens.shape[1]:]
    code_string = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return f"```python\n{code_string}\n```"


# --- FastAPI backend ---
api = FastAPI()


@api.get("/ping")
async def ping():
    """Lightweight liveness probe."""
    return {"status": "pong"}


# --- Gradio UI ---
with gr.Blocks(title="Spec Kit Copilot") as demo:
    with gr.Tab("AI Code Generation"):
        gr.Markdown("## WhiteRabbitNeo AI Code Generator")
        user_input = gr.Textbox(
            label="Describe code to generate",
            lines=4,
            placeholder="E.g., Python function to sort a list",
        )
        temperature = gr.Slider(0.0, 1.0, 0.75, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, 1.0, label="Top-p")
        max_tokens = gr.Slider(256, 4096, 2048, step=128, label="Max Tokens")
        top_k = gr.Slider(0, 100, 50, label="Top-k")
        generate_btn = gr.Button("Generate Code")
        preview = gr.Markdown()

        generate_btn.click(
            fn=generate_code,
            inputs=[user_input, temperature, top_p, max_tokens, top_k],
            outputs=preview,
        )

# --- Mount Gradio inside FastAPI ---
# BUG FIX: Gradio has no `gr.mount_app`; the supported API is
# `gr.mount_gradio_app(fastapi_app, blocks, path)`, which mounts the Gradio
# UI onto the FastAPI app (not the other way around). The /ping route stays
# reachable on the combined app; the UI is served at "/".
app = gr.mount_gradio_app(api, demo, path="/")

if __name__ == "__main__":
    # uvicorn ships as a Gradio/FastAPI runtime dependency; imported here
    # because it is only needed when running this file as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)