rahul7star committed
Commit 401afad · verified · 1 Parent(s): 8c48eed

Update app_low.py

Files changed (1)
  1. app_low.py +67 -54
app_low.py CHANGED
@@ -1,75 +1,88 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from huggingface_hub import snapshot_download
-import os
 
 # ============================================================
-# 1️⃣ Download model efficiently (avoid exceeding space limits)
 # ============================================================
-MODEL_ID = "Qwen/Qwen2.5-1.5B"
 
-# Store in /tmp to reduce Space storage pressure
-model_dir = snapshot_download(repo_id=MODEL_ID, cache_dir="/tmp/qwen_model")
 
 # ============================================================
-# 2️⃣ Load model with CPU or GPU offload
 # ============================================================
-device = "cuda" if torch.cuda.is_available() else "cpu"
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_dir,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
-    low_cpu_mem_usage=True,
-)
 
-tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
 # ============================================================
-# 3️⃣ Define chat function
 # ============================================================
-def chat_with_qwen(message, history):
-    history = history or []
-    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
-    for human, bot in history:
-        messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": bot})
-    messages.append({"role": "user", "content": message})
-
-    # Tokenize input messages
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,  # needed so inputs is dict-like, not a bare tensor
-        return_tensors="pt"
     )
 
-    inputs = {k: v.to(device) for k, v in inputs.items()}
 
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=256,
-            temperature=0.8,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
 
-    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-    history.append((message, response))
-    return history
 
 # ============================================================
-# 4️⃣ Gradio UI
 # ============================================================
-with gr.Blocks(theme="soft", title="Qwen 2.5 Chatbot") as demo:
-    gr.Markdown("## 🤖 Qwen 2.5 Chatbot — Optimized for CPU/GPU Offload")
-    # history is (user, bot) tuples, so use the tuples chat format
-    chatbot = gr.Chatbot(height=480, label="Chat with Qwen 2.5", type="tuples")
-    msg = gr.Textbox(placeholder="Type your question here...", label="Your Message")
-    clear = gr.Button("🧹 Clear Chat")
-
-    msg.submit(chat_with_qwen, [msg, chatbot], chatbot)
-    clear.click(lambda: None, None, chatbot, queue=False)
-
-demo.launch(server_name="0.0.0.0", server_port=7860)
 
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 
 # ============================================================
+# 1️⃣ Load model and tokenizer
 # ============================================================
+MODEL_ID = "gokaygokay/prompt-enhancer-gemma-3-270m-it"
 
+# Use CPU-friendly settings
+device = 0 if torch.cuda.is_available() else -1
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+
+# Text-generation pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=device,  # 0 for GPU, -1 for CPU
+)
 
 # ============================================================
+# 2️⃣ Define the generation function
 # ============================================================
+def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
+    """Enhance the user prompt and maintain the chat history."""
+    if not user_prompt.strip():
+        return chat_history + [{"role": "assistant", "content": "⚠️ Please enter a prompt."}]
+
+    full_prompt = f"Enhance and expand the following prompt with more details and context: {user_prompt}"
+
+    # Generate only the continuation; return_full_text=False drops the echoed prompt
+    output = pipe(
+        full_prompt,
+        max_new_tokens=int(max_tokens),
+        temperature=float(temperature),
+        do_sample=True,
+        return_full_text=False,
+    )
+
+    result = output[0]["generated_text"].strip()
+    # gr.Chatbot(type="messages") expects role/content dicts, not [user, bot] pairs
+    chat_history = chat_history + [
+        {"role": "user", "content": user_prompt},
+        {"role": "assistant", "content": result},
+    ]
+    return chat_history
 
 # ============================================================
+# 3️⃣ Gradio UI
 # ============================================================
+with gr.Blocks(title="Prompt Enhancer – Gemma 3 270M", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # Prompt Enhancer (Gemma 3 270M)
+        Enter a short prompt, and the model will expand it with extra details, context, and creativity.
+        """
     )
 
+    with gr.Row():
+        chatbot = gr.Chatbot(height=400, label="Enhanced Prompts", type="messages")
+        with gr.Column(scale=1):
+            user_prompt = gr.Textbox(
+                placeholder="Enter a short prompt...",
+                label="Your Prompt",
+                lines=3,
+            )
+            # Keep the minimum above 0: temperature=0 with do_sample=True raises an error
+            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
+            max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens")
+            send_btn = gr.Button("🚀 Enhance Prompt", variant="primary")
+            clear_btn = gr.Button("🧹 Clear Chat")
 
+    # Bind functions
+    send_btn.click(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
+    user_prompt.submit(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
+    clear_btn.click(lambda: [], None, chatbot)
 
+    gr.Markdown(
+        """
+        ---
+        💡 Tips:
+        - Works best with short, descriptive prompts (e.g., "A cat sitting on a chair").
+        - Adjust temperature for creativity: higher = more diverse output.
+        """
+    )
 
 # ============================================================
+# 4️⃣ Launch
 # ============================================================
+if __name__ == "__main__":
+    demo.launch(show_error=True)
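
As a quick sanity check of the new generation path, a minimal sketch along these lines can be run outside Gradio (assuming the same checkpoint as the Space; with return_full_text=False the text-generation pipeline returns only the continuation, as a list of dicts with a "generated_text" key):

# Hypothetical smoke test for the pipeline path; not part of the commit itself.
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="gokaygokay/prompt-enhancer-gemma-3-270m-it",
    device=-1,  # CPU
)
out = pipe(
    "Enhance and expand the following prompt with more details and context: A cat sitting on a chair",
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    return_full_text=False,  # drop the echoed prompt, keep only new tokens
)
print(out[0]["generated_text"])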
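Because the Chatbot is created with type="messages", its value is a flat list of role/content dicts rather than [user, bot] pairs, which is the shape the enhance_prompt handler returns. A minimal sketch of that history format (the sample strings here are made up):

# History shape expected by gr.Chatbot(type="messages").
history = []
history += [
    {"role": "user", "content": "A cat sitting on a chair"},
    {"role": "assistant", "content": "A fluffy tabby cat lounging on a sunlit wooden chair..."},
]
# Returning this list from the click/submit handler re-renders the chat.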
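On the removed Qwen path, apply_chat_template(..., tokenize=True) returns a bare tensor unless return_dict=True is passed, which is why the deleted dict comprehension over inputs.items() only works with that flag. A sketch of the difference, assuming the Qwen tokenizer from the old version:

# Without return_dict=True, apply_chat_template returns a tensor of ids;
# with it, a dict-like BatchEncoding with input_ids and attention_mask.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B")
msgs = [{"role": "user", "content": "hi"}]
enc = tok.apply_chat_template(
    msgs,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
)
print(enc.keys())  # dict_keys(['input_ids', 'attention_mask'])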
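One more note on the deleted loading step: device_map="auto" dispatches layers across available devices via the accelerate package, so that path would only work with accelerate installed. A condensed sketch of what it did:

# Old loading path, condensed; device_map="auto" requires `pip install accelerate`.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,  # GPU offload
    low_cpu_mem_usage=True,  # stream weights in to cut peak RAM
)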