PromptEnhancer_32B-FlashPack

Sleeping

App Files Files Community

rahul7star committed on Oct 16

Commit

2051f2d

verified ·

1 Parent(s): f2fce5e

Update app_cpu.py

Browse files

Files changed (1) hide show

app_cpu.py +61 -59

app_cpu.py CHANGED Viewed

@@ -6,17 +6,17 @@ import gradio as gr
 from huggingface_hub import snapshot_download
 # ============================================================
-# 1️⃣ Model auto-download during app load
 # ============================================================
-DEFAULT_MODEL_PATH = os.environ.get("MODEL_OUTPUT_PATH", "PromptEnhancer/PromptEnhancer-32B")
-print(f"🔄 Checking local model at startup: {DEFAULT_MODEL_PATH}")
-local_model_dir = snapshot_download(repo_id=DEFAULT_MODEL_PATH)
-print(f"✅ Model downloaded and cached at: {local_model_dir}")
 # ============================================================
-# 2️⃣ Helper utils
 # ============================================================
 try:
@@ -26,46 +26,52 @@ except Exception:
         return None, None
 def replace_single_quotes(text):
     pattern = r"\B'([^']*)'\B"
     replaced_text = re.sub(pattern, r'"\1"', text)
-    replaced_text = replaced_text.replace("’", "”").replace("‘", "“")
-    return replaced_text
 def _str_to_dtype(dtype_str):
-    if dtype_str in ("bfloat16", "float16", "float32"):
-        return dtype_str
-    return "float32"
 # ============================================================
-# 3️⃣ CPU inference function
 # ============================================================
-def cpu_predict(model_path, torch_dtype, prompt_cot, sys_prompt, temperature, max_new_tokens):
-    import torch
-    from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
-    if not logging.getLogger(__name__).handlers:
-        logging.basicConfig(level=logging.INFO)
-    logger = logging.getLogger(__name__)
     dtype = {
         "bfloat16": torch.bfloat16,
         "float16": torch.float16,
         "float32": torch.float32,
     }.get(torch_dtype, torch.float32)
-    # Force CPU
     device = "cpu"
-    logger.info("🔧 Loading model to CPU...")
-    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        model_path,
-        torch_dtype=dtype,
-        device_map={"": device},  # CPU-only mapping
-        attn_implementation="sdpa",
-    )
-    processor = AutoProcessor.from_pretrained(model_path)
     org_prompt_cot = prompt_cot
     user_prompt_format = sys_prompt + "\n" + org_prompt_cot
     messages = [{"role": "user", "content": [{"type": "text", "text": user_prompt_format}]}]
@@ -81,7 +87,7 @@ def cpu_predict(model_path, torch_dtype, prompt_cot, sys_prompt, temperature, ma
         return_tensors="pt",
     ).to(device)
-    logger.info("🧠 Running generation on CPU...")
     generated_ids = model.generate(
         **inputs,
         max_new_tokens=int(max_new_tokens),
@@ -103,6 +109,7 @@ def cpu_predict(model_path, torch_dtype, prompt_cot, sys_prompt, temperature, ma
     output_res = output_text[0]
     try:
         assert output_res.count("think>") == 2
         new_prompt = output_res.split("think>")[-1].lstrip("\n")
         new_prompt = replace_single_quotes(new_prompt)
@@ -112,65 +119,60 @@ def cpu_predict(model_path, torch_dtype, prompt_cot, sys_prompt, temperature, ma
     return new_prompt, ""
 # ============================================================
-# 4️⃣ Gradio interface
 # ============================================================
 def run_single(prompt, sys_prompt, temperature, max_new_tokens, torch_dtype, state):
     if not prompt.strip():
-        return "", "请先输入提示词。", state
     t0 = time.time()
     try:
-        new_prompt, err = cpu_predict(
-            model_path=local_model_dir,
-            torch_dtype=_str_to_dtype(torch_dtype),
-            prompt_cot=prompt,
-            sys_prompt=sys_prompt,
-            temperature=temperature,
-            max_new_tokens=max_new_tokens,
-        )
         dt = time.time() - t0
-        msg = f"耗时：{dt:.2f}s"
         if err:
-            msg = f"{err}（{msg}）"
         return new_prompt, msg, state
     except Exception as e:
-        return "", f"调用失败：{e}", state
 # ============================================================
-# 5️⃣ UI
 # ============================================================
-test_list_zh = [
-    "第三人称视角，赛车在城市赛道上飞驰，左上角是小地图，地图下面是当前名次，右下角仪表盘显示当前速度。",
-]
-test_list_en = [
-    "Create a painting depicting a 30-year-old white-collar worker on a business trip by plane.",
 ]
-with gr.Blocks(title="Prompt Enhancer (CPU Mode)") as demo:
-    gr.Markdown("## 🧩 Prompt Enhancer (CPU Mode — model preloaded)")
     with gr.Row():
         sys_prompt = gr.Textbox(
-            label="系统提示词",
-            value="请根据用户的输入，生成思考过程的思维链并改写提示词：",
             lines=3
         )
         temperature = gr.Slider(0, 1, value=0.1, step=0.05, label="Temperature")
         max_new_tokens = gr.Slider(16, 4096, value=2048, step=16, label="Max New Tokens")
-        torch_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="torch_dtype")
     state = gr.State(value=None)
-    with gr.Tab("推理"):
         with gr.Row():
             with gr.Column(scale=2):
-                prompt = gr.Textbox(label="输入提示词", lines=6, placeholder="在此粘贴要改写的提示词...")
-                run_btn = gr.Button("生成重写", variant="primary")
-                gr.Examples(examples=test_list_zh + test_list_en, inputs=prompt)
             with gr.Column(scale=3):
-                out_text = gr.Textbox(label="重写结果", lines=10)
-                out_info = gr.Markdown("✅ 模型已在CPU加载。")
         run_btn.click(
             run_single,

 from huggingface_hub import snapshot_download
 # ============================================================
+# 1️⃣ Pre-download model during app startup
 # ============================================================
+DEFAULT_MODEL_REPO = os.environ.get("MODEL_OUTPUT_PATH", "PromptEnhancer/PromptEnhancer-32B")
+print(f"🔄 Checking and downloading model repo: {DEFAULT_MODEL_REPO}")
+local_model_dir = snapshot_download(repo_id=DEFAULT_MODEL_REPO)
+print(f"✅ Model cached locally at: {local_model_dir}")
 # ============================================================
+# 2️⃣ Utilities
 # ============================================================
 try:
         return None, None
 def replace_single_quotes(text):
+    """Turn single-quoted spans (not apostrophes inside words) into double-quoted ones, and curly single quotes into curly double quotes."""
     pattern = r"\B'([^']*)'\B"
     replaced_text = re.sub(pattern, r'"\1"', text)
+    return replaced_text.replace("’", "”").replace("‘", "“")
 def _str_to_dtype(dtype_str):
+    """Normalize torch dtype string."""
+    return dtype_str if dtype_str in ("bfloat16", "float16", "float32") else "float32"
 # ============================================================
+# 3️⃣ Load model once (from local snapshot)
 # ============================================================
+import torch
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("PromptEnhancerCPU")
+dtype = torch.float32  # Default for CPU
+logger.info("🔧 Loading pre-downloaded model from local path...")
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    local_model_dir,
+    torch_dtype=dtype,
+    device_map={"": "cpu"},  # Force CPU only
+    attn_implementation="sdpa",
+)
+processor = AutoProcessor.from_pretrained(local_model_dir)
+logger.info("✅ Model loaded and ready on CPU.")
+# ============================================================
+# 4️⃣ Inference (uses already-loaded model)
+# ============================================================
+def cpu_predict(prompt_cot, sys_prompt, temperature, max_new_tokens, torch_dtype):
+    """Generate rewritten prompt using preloaded model on CPU."""
     dtype = {
         "bfloat16": torch.bfloat16,
         "float16": torch.float16,
         "float32": torch.float32,
     }.get(torch_dtype, torch.float32)
     device = "cpu"
     org_prompt_cot = prompt_cot
     user_prompt_format = sys_prompt + "\n" + org_prompt_cot
     messages = [{"role": "user", "content": [{"type": "text", "text": user_prompt_format}]}]
         return_tensors="pt",
     ).to(device)
+    logger.info("🧠 Running generation (CPU)...")
     generated_ids = model.generate(
         **inputs,
         max_new_tokens=int(max_new_tokens),
     output_res = output_text[0]
     try:
+        # Keep only the text after the last "think>" marker; exactly two markers are expected
         assert output_res.count("think>") == 2
         new_prompt = output_res.split("think>")[-1].lstrip("\n")
         new_prompt = replace_single_quotes(new_prompt)
     return new_prompt, ""
 # ============================================================
+# 5️⃣ Gradio Logic
 # ============================================================
 def run_single(prompt, sys_prompt, temperature, max_new_tokens, torch_dtype, state):
+    """Handle one user query from Gradio."""
     if not prompt.strip():
+        return "", "Please enter a prompt first.", state
     t0 = time.time()
     try:
+        new_prompt, err = cpu_predict(prompt, sys_prompt, temperature, max_new_tokens, torch_dtype)
         dt = time.time() - t0
+        msg = f"Time taken: {dt:.2f}s"
         if err:
+            msg = f"{err} ({msg})"
         return new_prompt, msg, state
     except Exception as e:
+        return "", f"Error: {e}", state
 # ============================================================
+# 6️⃣ Gradio UI
 # ============================================================
+example_prompts = [
+    "Third-person view: a race car speeding through a city track, with a mini-map in the top-left corner and a speedometer in the bottom-right.",
+    "Anime-style portrait of a girl with short purple hair and soft lighting.",
+    "Pointillism painting: two fishermen carrying crates by the seaside, with boats docked nearby.",
+    "A Van Gogh-inspired wheat field tangled with swirling blue nebulae and fiery sunflowers.",
+    "Create a painting depicting a 30-year-old businesswoman on a plane trip.",
 ]
+with gr.Blocks(title="Prompt Enhancer (CPU Preload)") as demo:
+    gr.Markdown("## 🧩 Prompt Enhancer (CPU Mode — Model Preloaded via `snapshot_download`)")
     with gr.Row():
         sys_prompt = gr.Textbox(
+            label="System Prompt",
+            value="Please think step-by-step and rewrite the user’s prompt in a more refined, creative, and detailed way:",
             lines=3
         )
         temperature = gr.Slider(0, 1, value=0.1, step=0.05, label="Temperature")
         max_new_tokens = gr.Slider(16, 4096, value=2048, step=16, label="Max New Tokens")
+        torch_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Torch Dtype")
     state = gr.State(value=None)
+    with gr.Tab("Inference"):
         with gr.Row():
             with gr.Column(scale=2):
+                prompt = gr.Textbox(label="Input Prompt", lines=6, placeholder="Paste the prompt to rewrite here...")
+                run_btn = gr.Button("Generate Rewrite", variant="primary")
+                gr.Examples(examples=example_prompts, inputs=prompt)
             with gr.Column(scale=3):
+                out_text = gr.Textbox(label="Rewritten Prompt", lines=10)
+                out_info = gr.Markdown("✅ Model loaded on CPU (from `snapshot_download` cache).")
         run_btn.click(
             run_single,