what2up committed
Commit 2b0192c · verified · 1 Parent(s): 064a79c

Update app.py

Files changed (1)
  1. app.py +24 -8
app.py CHANGED
@@ -1,16 +1,32 @@
+# Install dependencies (on a Hugging Face Space, list these in requirements.txt)
+# pip install transformers accelerate bitsandbytes huggingface_hub
+
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForVision2Seq, BitsAndBytesConfig
+import torch
 
-# model_name = "Qwen/Qwen3-8B"
-model_name = "Qwen/Qwen3-0.6B"
+# Load the model (load directly in 4-bit quantization to avoid OOM)
+model_name = "ByteDance-Seed/UI-TARS-1.5-7B"  # or UI-TARS-1.5-7B (if you have access)
 
-# load the tokenizer and the model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForVision2Seq.from_pretrained(
+    model_name,
+    device_map="auto",                   # place layers on the available devices automatically
+    torch_dtype=torch.float16,
+    quantization_config=BitsAndBytesConfig(  # 4-bit quantization
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",       # must be nf4 (the only type supported on CPU)
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True,  # optional: shaves roughly 0.4% off the footprint
+    ),
+    low_cpu_mem_usage=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# tokenizer.save_pretrained("./qwen3")
-# model.save_pretrained("./qwen3")
+# # Save the quantized model locally (or to Hugging Face)
+# model.save_pretrained("./ui-tars-8b-4bit")
+# tokenizer.save_pretrained("./ui-tars-8b-4bit")
 
 def greet(input):
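The hunk ends before the body of greet, so the handler itself is not shown. As context only, here is a minimal sketch of how greet could drive the 4-bit model for a text-only prompt; the function body, the generation settings, and the Interface wiring are assumptions, not part of this commit.

# Hypothetical handler body (not in the diff): text-only generation with the quantized model
def greet(input):
    # Tokenize the prompt and move it to the device the model was placed on
    inputs = tokenizer(input, return_tensors="pt").to(model.device)
    # Generate a short completion; the token budget is illustrative
    output_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Assumed wiring: expose the handler as a simple text-in/text-out Gradio app
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()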