what2up committed
Commit 2b0192c · verified · 1 Parent(s): 064a79c

Update app.py

Files changed (1)
  1. app.py +24 -8
app.py CHANGED
@@ -1,16 +1,32 @@
+# Install dependencies (on a Hugging Face Space, list these in requirements.txt)
+# pip install transformers accelerate bitsandbytes huggingface_hub
+
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForVision2Seq, BitsAndBytesConfig
+import torch
 
-# model_name = "Qwen/Qwen3-8B"
-model_name = "Qwen/Qwen3-0.6B"
+# Load the model (load directly in 4-bit quantization to avoid OOM)
+model_name = "ByteDance-Seed/UI-TARS-1.5-7B"  # or UI-TARS-1.5-7B (if you have access)
 
-# load the tokenizer and the model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForVision2Seq.from_pretrained(
+    model_name,
+    device_map="auto",                   # place layers on the available devices automatically
+    torch_dtype=torch.float16,
+    quantization_config=BitsAndBytesConfig(  # 4-bit quantization
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",       # must be nf4 (the only type supported on CPU)
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True,  # optional: shaves roughly 0.4% off the footprint
+    ),
+    low_cpu_mem_usage=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# tokenizer.save_pretrained("./qwen3")
-# model.save_pretrained("./qwen3")
+# # Save the quantized model locally (or to Hugging Face)
+# model.save_pretrained("./ui-tars-8b-4bit")
+# tokenizer.save_pretrained("./ui-tars-8b-4bit")
 
 def greet(input):
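The hunk ends before the body of greet, so the handler itself is not shown. As context only, here is a minimal sketch of how greet could drive the 4-bit model for a text-only prompt; the function body, the generation settings, and the Interface wiring are assumptions, not part of this commit.

# Hypothetical handler body (not in the diff): text-only generation with the quantized model
def greet(input):
    # Tokenize the prompt and move it to the device the model was placed on
    inputs = tokenizer(input, return_tensors="pt").to(model.device)
    # Generate a short completion; the token budget is illustrative
    output_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Assumed wiring: expose the handler as a simple text-in/text-out Gradio app
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()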