Load the base model weights from the "half" revision
Browse files
app.py
CHANGED
|
@@ -10,6 +10,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 10 |
return_dict=True,
|
| 11 |
load_in_8bit=True,
|
| 12 |
device_map="auto",
|
|
|
|
|
|
|
| 13 |
)
|
| 14 |
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
|
| 15 |
# Load the Lora model
|
|
|
|
| 10 |
return_dict=True,
|
| 11 |
load_in_8bit=True,
|
| 12 |
device_map="auto",
|
| 13 |
+ revision="half",
|
| 14 |
+ # low_cpu_mem_usage=True
|
| 15 |
)
|
| 16 |
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
|
| 17 |
# Load the Lora model
|