Spaces: Running on Zero
William Mattingly committed
Commit e2c034d · 1 Parent(s): 1cb798b

trying to fix flash attention

Files changed:
- app.py (+1, -0)
- requirements.txt (+2, -1)
app.py CHANGED

@@ -17,6 +17,7 @@ processor = AutoProcessor.from_pretrained(
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
     device_map="auto",
     trust_remote_code=True,
 )
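The commit passes attn_implementation="flash_attention_2" to from_pretrained so the model runs on FlashAttention 2 kernels instead of the default attention backend. Below is a minimal sketch of the resulting loading pattern; the fallback to PyTorch SDPA when flash_attn is not importable is an illustrative addition, not part of the Space's app.py, and model_id is a placeholder for whatever checkpoint the Space actually loads.

    # Sketch only: fallback logic and model_id are assumptions, not taken from app.py.
    import importlib.util

    import torch
    from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

    model_id = "Qwen/Qwen2.5-VL-7B-Instruct"  # placeholder checkpoint

    # Use FlashAttention 2 only if the flash_attn package is importable;
    # otherwise fall back to PyTorch's built-in SDPA attention.
    attn_impl = "flash_attention_2" if importlib.util.find_spec("flash_attn") else "sdpa"

    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        attn_implementation=attn_impl,
        device_map="auto",
        trust_remote_code=True,
    )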
requirements.txt CHANGED

@@ -6,4 +6,5 @@ accelerate
 pillow
 safetensors
 huggingface-hub
-pydantic==2.10.6
+pydantic==2.10.6
+https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.0.8/flash_attn-2.7.4.post1+cu126torch2.7-cp310-cp310-linux_x86_64.whl
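Pinning a prebuilt wheel avoids compiling flash-attn from source during the Space build, but the wheel filename encodes the exact build it targets: CUDA 12.6, torch 2.7, CPython 3.10, linux x86_64. pip only validates the cp310/linux_x86_64 tags; the cu126/torch2.7 part lives in the local version string, so a mismatch there installs cleanly and then fails at import time. A quick environment check (illustrative sketch, not part of this commit):

    # Print the runtime facts the pinned wheel depends on.
    import platform
    import sys

    import torch

    print("python  :", f"cp{sys.version_info.major}{sys.version_info.minor}")  # wheel expects cp310
    print("torch   :", torch.__version__)                                      # wheel expects 2.7.x
    print("cuda    :", torch.version.cuda)                                      # wheel expects 12.6
    print("platform:", platform.system(), platform.machine())                   # wheel expects Linux x86_64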