Spaces:

rahul7star
/

wan2.1-Diffuser

Paused

App Files Files Community

rahul7star committed on Mar 20

Commit

74ff47f

verified ·

1 Parent(s): 82f32ec

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -27

app.py CHANGED Viewed

@@ -1,28 +1,82 @@
 import torch
-from diffusers.utils import export_to_video
-from diffusers import AutoencoderKLWan, WanPipeline
-from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
-model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
-vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
-flow_shift = 3.0  # 5.0 for 720P, 3.0 for 480P
-pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)
-pipe.to("cuda")
-pipe.load_lora_weights("NIVEDAN/wan2.1-lora")
-pipe.enable_model_cpu_offload() #for low-vram environments
-prompt = "nivedan"
-negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
-output = pipe(
-    prompt=prompt,
-    negative_prompt=negative_prompt,
-    height=480,
-    width=832,
-    num_frames=81,
-    guidance_scale=5.0,
-).frames[0]
-export_to_video(output, "output.mp4", fps=16)

 import torch
+import gradio as gr
+import imageio
+import os
+import requests
+from safetensors.torch import load_file
+from torchvision import transforms
+from PIL import Image
+import numpy as np
+import random
+# Define model URL and local path
+MODEL_URL = "https://huggingface.co/sarthak247/Wan2.1-T2V-1.3B-nf4/resolve/main/diffusion_pytorch_model.safetensors"
+MODEL_FILE = "diffusion_pytorch_model.safetensors"
+# Function to download model if not present
+def download_model():
+    if not os.path.exists(MODEL_FILE):
+        print("Downloading model...")
+        response = requests.get(MODEL_URL, stream=True)
+        if response.status_code == 200:
+            with open(MODEL_FILE, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+            print("Download complete!")
+        else:
+            raise RuntimeError(f"Failed to download model: {response.status_code}")
+# Load model weights manually
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Loading model on {device}...")
+try:
+    download_model()
+    model_weights = load_file(MODEL_FILE, device=device)
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    model_weights = None
+# Function to generate video using the model
+def generate_video(prompt):
+    """
+    Generates a video using the model based on the provided text prompt.
+    """
+    if model_weights is None:
+        return "Model failed to load. Please check the logs."
+    # Placeholder - actual inference logic should be implemented here
+    # Example of using the model to generate an image from a prompt
+    # For now, we'll create a random color image as a placeholder.
+    # Assuming the model generates an image based on the prompt (modify with actual logic)
+    width, height = 512, 512
+    img = Image.new("RGB", (width, height),
+                    color=(random.randint(0, 255),
+                           random.randint(0, 255),
+                           random.randint(0, 255)))  # Random color
+    # Transform the image to a tensor and convert it to a numpy array
+    transform = transforms.ToTensor()
+    frame = (transform(img).permute(1, 2, 0).numpy() * 255).astype(np.uint8)
+    # Create a fake video with repeated frames (replace with actual frame generation)
+    frames = [frame] * 16  # 16 repeated frames (replace with actual video frames from the model)
+    output_path = "output.mp4"
+    # Save frames as a video with 8 fps
+    imageio.mimsave(output_path, frames, fps=8)
+    return output_path
+# Gradio UI
+iface = gr.Interface(
+    fn=generate_video,
+    inputs=gr.Textbox(label="Enter Text Prompt"),
+    outputs=gr.Video(label="Generated Video"),
+    title="Wan2.1-T2V-1.3B Video Generation",
+    description="This app loads the model manually and generates text-to-video output."
+)
+iface.launch()