dream2589632147 committed on
Commit 393fc4f · verified · 1 Parent(s): fde2beb

Update app.py

Files changed (1)
  1. app.py +224 -172
app.py CHANGED
@@ -1,196 +1,248 @@
  import spaces
  import torch
- import os
- import subprocess
  import gradio as gr
- import sys
- # 🌟 Add this to silence the tokenizers warning
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
- # 🌟 Check the diffusers version and update if needed (on Spaces, add diffusers>=0.33.0 to requirements.txt)
- try:
-     import diffusers
-     if diffusers.__version__ < '0.33.0':
-         raise ImportError("diffusers version too old")
-     from diffusers import WanImageToVideoPipeline
-     from diffusers.utils import export_to_video, load_image
- except ImportError as e:
-     print(f"Import error: {e}")
-     print("Please update diffusers: pip install diffusers>=0.33.0")
-     sys.exit(1)
  import tempfile
  import numpy as np
  from PIL import Image
  import gc
- # (the remaining constant and MODELS definitions are unchanged)
- MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
  MAX_DIM = 832
  MIN_DIM = 480
  SQUARE_DIM = 640
  MULTIPLE_OF = 16
- DIMENSION_PRESETS = {
-     "4K (16:9 - Scaled Down)": (832, 468),
-     "YouTube Full HD (16:9)": (832, 468),
-     "Instagram Square (1:1)": (640, 640),
-     "Instagram Reels / TikTok (9:16)": (468, 832),
-     "Instagram Portrait (4:5)": (512, 640),
-     "Custom (Default)": (640, 360),
- }
- INPUT_IMAGE_INSTRUCTIONS = {
-     "4K (16:9 - Scaled Down)": "For best results, use an input image with a 16:9 aspect ratio, such as 1920x1080 or 3840x2160 pixels. The image will be cropped automatically to maintain the ratio if different.",
-     "YouTube Full HD (16:9)": "For best results, use an input image with a 16:9 aspect ratio, such as 1920x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
-     "Instagram Square (1:1)": "For best results, use a square input image with a 1:1 aspect ratio, such as 1080x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
-     "Instagram Reels / TikTok (9:16)": "For best results, use a vertical input image with a 9:16 aspect ratio, such as 1080x1920 pixels. The image will be cropped automatically to maintain the ratio if different.",
-     "Instagram Portrait (4:5)": "For best results, use a vertical input image with a 4:5 aspect ratio, such as 1080x1350 pixels. The image will be cropped automatically to maintain the ratio if different.",
-     "Custom (Default)": "For best results, use a horizontal input image with a 16:9 aspect ratio, such as 1920x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
- }
  FIXED_FPS = 16
  MIN_FRAMES_MODEL = 8
- MAX_FRAMES_MODEL = 720  # 45 seconds at 16 FPS (45 * 16 = 720)
  MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
  MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
- # 🌟 Prepare the input image according to the selected preset
- def prepare_image(image, preset_key):
-     if image is None:
-         raise ValueError("No image provided!")
-
-     target_width, target_height = DIMENSION_PRESETS.get(preset_key, DIMENSION_PRESETS["Custom (Default)"])
-
-     # Resize and crop to target dimensions while maintaining aspect ratio
-     image = image.convert("RGB")
-     image.thumbnail((target_width, target_height), Image.Resampling.LANCZOS)
-
-     # Calculate padding or cropping
      width, height = image.size
-     if width < target_width or height < target_height:
-         # Pad if smaller
-         padded = Image.new("RGB", (target_width, target_height), (0, 0, 0))
-         padded.paste(image, ((target_width - width) // 2, (target_height - height) // 2))
-         image = padded
      else:
-         # Crop center if larger
-         left = (width - target_width) // 2
-         top = (height - target_height) // 2
-         image = image.crop((left, top, left + target_width, top + target_height))
-
-     # Ensure dimensions are multiples of MULTIPLE_OF
-     width, height = image.size
-     width = (width // MULTIPLE_OF) * MULTIPLE_OF
-     height = (height // MULTIPLE_OF) * MULTIPLE_OF
-     if width > MAX_DIM: width = MAX_DIM
-     if height > MAX_DIM: height = MAX_DIM
-     if width < MIN_DIM: width = MIN_DIM
-     if height < MIN_DIM: height = MIN_DIM
-     image = image.resize((width, height), Image.Resampling.LANCZOS)
-
-     return image
- # 🌟 Main app function: generate a video from an image and a prompt only (with lazy loading of the model)
- @spaces.GPU(duration=600)  # 10-minute timeout to allow more time
- @torch.no_grad()
- def generate_video_only(image, prompt, negative_prompt, num_frames, preset_key, guidance_scale=7.5, num_inference_steps=10):  # Reduced steps to 10
-     try:
-         # Lazy load the pipeline inside the function to avoid startup issues
-         print("Loading model...")
-         pipe = WanImageToVideoPipeline.from_pretrained(
-             MODEL_ID,
-             torch_dtype=torch.bfloat16,
-         ).to('cuda')
-         pipe.enable_model_cpu_offload()  # Offload to CPU for memory savings
-
-         # Optional: Load LoRA if possible
-         try:
-             pipe.load_lora_weights(
-                 "Kijai/WanVideo_comfy",
-                 weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-                 adapter_name="lightx2v",
-                 low_cpu_mem_usage=True
-             )
-             print("LoRA weights loaded successfully!")
-         except Exception as e:
-             print(f"Warning: LoRA load failed: {e}")
-             print("Proceeding without LoRA.")
-
-         # Memory cleanup before generation
-         gc.collect()
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-             print(f"GPU Memory before generation: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
-
-         # Prepare image and generate
-         prepared_image = prepare_image(image, preset_key)
-         height, width = prepared_image.size[1], prepared_image.size[0]
-         num_frames = max(MIN_FRAMES_MODEL, min(num_frames, MAX_FRAMES_MODEL))
-
-         print("Generating video...")
-         video_frames = pipe(
-             prompt=prompt,
-             image=prepared_image,
-             negative_prompt=negative_prompt,
-             num_frames=num_frames,
-             height=height,
-             width=width,
-             guidance_scale=guidance_scale,
-             num_inference_steps=num_inference_steps,
-         ).frames[0]
-
-         # Export to temporary MP4
-         with tempfile.TemporaryDirectory() as tmpdirname:
-             temp_video_path = os.path.join(tmpdirname, "temp_video.mp4")
-             export_to_video(video_frames, temp_video_path, fps=FIXED_FPS)
-
-         # Cleanup after generation
-         del pipe  # Delete to free memory
-         gc.collect()
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-
-         return temp_video_path, "Success! Video generated."
-     except torch.cuda.OutOfMemoryError:
-         return None, "Error: Out of GPU memory. Try fewer frames (e.g., 8) or lower resolution."
-     except Exception as e:
-         return None, f"Error: {str(e)}"
- # 🌟 Set up the Gradio interface
- with gr.Blocks(title="Wan2.2 Image-to-Video Generator") as demo:
-     gr.Markdown("# 🌟 Wan2.2 I2V Generator")
-     gr.Markdown("Upload an image, add a prompt, and generate a video! **Tip: Start with 8 frames on free T4 GPU to avoid timeouts.**")
-
      with gr.Row():
-         with gr.Column(scale=1):
-             image_input = gr.Image(type="pil", label="Input Image")
-             prompt_input = gr.Textbox(label="Prompt", placeholder="A dynamic scene from the image...", lines=2)
-             negative_prompt_input = gr.Textbox(label="Negative Prompt", placeholder="blurry, low quality", lines=1)
-             num_frames_slider = gr.Slider(MIN_FRAMES_MODEL, 32, value=8, step=8, label="Number of Frames (Start low to test)")  # Limited to 32 max for free tier
-             preset_dropdown = gr.Dropdown(choices=list(DIMENSION_PRESETS.keys()), value="Custom (Default)", label="Output Preset")
-             steps_slider = gr.Slider(5, 20, value=10, step=5, label="Inference Steps (Lower = Faster)")
-             generate_btn = gr.Button("Generate Video", variant="primary")
-
-         with gr.Column(scale=1):
-             output_video = gr.Video(label="Generated Video")
-             status_output = gr.Textbox(label="Status", interactive=False)
-
-     # Event handlers
-     def update_instructions(preset):
-         return INPUT_IMAGE_INSTRUCTIONS.get(preset, INPUT_IMAGE_INSTRUCTIONS["Custom (Default)"])
-
-     preset_dropdown.change(update_instructions, preset_dropdown, gr.Markdown())
-
-     generate_btn.click(
-         fn=generate_video_only,
-         inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown, gr.State(7.5), steps_slider],  # Added steps
-         outputs=[output_video, status_output]
-     )
-
-     # Examples (optional)
      gr.Examples(
          examples=[
              [
-                 None,  # No example image; user to upload
-                 "The person in the image starts walking towards the camera with a smile.",
-                 "static, blurry",
-                 8,
-                 "Custom (Default)"
-             ]
          ],
-         inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown]
      )
 
  if __name__ == "__main__":
-     demo.launch(share=True, debug=True)

  import spaces
  import torch
+ from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
+ from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
+ from diffusers.utils.export_utils import export_to_video
  import gradio as gr
  import tempfile
  import numpy as np
  from PIL import Image
+ import random
  import gc
+
+ from torchao.quantization import quantize_
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig
+
+ import aoti
+
+ # =========================================================
+ # MODEL CONFIGURATION
+ # =========================================================
+ MODEL_ID = "dream2589632147/Dream-wan2-2-faster-Pro"  # ← new model path
+
  MAX_DIM = 832
  MIN_DIM = 480
  SQUARE_DIM = 640
  MULTIPLE_OF = 16
+
+ MAX_SEED = np.iinfo(np.int32).max
+
  FIXED_FPS = 16
  MIN_FRAMES_MODEL = 8
+ MAX_FRAMES_MODEL = 80
+
  MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
  MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
+
+ # =========================================================
+ # LOAD PIPELINE
+ # =========================================================
+ pipe = WanImageToVideoPipeline.from_pretrained(
+     MODEL_ID,
+     transformer=WanTransformer3DModel.from_pretrained(
+         MODEL_ID,
+         subfolder="transformer",
+         torch_dtype=torch.bfloat16,
+         device_map="cuda",
+     ),
+     transformer_2=WanTransformer3DModel.from_pretrained(
+         MODEL_ID,
+         subfolder="transformer_2",
+         torch_dtype=torch.bfloat16,
+         device_map="cuda",
+     ),
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ # =========================================================
+ # LOAD LORA ADAPTERS
+ # =========================================================
+ pipe.load_lora_weights(
+     "Kijai/WanVideo_comfy",
+     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+     adapter_name="lightx2v"
+ )
+ pipe.load_lora_weights(
+     "Kijai/WanVideo_comfy",
+     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+     adapter_name="lightx2v_2",
+     load_into_transformer_2=True
+ )
+
+ pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+ pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+ pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+ pipe.unload_lora_weights()
+
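# Note: the Lightning (lightx2v) distillation LoRA is loaded into both experts and then
# fused in place (scale 3.0 into the high-noise transformer, 1.0 into the low-noise
# transformer_2), so unload_lora_weights() can drop the adapter state afterwards while
# keeping the distilled few-step behaviour baked into the weights.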
+ # =========================================================
+ # QUANTIZATION & AOT OPTIMIZATION
+ # =========================================================
+ quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+ quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+ quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+
+ aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
+ aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
+
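# Note: quantize_ converts the text encoder to int8 weight-only and both transformers to
# float8 dynamic activation/weight; aoti.aoti_blocks_load (a helper module shipped with
# this Space) then appears to swap in ahead-of-time compiled transformer blocks from
# 'zerogpu-aoti/Wan2' that match the 'fp8da' quantization variant.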
+ # =========================================================
+ # DEFAULT PROMPTS
+ # =========================================================
+ default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
+ default_negative_prompt = (
+     "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, "
+     "最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, "
+     "畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
+ )
+
+ # =========================================================
+ # IMAGE RESIZING LOGIC
+ # =========================================================
+ def resize_image(image: Image.Image) -> Image.Image:
      width, height = image.size
+     if width == height:
+         return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
+
+     aspect_ratio = width / height
+     MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
+     MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
+
+     image_to_resize = image
+
+     if aspect_ratio > MAX_ASPECT_RATIO:
+         crop_width = int(round(height * MAX_ASPECT_RATIO))
+         left = (width - crop_width) // 2
+         image_to_resize = image.crop((left, 0, left + crop_width, height))
+     elif aspect_ratio < MIN_ASPECT_RATIO:
+         crop_height = int(round(width / MIN_ASPECT_RATIO))
+         top = (height - crop_height) // 2
+         image_to_resize = image.crop((0, top, width, top + crop_height))
+
+     if width > height:
+         target_w = MAX_DIM
+         target_h = int(round(target_w / aspect_ratio))
      else:
+         target_h = MAX_DIM
+         target_w = int(round(target_h * aspect_ratio))
+
+     final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
+     final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
+
+     final_w = max(MIN_DIM, min(MAX_DIM, final_w))
+     final_h = max(MIN_DIM, min(MAX_DIM, final_h))
+
+     return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
+
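# Note: resize_image() keeps square inputs at 640x640, center-crops anything wider or
# taller than the 832:480 ratio limits, scales the long side toward MAX_DIM, rounds both
# sides to multiples of 16, and clamps them to the [MIN_DIM, MAX_DIM] range.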
+ # =========================================================
+ # UTILITY FUNCTIONS
+ # =========================================================
+ def get_num_frames(duration_seconds: float):
+     return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
+
+ def get_duration(
+     input_image, prompt, steps, negative_prompt,
+     duration_seconds, guidance_scale, guidance_scale_2,
+     seed, randomize_seed, progress,
+ ):
+     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
+     BASE_STEP_DURATION = 15
+     width, height = resize_image(input_image).size
+     frames = get_num_frames(duration_seconds)
+     factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
+     step_duration = BASE_STEP_DURATION * factor ** 1.5
+     return 10 + int(steps) * step_duration
+
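# Note: get_duration() is the dynamic timeout passed to @spaces.GPU below. It estimates
# runtime as 10 s of fixed overhead plus steps times a per-step cost, where the 15 s
# baseline is scaled by (frames * width * height relative to 81 frames at 832x624)
# raised to the power 1.5.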
+ # =========================================================
+ # MAIN GENERATION FUNCTION
+ # =========================================================
+ @spaces.GPU(duration=get_duration)
+ def generate_video(
+     input_image,
+     prompt,
+     steps=4,
+     negative_prompt=default_negative_prompt,
+     duration_seconds=MAX_DURATION,
+     guidance_scale=1,
+     guidance_scale_2=1,
+     seed=42,
+     randomize_seed=False,
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     if input_image is None:
+         raise gr.Error("Please upload an input image.")
+
+     num_frames = get_num_frames(duration_seconds)
+     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+     resized_image = resize_image(input_image)
+
+     output_frames_list = pipe(
+         image=resized_image,
+         prompt=prompt,
+         negative_prompt=negative_prompt,
+         height=resized_image.height,
+         width=resized_image.width,
+         num_frames=num_frames,
+         guidance_scale=float(guidance_scale),
+         guidance_scale_2=float(guidance_scale_2),
+         num_inference_steps=int(steps),
+         generator=torch.Generator(device="cuda").manual_seed(current_seed),
+     ).frames[0]
+
+     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
+         video_path = tmpfile.name
+     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
+     return video_path, current_seed
+
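# Note: the temporary file is created with delete=False so the .mp4 survives the with
# block and Gradio can serve it; the function returns both the video path and the seed
# actually used, which the UI writes back into the seed slider.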
+ # =========================================================
+ # GRADIO UI
+ # =========================================================
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🚀 Dream Wan 2.2 Faster Pro (14B) — Ultra Fast I2V with Lightning LoRA")
+     gr.Markdown("Optimized FP8 quantized pipeline with AoT blocks & 4-step fast inference ⚡")
+
      with gr.Row():
+         with gr.Column():
+             input_image_component = gr.Image(type="pil", label="Input Image")
+             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
+             duration_seconds_input = gr.Slider(
+                 minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5,
+                 label="Duration (seconds)",
+                 info=f"Model range: {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
+             )
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
+                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)
+                 steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
+                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale (high noise)")
+                 guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 (low noise)")
+
+             generate_button = gr.Button("🎬 Generate Video", variant="primary")
+
+         with gr.Column():
+             video_output = gr.Video(label="Generated Video", autoplay=True)
+
+     ui_inputs = [
+         input_image_component, prompt_input, steps_slider,
+         negative_prompt_input, duration_seconds_input,
+         guidance_scale_input, guidance_scale_2_input,
+         seed_input, randomize_seed_checkbox
+     ]
+     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
+
      gr.Examples(
          examples=[
              [
+                 "wan_i2v_input.JPG",
+                 "POV selfie video, white cat with sunglasses standing on surfboard, relaxed smile, tropical beach behind (clear water, green hills, blue sky with clouds). Surfboard tips, cat falls into ocean, camera plunges underwater with bubbles and sunlight beams. Brief underwater view of cat’s face, then cat resurfaces, still filming selfie, playful summer vacation mood.",
+                 4,
+             ],
          ],
+         inputs=[input_image_component, prompt_input, steps_slider],
+         outputs=[video_output, seed_input],
+         fn=generate_video,
+         cache_examples="lazy"
      )
+
  if __name__ == "__main__":
+     demo.queue().launch(mcp_server=True)
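# Note: demo.queue() enables request queuing for the ZeroGPU worker, and mcp_server=True
# is a launch flag in recent Gradio releases that additionally exposes the app's
# functions over MCP alongside the normal web UI.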