Spaces:

rahul7star
/

Image2Video

Paused

App Files Files Community

rahul7star committed 1 day ago

Commit

9c8674b

verified ·

1 Parent(s): 6cd5c6f

Update app_quant_latent.py

Browse files

Files changed (1) hide show

app_quant_latent.py +17 -21

app_quant_latent.py CHANGED Viewed

@@ -691,56 +691,52 @@ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scal
 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
-    # --- Generate latent previews in a loop ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
-        # always keep latents float32 until decode
-        latents = latents.float()
         num_previews = min(10, steps)
         preview_steps = torch.linspace(0, 1, num_previews)
-        for i, alpha in enumerate(preview_steps):
             try:
                 with torch.no_grad():
-                    # --- simulate progression like Z-Image Turbo ---
-                    preview_latent = latents * alpha + torch.randn_like(latents) * (1 - alpha)
-                    # 🛠 FIX: move to same device as VAE and match dtype
                     preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
-                    # Decode latent
-                    decoded = pipe.vae.decode(preview_latent).sample  # [1,3,H,W]
-                    decoded = (decoded / 2 + 0.5).clamp(0, 1)
-                    # Convert to PIL
-                    decoded = decoded[0].permute(1, 2, 0).cpu().numpy()  # HWC
-                    latent_img = Image.fromarray((decoded * 255).astype("uint8"))
             except Exception as e:
                 LOGS.append(f"⚠️ Latent preview decode failed: {e}")
                 latent_img = placeholder
             latent_gallery.append(latent_img)
-            yield None, latent_gallery, LOGS  # update Gradio with intermediate preview
-        # Optionally, you can store/upload last few latents here for later
-        # last_latents = latents[-4:].cpu()
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
-    # --- Final image: standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
@@ -752,7 +748,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         )
         final_img = output.images[0]
         final_gallery.append(final_img)
-        latent_gallery.append(final_img)  # fallback preview if needed
         LOGS.append("✅ Standard pipeline succeeded.")
         yield final_img, latent_gallery, LOGS

 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
+    device = "cuda"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
+    # --- Generate latent previews ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+        latents = latents.float()  # keep float32 until decode
         num_previews = min(10, steps)
         preview_steps = torch.linspace(0, 1, num_previews)
+        for alpha in preview_steps:
             try:
                 with torch.no_grad():
+                    # Simulate denoising progression like Z-Image Turbo
+                    preview_latent = latents * alpha + latents * 0  # optional: simple progression
+                    # Move to same device and dtype as VAE
                     preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
+                    # Decode
+                    decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
+                    # Convert to PIL following same logic as final image
+                    decoded = (decoded / 2 + 0.5).clamp(0, 1)
+                    decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
+                    decoded = (decoded * 255).round().astype("uint8")
+                    latent_img = Image.fromarray(decoded[0])
             except Exception as e:
                 LOGS.append(f"⚠️ Latent preview decode failed: {e}")
                 latent_img = placeholder
             latent_gallery.append(latent_img)
+            yield None, latent_gallery, LOGS
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
+    # --- Final image: untouched ---
     try:
         output = pipe(
             prompt=prompt,
         )
         final_img = output.images[0]
         final_gallery.append(final_img)
+        latent_gallery.append(final_img)  # fallback preview
         LOGS.append("✅ Standard pipeline succeeded.")
         yield final_img, latent_gallery, LOGS