Spaces:

rahul7star
/

Image2Video

Paused

App Files Files Community

rahul7star committed 2 days ago

Commit

971ff27

verified ·

1 Parent(s): 4679639

Update app_quant_latent.py

Browse files

Files changed (1) hide show

app_quant_latent.py +35 -26

app_quant_latent.py CHANGED Viewed

@@ -691,57 +691,66 @@ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scal
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
-    last_latents = []
     try:
-        # --- Step 1: generate initial noise latents ---
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
-        latents = latents.float().to(device)
-        # --- Step 2: partially denoise latents using a few diffusion steps ---
-        partial_latents = pipe(
-            prompt=prompt,
-            num_inference_steps=min(3, steps),  # 1-3 steps to inject image info
-            guidance_scale=guidance_scale,
-            generator=generator,
-            output_type="latent"
-        )
-        # --- Step 3: produce last 5 previews by interpolating partial -> final latent ---
-        num_previews = 5
-        for i, alpha in enumerate(np.linspace(0.2, 1.0, num_previews)):
             try:
-                # Linear interpolation between partial_latents and original
-                preview_latent = partial_latents * alpha + latents * (1 - alpha)
-                preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
-                # Decode preview
-                decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
-                decoded = (decoded / 2 + 0.5).clamp(0, 1)
-                decoded = decoded.cpu().permute(0, 2, 3, 1).numpy()
-                decoded = (decoded * 255).round().astype("uint8")
-                latent_img = Image.fromarray(decoded[0])
             except Exception as e:
                 LOGS.append(f"⚠️ Latent preview decode failed: {e}")
                 latent_img = placeholder
             latent_gallery.append(latent_img)
-            last_latents.append(preview_latent.cpu())
             yield None, latent_gallery[-5:], LOGS
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
         yield None, latent_gallery[-5:], LOGS
-    # --- Step 4: generate final image ---
     try:
         output = pipe(
             prompt=prompt,
@@ -755,7 +764,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         final_gallery.append(final_img)
         latent_gallery.append(final_img)
         LOGS.append("✅ Standard pipeline succeeded.")
-        yield final_img, latent_gallery[-5:] + [final_img], LOGS
     except Exception as e2:
         LOGS.append(f"❌ Standard pipeline failed: {e2}")

 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
+    cpu_device = "cpu"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
+    last_latents = []  # store last 5 preview latents on CPU
     try:
+        # --- Initial latents ---
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+        latents = latents.float().to(cpu_device)  # move to CPU
+        num_previews = min(10, steps)
+        preview_indices = torch.linspace(0, steps - 1, num_previews).long()
+        for i, step_idx in enumerate(preview_indices):
             try:
+                with torch.no_grad():
+                    # --- Z-Image Turbo-style denoise simulation ---
+                    t = 1.0 - (i / num_previews)  # linear decay [1.0 -> 0.0]
+                    noise_scale = t ** 0.5  # reduce noise over steps (sqrt for smoother)
+                    denoise_latent = latents * t + torch.randn_like(latents) * noise_scale
+                    # Move to VAE device & dtype
+                    denoise_latent = denoise_latent.to(pipe.vae.device).to(pipe.vae.dtype)
+                    # Decode latent to image
+                    decoded = pipe.vae.decode(denoise_latent, return_dict=False)[0]
+                    decoded = (decoded / 2 + 0.5).clamp(0, 1)
+                    decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
+                    decoded = (decoded * 255).round().astype("uint8")
+                    latent_img = Image.fromarray(decoded[0])
             except Exception as e:
                 LOGS.append(f"⚠️ Latent preview decode failed: {e}")
                 latent_img = placeholder
             latent_gallery.append(latent_img)
+            # Keep last 5 latents only
+            last_latents.append(denoise_latent.cpu().clone())
+            if len(last_latents) > 5:
+                last_latents.pop(0)
+            # Show only last 5 previews in UI
             yield None, latent_gallery[-5:], LOGS
+        # Optionally: upload last 5 latents
+        # latent_dict = {"latents": last_latents, "prompt": prompt, "seed": seed}
+        # hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_last5_{seed}.pt")
+        # LOGS.append(f"🔹 Last 5 latents uploaded: {hf_url}")
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
         yield None, latent_gallery[-5:], LOGS
+    # --- Final image on GPU ---
     try:
         output = pipe(
             prompt=prompt,
         final_gallery.append(final_img)
         latent_gallery.append(final_img)
         LOGS.append("✅ Standard pipeline succeeded.")
+        yield final_img, latent_gallery[-5:] + [final_img], LOGS  # last 5 previews + final
     except Exception as e2:
         LOGS.append(f"❌ Standard pipeline failed: {e2}")