Update app_quant_latent.py
app_quant_latent.py CHANGED (+39 -43)
@@ -12,6 +12,7 @@ import time
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from diffusers import ZImagePipeline, AutoModel
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
+latent_history = []

 # ============================================================
 # LOGGING BUFFER
@@ -248,58 +249,53 @@ log_system_stats("AFTER PIPELINE BUILD")

 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed):
-
-
-
-
-
-
-
-
-
-
-
-    for step, _ in pipe(
-        prompt=prompt,
-        height=height,
-        width=width,
-        num_inference_steps=steps,
-        guidance_scale=0.0,
-        generator=generator,
-        callback=save_latents,
-        callback_steps=1
-    ).iter():
-        pass  # only capturing latents, ignoring intermediate images
-
-    # Convert latents to PIL images for gallery
-    latent_images = []
-    for latent in latent_history:
-        try:
-            img_tensor = pipe.vae.decode(latent)
-            img_tensor = (img_tensor / 2 + 0.5).clamp(0, 1)
-            pil_img = T.ToPILImage()(img_tensor[0].cpu())
-            latent_images.append(pil_img)
-        except Exception as e:
-            log(f"⚠️ Failed to convert latent to image: {e}")
-
-    # Original final image generation
-    output = pipe(
+    global latent_history
+    latent_history = []  # reset every run
+
+    generator = torch.Generator("cuda").manual_seed(int(seed))
+
+    logs = []
+    def log(msg):
+        logs.append(msg)
+
+    # Run pipeline manually step by step
+    out = pipe(
         prompt=prompt,
         height=height,
         width=width,
         num_inference_steps=steps,
-        guidance_scale=0.0,
         generator=generator,
+        output_type="latent"
     )

-
-
+    latents = out.latents
+
+    # Denoising loop - MANUAL callback
+    for i, t in enumerate(pipe.scheduler.timesteps):
+        latents = pipe.unet(latents, t, encoder_hidden_states=out.prompt_embeds).sample
+
+        # Store cloned latent
+        latent_history.append(latents.detach().cpu().clone())
+
+        # Log GPU memory
+        gpu = torch.cuda.memory_allocated() / 1e9
+        log(f"Step {i+1}/{steps} — GPU: {gpu:.2f} GB")
+
+        # Step scheduler
+        latents = pipe.scheduler.step(latents, timestep=t).prev_sample
+
+    # Decode final image
+    final_image = pipe.vae.decode(latents / pipe.vae.config.scaling_factor).sample[0]
+    final_image = (final_image / 2 + 0.5).clamp(0,1).cpu().permute(1,2,0).numpy()

-
+    # Convert latents to preview images
+    latent_imgs = []
+    for l in latent_history:
+        img = pipe.vae.decode(l / pipe.vae.config.scaling_factor).sample[0]
+        img = (img / 2 + 0.5).clamp(0,1).cpu().permute(1,2,0).numpy()
+        latent_imgs.append(img)

-
-        log(f"❌ Inference error: {e}")
-        return None, None, LOGS
+    return final_image, latent_imgs, "\n".join(logs)


 # ============================================================
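
Two notes on the deleted block: diffusers pipelines return an output object rather than an iterator, so the .iter() call never existed, and the callback= / callback_steps= arguments were deprecated in favor of callback_on_step_end. If the goal is only to capture per-step latents, the stock hook avoids a manual loop entirely. A minimal sketch, assuming ZImagePipeline follows the standard diffusers callback protocol (not verified for this particular pipeline); pipe, prompt, steps, and generator are the same objects as in the diff:

latent_history = []

def save_latents(pipeline, step_index, timestep, callback_kwargs):
    # callback_kwargs carries the tensors named in
    # callback_on_step_end_tensor_inputs; the hook must return the dict.
    latent_history.append(callback_kwargs["latents"].detach().cpu().clone())
    return callback_kwargs

out = pipe(
    prompt=prompt,
    height=height,
    width=width,
    num_inference_steps=steps,
    generator=generator,
    callback_on_step_end=save_latents,
    callback_on_step_end_tensor_inputs=["latents"],
)
final_image = out.images[0]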
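
The added loop also deserves a caution: with output_type="latent" a standard diffusers pipeline returns the already-denoised latents in out.images (an out.latents or out.prompt_embeds attribute is not part of the usual output object), and pipe.scheduler.step(latents, timestep=t) passes the running sample where the scheduler expects the model output. Diffusers schedulers step with (model_output, timestep, sample). A corrected sketch of the manual loop under those standard conventions; prompt_embeds and the initial noise latents are assumed to have been prepared separately:

import torch

pipe.scheduler.set_timesteps(steps, device="cuda")
latents = latents * pipe.scheduler.init_noise_sigma  # scale initial noise

latent_history = []
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        # Keep the noise prediction separate from the running sample.
        noise_pred = pipe.unet(latents, t, encoder_hidden_states=prompt_embeds).sample
        # Schedulers take (model_output, timestep, sample).
        latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
        latent_history.append(latents.detach().cpu().clone())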
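
Finally, both preview paths decode latents on whatever device they happen to live on; the copies stored above are on the CPU while the VAE presumably sits on the GPU, and neither path wraps the decode in torch.no_grad(). A small helper, assuming an SD-style VAE whose decode() returns a .sample tensor in [-1, 1] (T is torchvision.transforms, as in the deleted code):

import torch
import torchvision.transforms as T

def latent_to_pil(pipe, latent):
    # Move the stored CPU latent back to the VAE's device/dtype,
    # undo the scaling factor, and decode without tracking gradients.
    latent = latent.to(device=pipe.vae.device, dtype=pipe.vae.dtype)
    with torch.no_grad():
        img = pipe.vae.decode(latent / pipe.vae.config.scaling_factor).sample
    img = (img / 2 + 0.5).clamp(0, 1)  # [-1, 1] -> [0, 1]
    return T.ToPILImage()(img[0].cpu())

gallery = [latent_to_pil(pipe, l) for l in latent_history]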