rahul7star commited on
Commit
4679639
·
verified ·
1 Parent(s): 1eead2e

Update app_quant_latent.py

Browse files
Files changed (1) hide show
  1. app_quant_latent.py +18 -16
app_quant_latent.py CHANGED
@@ -691,7 +691,6 @@ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scal
691
  def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
692
  LOGS = []
693
  device = "cuda"
694
- cpu_device = "cpu"
695
  generator = torch.Generator(device).manual_seed(int(seed))
696
 
697
  placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
@@ -701,31 +700,31 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
701
  last_latents = []
702
 
703
  try:
704
- # --- Initial latents (noise) ---
705
  latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
706
  latents = latents.float().to(device)
707
 
708
- # --- Run 1-2 diffusion steps to inject prompt info ---
709
- with torch.no_grad():
710
- partial_output = pipe(
711
- prompt=prompt,
712
- num_inference_steps=min(2, steps),
713
- guidance_scale=guidance_scale,
714
- generator=generator,
715
- output_type="latent"
716
- )
717
- latents = partial_output # latents now contain partial image info
718
 
719
- # --- Last 5 previews: interpolate from partial to final latent ---
720
  num_previews = 5
721
  for i, alpha in enumerate(np.linspace(0.2, 1.0, num_previews)):
722
  try:
723
- preview_latent = latents * alpha
 
724
  preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
725
 
 
726
  decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
727
  decoded = (decoded / 2 + 0.5).clamp(0, 1)
728
- decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
729
  decoded = (decoded * 255).round().astype("uint8")
730
  latent_img = Image.fromarray(decoded[0])
731
 
@@ -742,7 +741,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
742
  latent_gallery.append(placeholder)
743
  yield None, latent_gallery[-5:], LOGS
744
 
745
- # --- Final image: full pipeline ---
746
  try:
747
  output = pipe(
748
  prompt=prompt,
@@ -763,6 +762,9 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
763
  final_gallery.append(placeholder)
764
  latent_gallery.append(placeholder)
765
  yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
 
 
 
766
  # this is a stable version that can generate the final image and a noise-to-latent image
767
  @spaces.GPU
768
  def generate_image_verygood_realnoise(prompt, height, width, steps, seed, guidance_scale=0.0):
 
691
  def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
692
  LOGS = []
693
  device = "cuda"
 
694
  generator = torch.Generator(device).manual_seed(int(seed))
695
 
696
  placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
 
700
  last_latents = []
701
 
702
  try:
703
+ # --- Step 1: generate initial noise latents ---
704
  latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
705
  latents = latents.float().to(device)
706
 
707
+ # --- Step 2: partially denoise latents using a few diffusion steps ---
708
+ partial_latents = pipe(
709
+ prompt=prompt,
710
+ num_inference_steps=min(3, steps), # 1-3 steps to inject image info
711
+ guidance_scale=guidance_scale,
712
+ generator=generator,
713
+ output_type="latent"
714
+ )
 
 
715
 
716
+ # --- Step 3: produce 5 previews by blending the initial noise latents toward the partial latents ---
717
  num_previews = 5
718
  for i, alpha in enumerate(np.linspace(0.2, 1.0, num_previews)):
719
  try:
720
+ # Linear interpolation between partial_latents and original
721
+ preview_latent = partial_latents * alpha + latents * (1 - alpha)
722
  preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
723
 
724
+ # Decode preview
725
  decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
726
  decoded = (decoded / 2 + 0.5).clamp(0, 1)
727
+ decoded = decoded.cpu().permute(0, 2, 3, 1).numpy()
728
  decoded = (decoded * 255).round().astype("uint8")
729
  latent_img = Image.fromarray(decoded[0])
730
 
 
741
  latent_gallery.append(placeholder)
742
  yield None, latent_gallery[-5:], LOGS
743
 
744
+ # --- Step 4: generate final image ---
745
  try:
746
  output = pipe(
747
  prompt=prompt,
 
762
  final_gallery.append(placeholder)
763
  latent_gallery.append(placeholder)
764
  yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
765
+
766
+
767
+
768
  # this is a stable version that can generate the final image and a noise-to-latent image
769
  @spaces.GPU
770
  def generate_image_verygood_realnoise(prompt, height, width, steps, seed, guidance_scale=0.0):