Spaces:
Paused
Paused
Update app_quant_latent.py
Browse files- app_quant_latent.py +18 -16
app_quant_latent.py
CHANGED
|
@@ -691,7 +691,6 @@ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scal
|
|
| 691 |
def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
|
| 692 |
LOGS = []
|
| 693 |
device = "cuda"
|
| 694 |
-
cpu_device = "cpu"
|
| 695 |
generator = torch.Generator(device).manual_seed(int(seed))
|
| 696 |
|
| 697 |
placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
|
|
@@ -701,31 +700,31 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
|
|
| 701 |
last_latents = []
|
| 702 |
|
| 703 |
try:
|
| 704 |
-
# ---
|
| 705 |
latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
|
| 706 |
latents = latents.float().to(device)
|
| 707 |
|
| 708 |
-
# ---
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
)
|
| 717 |
-
latents = partial_output # latents now contain partial image info
|
| 718 |
|
| 719 |
-
# ---
|
| 720 |
num_previews = 5
|
| 721 |
for i, alpha in enumerate(np.linspace(0.2, 1.0, num_previews)):
|
| 722 |
try:
|
| 723 |
-
|
|
|
|
| 724 |
preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
|
| 725 |
|
|
|
|
| 726 |
decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
|
| 727 |
decoded = (decoded / 2 + 0.5).clamp(0, 1)
|
| 728 |
-
decoded = decoded.cpu().permute(0, 2, 3, 1).
|
| 729 |
decoded = (decoded * 255).round().astype("uint8")
|
| 730 |
latent_img = Image.fromarray(decoded[0])
|
| 731 |
|
|
@@ -742,7 +741,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
|
|
| 742 |
latent_gallery.append(placeholder)
|
| 743 |
yield None, latent_gallery[-5:], LOGS
|
| 744 |
|
| 745 |
-
# ---
|
| 746 |
try:
|
| 747 |
output = pipe(
|
| 748 |
prompt=prompt,
|
|
@@ -763,6 +762,9 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
|
|
| 763 |
final_gallery.append(placeholder)
|
| 764 |
latent_gallery.append(placeholder)
|
| 765 |
yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
|
|
|
|
|
|
|
|
|
|
| 766 |
# this is a stable version that can gen final and a noise to latent
|
| 767 |
@spaces.GPU
|
| 768 |
def generate_image_verygood_realnoise(prompt, height, width, steps, seed, guidance_scale=0.0):
|
|
|
|
| 691 |
def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
|
| 692 |
LOGS = []
|
| 693 |
device = "cuda"
|
|
|
|
| 694 |
generator = torch.Generator(device).manual_seed(int(seed))
|
| 695 |
|
| 696 |
placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
|
|
|
|
| 700 |
last_latents = []
|
| 701 |
|
| 702 |
try:
|
| 703 |
+
# --- Step 1: generate initial noise latents ---
|
| 704 |
latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
|
| 705 |
latents = latents.float().to(device)
|
| 706 |
|
| 707 |
+
# --- Step 2: partially denoise latents using a few diffusion steps ---
|
| 708 |
+
partial_latents = pipe(
|
| 709 |
+
prompt=prompt,
|
| 710 |
+
num_inference_steps=min(3, steps), # 1-3 steps to inject image info
|
| 711 |
+
guidance_scale=guidance_scale,
|
| 712 |
+
generator=generator,
|
| 713 |
+
output_type="latent"
|
| 714 |
+
)
|
|
|
|
|
|
|
| 715 |
|
| 716 |
+
# --- Step 3: produce last 5 previews by interpolating partial -> final latent ---
|
| 717 |
num_previews = 5
|
| 718 |
for i, alpha in enumerate(np.linspace(0.2, 1.0, num_previews)):
|
| 719 |
try:
|
| 720 |
+
# Linear interpolation between partial_latents and original
|
| 721 |
+
preview_latent = partial_latents * alpha + latents * (1 - alpha)
|
| 722 |
preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
|
| 723 |
|
| 724 |
+
# Decode preview
|
| 725 |
decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
|
| 726 |
decoded = (decoded / 2 + 0.5).clamp(0, 1)
|
| 727 |
+
decoded = decoded.cpu().permute(0, 2, 3, 1).numpy()
|
| 728 |
decoded = (decoded * 255).round().astype("uint8")
|
| 729 |
latent_img = Image.fromarray(decoded[0])
|
| 730 |
|
|
|
|
| 741 |
latent_gallery.append(placeholder)
|
| 742 |
yield None, latent_gallery[-5:], LOGS
|
| 743 |
|
| 744 |
+
# --- Step 4: generate final image ---
|
| 745 |
try:
|
| 746 |
output = pipe(
|
| 747 |
prompt=prompt,
|
|
|
|
| 762 |
final_gallery.append(placeholder)
|
| 763 |
latent_gallery.append(placeholder)
|
| 764 |
yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
|
| 765 |
+
|
| 766 |
+
|
| 767 |
+
|
| 768 |
# this is a stable version that can gen final and a noise to latent
|
| 769 |
@spaces.GPU
|
| 770 |
def generate_image_verygood_realnoise(prompt, height, width, steps, seed, guidance_scale=0.0):
|