# Source: Hugging Face Space file view — author: akhaliq (HF Staff)
# Commit 01901fb (verified): "🎨 Redesign from AnyCoder"
# File size: 8 kB
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np
import os
# Lazily-initialised model state shared by every request.
# `pipe` stays None until `initialize_model()` has completed successfully.
pipe = None
device = None


def initialize_model():
    """Load the LongCat image-edit pipeline on first use.

    The function is a no-op when the module-level ``pipe`` is already set.
    Otherwise it picks CUDA when available (CPU if not), loads the text
    processor, the transformer and the edit pipeline from
    ``./weights/LongCat-Image-Edit`` and moves them to the chosen device in
    bfloat16.

    The module-level ``pipe`` and ``device`` are only assigned once every
    loading step has succeeded, so a failed attempt never leaves a
    half-initialised pipeline behind and the next call retries from scratch.

    Raises:
        ValueError: If the checkpoint directory does not exist.
        Exception: Any error raised while loading is printed and re-raised.
    """
    global pipe, device

    if pipe is not None:
        return

    try:
        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        checkpoint_dir = './weights/LongCat-Image-Edit'

        # Check if model exists
        if not os.path.exists(checkpoint_dir):
            raise ValueError(f"Model not found at {checkpoint_dir}. Please download the model first.")

        text_processor = AutoProcessor.from_pretrained(
            checkpoint_dir,
            subfolder='tokenizer'
        )

        transformer = LongCatImageTransformer2DModel.from_pretrained(
            checkpoint_dir,
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
            use_safetensors=True
        ).to(target_device)

        loaded_pipe = LongCatImageEditPipeline.from_pretrained(
            checkpoint_dir,
            transformer=transformer,
            text_processor=text_processor,
        )
        loaded_pipe.to(target_device, torch.bfloat16)

        # Publish the globals last: if anything above raised, `pipe` is still
        # None and the early-return guard will not hand out a broken pipeline.
        device = target_device
        pipe = loaded_pipe

        print(f"✅ Model loaded successfully on {device}")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise
def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
):
    """Edit an image according to a text instruction.

    Args:
        input_image: The uploaded image; converted to RGB when it is in
            another mode.
        prompt: The edit instruction. Must contain non-whitespace text.
        negative_prompt: Text forwarded to the pipeline as ``negative_prompt``.
        guidance_scale: Forwarded to the pipeline unchanged.
        num_inference_steps: Number of steps; coerced to ``int``.
        seed: Seed for the CPU random generator; coerced to ``int``.
        progress: Gradio progress tracker used to report status.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If no image or no instruction was supplied, or if model
            loading / inference fails (the original exception is chained).
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit instruction")

    try:
        # Initialize model if not already loaded
        initialize_model()

        progress(0.1, desc="Preparing image...")

        # Convert to RGB if needed
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")

        # Slider values can be delivered as floats (e.g. 42.0), while
        # `manual_seed` requires an integer, so coerce explicitly.
        generator = torch.Generator("cpu").manual_seed(int(seed))

        # Run the pipeline
        with torch.inference_mode():
            output = pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                # Step count must be a whole number for the same reason.
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")

        return output.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # attached for server-side debugging.
        raise gr.Error(f"Error during image editing: {str(e)}") from e
# Example prompts
# Each row is [prompt, negative_prompt, guidance_scale, num_inference_steps, seed].
# The Chinese prompts mirror the English ones ("change the cat to a dog",
# "change the background to a beach", "convert the picture to oil-painting style").
example_prompts = [
    ["将猫变成狗", "", 4.5, 50, 42],
    ["Change the cat to a dog", "", 4.5, 50, 42],
    ["将背景变成海滩", "", 4.5, 50, 43],
    ["Make it nighttime", "", 4.5, 50, 44],
    ["将图片转换为油画风格", "", 4.5, 50, 45],
]
# Build Gradio interface
# Layout: a header, a two-column row (inputs on the left, output and tips on
# the right), an examples table, and a footer. The edit button is wired to
# `edit_image` at the end of the block.
with gr.Blocks(fill_height=True) as demo:
    gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h1>🎨 LongCat Image Edit</h1>
<p style="font-size: 16px; color: #666;">
Transform your images with AI-powered editing using natural language instructions
</p>
<p style="font-size: 14px; margin-top: 10px;">
Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
</p>
</div>
""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=400
            )
            prompt = gr.Textbox(
                label="Edit Instruction",
                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
                lines=3
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What you don't want in the image",
                    lines=2
                )
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.5,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values = stronger adherence to prompt"
                )
                num_inference_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality but slower"
                )
                seed = gr.Slider(
                    minimum=0,
                    maximum=999999,
                    value=42,
                    step=1,
                    label="Random Seed",
                    info="Use same seed for reproducible results"
                )

            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")

        with gr.Column(scale=1):
            gr.Markdown("### 🎯 Output")
            # NOTE(review): the explicit `show_download_button=True` was
            # dropped. It is the default where the argument exists, and newer
            # Gradio releases appear to replace it with `buttons=` — confirm
            # against the pinned Gradio version.
            output_image = gr.Image(
                label="Edited Image",
                type="pil",
                height=400
            )
            gr.Markdown("### 💡 Tips")
            gr.Markdown("""
- Upload a clear, well-lit image for best results
- Be specific in your edit instructions
- Supports both English and Chinese prompts
- Try different guidance scales for varied results
- Higher inference steps = better quality (but slower)
""")

    # Examples section
    # Rows only fill the text/slider inputs; the image must still be uploaded.
    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
        label="Click to try these examples"
    )

    # Event handlers
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_name="edit_image"
    )

    # Footer
    gr.HTML("""
<div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
<p style="color: #666; font-size: 14px;">
Powered by LongCat Image Edit |
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
</p>
</div>
""")
# Launch the app
# Only runs when the file is executed as a script: builds the Soft theme and
# the footer link, then starts the Gradio server for `demo`.
if __name__ == "__main__":
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md",
    )
    anycoder_link = {
        "label": "Built with anycoder",
        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
    }
    demo.launch(theme=soft_theme, footer_links=[anycoder_link])