Spaces:

anycoderapps
/

LongCat-Image-Edit

Running on Zero

File size: 7,999 Bytes

01901fb

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np
import os

# Lazily-populated model state shared by the functions in this module.
# Both names start as None and are assigned inside initialize_model().
pipe = None    # holds the LongCatImageEditPipeline once it has been loaded
device = None  # holds the torch.device picked at load time (cuda if available, else cpu)

def initialize_model():
    """Load the LongCat image-edit pipeline into the module globals on first use.

    The function is a no-op when the global ``pipe`` is already set. Otherwise
    it picks CUDA when available (CPU as fallback), loads the text processor,
    the transformer and the pipeline from ``./weights/LongCat-Image-Edit``,
    moves the pipeline to the chosen device in bfloat16 and stores it in the
    global ``pipe``.

    The global ``pipe`` is only assigned after every loading step has
    succeeded, so a failure part-way through (for example while moving the
    weights to the device) leaves ``pipe`` as ``None`` and the next call
    retries instead of returning early with a half-initialised pipeline.

    Raises:
        ValueError: If the checkpoint directory does not exist.
        Exception: Any error raised while loading is printed and re-raised
            unchanged.
    """
    global pipe, device

    # Already loaded by an earlier call; nothing to do.
    if pipe is not None:
        return

    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        checkpoint_dir = './weights/LongCat-Image-Edit'

        # Fail fast with a readable message before touching any loader.
        if not os.path.exists(checkpoint_dir):
            raise ValueError(f"Model not found at {checkpoint_dir}. Please download the model first.")

        text_processor = AutoProcessor.from_pretrained(
            checkpoint_dir,
            subfolder='tokenizer'
        )

        transformer = LongCatImageTransformer2DModel.from_pretrained(
            checkpoint_dir,
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
            use_safetensors=True
        ).to(device)

        # Build into a local first: the global must not become non-None
        # until the pipeline is fully constructed and moved to the device.
        loaded_pipe = LongCatImageEditPipeline.from_pretrained(
            checkpoint_dir,
            transformer=transformer,
            text_processor=text_processor,
        )
        loaded_pipe.to(device, torch.bfloat16)

        # Publish only after every step above succeeded.
        pipe = loaded_pipe

        print(f"✅ Model loaded successfully on {device}")

    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise

def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
):
    """Edit an image according to a text instruction.

    Args:
        input_image: Image to edit; converted to RGB when it is in another mode.
        prompt: Edit instruction. Must contain non-whitespace text.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Number of steps; coerced to ``int`` before use.
        seed: Seed for the CPU ``torch.Generator``; coerced to ``int``.
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        The first entry of the pipeline output's ``images`` (presumably a PIL
        image, to be confirmed against the pipeline implementation).

    Raises:
        gr.Error: If no image or no instruction was supplied, or if model
            loading or inference fails (the original exception is chained
            as the cause).
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit instruction")

    try:
        # Load the model lazily on the first request.
        initialize_model()

        progress(0.1, desc="Preparing image...")

        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")

        # Slider/API values may arrive as floats (e.g. 42.0); manual_seed
        # needs a real int, so coerce before seeding for reproducibility.
        generator = torch.Generator("cpu").manual_seed(int(seed))

        with torch.inference_mode():
            output = pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                # A step count must be integral for the same reason as the seed.
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")

        return output.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # reachable through the exception chain.
        raise gr.Error(f"Error during image editing: {str(e)}") from e

# Example rows for the UI: [prompt, negative prompt, guidance scale, steps, seed].
# Every example shares an empty negative prompt, guidance 4.5 and 50 steps;
# only the instruction text and the seed vary.
example_prompts = [
    [instruction, "", 4.5, 50, example_seed]
    for instruction, example_seed in (
        ("将猫变成狗", 42),
        ("Change the cat to a dog", 42),
        ("将背景变成海滩", 43),
        ("Make it nighttime", 44),
        ("将图片转换为油画风格", 45),
    )
]

# Build the Gradio interface: a header, an input column and an output column
# side by side, a row of example prompts, the click handler, and a footer.
with gr.Blocks(fill_height=True) as demo:
    # Page header (title, tagline, attribution link)
    gr.HTML("""
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🎨 LongCat Image Edit</h1>
            <p style="font-size: 16px; color: #666;">
                Transform your images with AI-powered editing using natural language instructions
            </p>
            <p style="font-size: 14px; margin-top: 10px;">
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
            </p>
        </div>
    """)
    
    with gr.Row():
        # Left column: everything the user supplies
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            # type="pil" so edit_image receives a PIL image it can .convert()
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=400
            )
            
            prompt = gr.Textbox(
                label="Edit Instruction",
                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
                lines=3
            )
            
            # Generation parameters, collapsed by default
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What you don't want in the image",
                    lines=2
                )
                
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.5,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values = stronger adherence to prompt"
                )
                
                num_inference_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality but slower"
                )
                
                seed = gr.Slider(
                    minimum=0,
                    maximum=999999,
                    value=42,
                    step=1,
                    label="Random Seed",
                    info="Use same seed for reproducible results"
                )
            
            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
            
        # Right column: the result plus usage tips
        with gr.Column(scale=1):
            gr.Markdown("### 🎯 Output")
            # NOTE(review): confirm `show_download_button` is still accepted by
            # the Gradio version this app is deployed with.
            output_image = gr.Image(
                label="Edited Image",
                type="pil",
                height=400,
                show_download_button=True
            )
            
            gr.Markdown("### 💡 Tips")
            gr.Markdown("""
            - Upload a clear, well-lit image for best results
            - Be specific in your edit instructions
            - Supports both English and Chinese prompts
            - Try different guidance scales for varied results
            - Higher inference steps = better quality (but slower)
            """)
    
    # Examples section: each row fills the prompt and the advanced settings
    # only; the input image is not among the inputs, so it is left untouched.
    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
        label="Click to try these examples"
    )
    
    # Event handlers: the input order below must match edit_image's positional
    # parameters (image, prompt, negative prompt, guidance, steps, seed).
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_name="edit_image"
    )
    
    # Footer
    gr.HTML("""
        <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
            <p style="color: #666; font-size: 14px;">
                Powered by LongCat Image Edit | 
                <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
            </p>
        </div>
    """)

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    # Soft theme with a blue/indigo palette and the Inter web font.
    app_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md"
    )

    # Single attribution link shown in the page footer.
    attribution_links = [
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
    ]

    demo.launch(theme=app_theme, footer_links=attribution_links)