Spaces:

anycoderapps
/

LongCat-Image-Edit

Running on Zero

App Files Files Community

LongCat-Image-Edit / app.py

akhaliq HF Staff

🎨 Redesign from AnyCoder

01901fb verified 7 days ago

raw

history blame

8 kB

	import gradio as gr
	import torch
	from PIL import Image
	from transformers import AutoProcessor
	from longcat_image.models import LongCatImageTransformer2DModel
	from longcat_image.pipelines import LongCatImageEditPipeline
	import numpy as np
	import os

	# Module-level model state: both names start out as None and are filled in
	# by initialize_model() the first time the model is needed.
	pipe = device = None

	def initialize_model():
	    """Load the LongCat image-edit pipeline into the module globals on first use.

	    Returns immediately when ``pipe`` is already populated. Otherwise the
	    text processor, transformer and pipeline are loaded from
	    ``./weights/LongCat-Image-Edit`` and moved, in bfloat16, to CUDA when it
	    is available and to the CPU otherwise.

	    The globals ``pipe`` and ``device`` are assigned only after every step has
	    succeeded. A failure part-way through (for example while moving the
	    pipeline to the device) therefore leaves ``pipe`` as ``None`` so the next
	    call retries, instead of caching a half-initialised pipeline.

	    Raises:
	        ValueError: If the checkpoint directory does not exist.
	        Exception: Any other loading error is printed and re-raised unchanged.
	    """
	    global pipe, device

	    if pipe is not None:
	        return

	    try:
	        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        checkpoint_dir = './weights/LongCat-Image-Edit'

	        # Fail early with a readable message when the weights are missing.
	        if not os.path.exists(checkpoint_dir):
	            raise ValueError(f"Model not found at {checkpoint_dir}. Please download the model first.")

	        text_processor = AutoProcessor.from_pretrained(
	            checkpoint_dir,
	            subfolder='tokenizer'
	        )

	        transformer = LongCatImageTransformer2DModel.from_pretrained(
	            checkpoint_dir,
	            subfolder='transformer',
	            torch_dtype=torch.bfloat16,
	            use_safetensors=True
	        ).to(target_device)

	        loaded_pipe = LongCatImageEditPipeline.from_pretrained(
	            checkpoint_dir,
	            transformer=transformer,
	            text_processor=text_processor,
	        )
	        loaded_pipe.to(target_device, torch.bfloat16)

	        # Publish the globals only now that the pipeline is fully built and
	        # placed on the device.
	        pipe = loaded_pipe
	        device = target_device

	        print(f"✅ Model loaded successfully on {device}")

	    except Exception as e:
	        print(f"❌ Error loading model: {e}")
	        raise

	def edit_image(
	    input_image: Image.Image,
	    prompt: str,
	    negative_prompt: str,
	    guidance_scale: float,
	    num_inference_steps: int,
	    seed: int,
	    progress=gr.Progress()
	):
	    """Edit an image according to a text instruction.

	    Args:
	        input_image: Image to edit; converted to RGB when in another mode.
	        prompt: Edit instruction; must contain non-whitespace text.
	        negative_prompt: Passed through to the pipeline as ``negative_prompt``.
	        guidance_scale: Passed through to the pipeline as ``guidance_scale``.
	        num_inference_steps: Number of inference steps; coerced to ``int``.
	        seed: Seed for the CPU ``torch.Generator``; coerced to ``int``.
	        progress: Gradio progress tracker used to report stage updates.

	    Returns:
	        The first image of the pipeline output (``output.images[0]``).

	    Raises:
	        gr.Error: If no image or no instruction was supplied, or if model
	            loading or inference fails (the original exception is chained).
	    """

	    if input_image is None:
	        raise gr.Error("Please upload an image first")

	    if not prompt or prompt.strip() == "":
	        raise gr.Error("Please enter an edit instruction")

	    try:
	        # Initialize model if not already loaded
	        initialize_model()

	        progress(0.1, desc="Preparing image...")

	        # Convert to RGB if needed
	        if input_image.mode != 'RGB':
	            input_image = input_image.convert('RGB')

	        progress(0.2, desc="Generating edited image...")

	        # Slider-backed values may arrive as floats (e.g. 42.0), which
	        # manual_seed() rejects, so normalise them to int before use.
	        generator = torch.Generator("cpu").manual_seed(int(seed))
	        step_count = int(num_inference_steps)

	        # Run the pipeline
	        with torch.inference_mode():
	            output = pipe(
	                input_image,
	                prompt,
	                negative_prompt=negative_prompt,
	                guidance_scale=guidance_scale,
	                num_inference_steps=step_count,
	                num_images_per_prompt=1,
	                generator=generator
	            )

	        progress(1.0, desc="Done!")

	        return output.images[0]

	    except Exception as e:
	        # Surface the failure in the UI while keeping the original traceback.
	        raise gr.Error(f"Error during image editing: {str(e)}") from e

	# Example rows for gr.Examples, in the order
	# [prompt, negative prompt, guidance scale, inference steps, seed].
	# Only the instruction and the seed vary between rows.
	example_prompts = [
	    [instruction, "", 4.5, 50, seed_value]
	    for instruction, seed_value in (
	        ("将猫变成狗", 42),
	        ("Change the cat to a dog", 42),
	        ("将背景变成海滩", 43),
	        ("Make it nighttime", 44),
	        ("将图片转换为油画风格", 45),
	    )
	]

	# Build Gradio interface: a header, an input column and an output column
	# side by side, then example prompts, the click handler and a footer.
	with gr.Blocks(fill_height=True) as demo:
	    # Page header
	    gr.HTML("""
	<div style="text-align: center; margin-bottom: 20px;">
	<h1>🎨 LongCat Image Edit</h1>
	<p style="font-size: 16px; color: #666;">
	Transform your images with AI-powered editing using natural language instructions
	</p>
	<p style="font-size: 14px; margin-top: 10px;">
	Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
	</p>
	</div>
	""")

	    with gr.Row():
	        # Left column: source image, instruction and generation settings.
	        with gr.Column(scale=1):
	            gr.Markdown("### 📤 Input")
	            input_image = gr.Image(
	                label="Upload Image",
	                type="pil",
	                sources=["upload", "clipboard"],
	                height=400
	            )

	            prompt = gr.Textbox(
	                label="Edit Instruction",
	                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
	                lines=3
	            )

	            with gr.Accordion("⚙️ Advanced Settings", open=False):
	                negative_prompt = gr.Textbox(
	                    label="Negative Prompt (Optional)",
	                    placeholder="What you don't want in the image",
	                    lines=2
	                )

	                guidance_scale = gr.Slider(
	                    minimum=1.0,
	                    maximum=10.0,
	                    value=4.5,
	                    step=0.5,
	                    label="Guidance Scale",
	                    info="Higher values = stronger adherence to prompt"
	                )

	                num_inference_steps = gr.Slider(
	                    minimum=20,
	                    maximum=100,
	                    value=50,
	                    step=5,
	                    label="Inference Steps",
	                    info="More steps = higher quality but slower"
	                )

	                seed = gr.Slider(
	                    minimum=0,
	                    maximum=999999,
	                    value=42,
	                    step=1,
	                    label="Random Seed",
	                    info="Use same seed for reproducible results"
	                )

	            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")

	        # Right column: edited result plus usage tips.
	        with gr.Column(scale=1):
	            gr.Markdown("### 🎯 Output")
	            output_image = gr.Image(
	                label="Edited Image",
	                type="pil",
	                height=400,
	                show_download_button=True
	            )

	            gr.Markdown("### 💡 Tips")
	            gr.Markdown("""
	- Upload a clear, well-lit image for best results
	- Be specific in your edit instructions
	- Supports both English and Chinese prompts
	- Try different guidance scales for varied results
	- Higher inference steps = better quality (but slower)
	""")

	    # Examples section: fills every input except the image itself.
	    gr.Markdown("### 📝 Example Prompts")
	    gr.Examples(
	        examples=example_prompts,
	        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
	        label="Click to try these examples"
	    )

	    # Event handlers: the input order matches edit_image's positional parameters.
	    edit_btn.click(
	        fn=edit_image,
	        inputs=[
	            input_image,
	            prompt,
	            negative_prompt,
	            guidance_scale,
	            num_inference_steps,
	            seed
	        ],
	        outputs=output_image,
	        api_name="edit_image"
	    )

	    # Footer (the separator is a plain "|": "\|" is an invalid escape sequence
	    # in a normal string literal and would render a stray backslash).
	    gr.HTML("""
	<div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
	<p style="color: #666; font-size: 14px;">
	Powered by LongCat Image Edit |
	<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
	</p>
	</div>
	""")

	# Entry point: start the Gradio server only when this file is run as a script.
	if __name__ == "__main__":
	    # Visual theme handed to launch().
	    soft_theme = gr.themes.Soft(
	        primary_hue="blue",
	        secondary_hue="indigo",
	        neutral_hue="slate",
	        font=gr.themes.GoogleFont("Inter"),
	        text_size="lg",
	        spacing_size="lg",
	        radius_size="md"
	    )
	    # Single footer link entry handed to launch().
	    anycoder_link = {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
	    demo.launch(theme=soft_theme, footer_links=[anycoder_link])