import os
import random
import tempfile

import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline

# Pick the execution target once: CUDA with half precision when a GPU is
# available, otherwise CPU with full precision.
device, torch_dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Load the text-to-video pipeline at import time and move it to the chosen device.
pipe = DiffusionPipeline.from_pretrained(
    "stepfun-ai/stepvideo-t2v",
    torch_dtype=torch_dtype,
).to(device)

# Largest seed value offered by the UI and used for random seed draws.
MAX_SEED = np.iinfo(np.int32).max

def infer(prompt, seed, randomize_seed, num_inference_steps):
    """Generate a video for ``prompt`` and write it to a fresh MP4 file.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is ignored and a new one is drawn
            uniformly from ``[0, MAX_SEED]``.
        num_inference_steps: Number of inference steps forwarded to the
            pipeline.

    Returns:
        A ``(video_path, seed)`` tuple: the path of the written MP4 file and
        the integer seed that was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # UI sliders may deliver numbers as floats; the seed and the step count
    # must be integers for the calls below.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    generator = torch.manual_seed(seed)

    output = pipe(
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
    )
    frames = output.frames[0]

    # Use a unique file per call so overlapping requests cannot overwrite each
    # other's result. The file is created (and closed) here only to reserve
    # the name; it is intentionally not deleted because the caller serves it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    imageio.mimsave(video_path, frames, fps=8)

    return video_path, seed

# Sample prompts offered as one-click examples in the UI.
examples = [
    "Astronaut dancing on Mars, cinematic lighting",
    "A cat flying through the city on a skateboard",
    "Robot chef cooking in a futuristic kitchen",
]

# Build the UI: prompt + button, the resulting video, and collapsible
# advanced settings; the button click is wired to `infer`.
with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video with `stepvideo-t2v`")

    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
        run_btn = gr.Button("Generate Video")

    with gr.Row():
        video_output = gr.Video(label="Generated Video")

    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        # step=1 keeps the step count integral; without an explicit step the
        # slider can produce fractional values.
        num_inference_steps = gr.Slider(
            label="Inference Steps",
            minimum=1,
            maximum=50,
            step=1,
            value=25,
        )

    gr.Examples(examples=examples, inputs=[prompt])

    # `seed` is also an output so the slider shows the seed that was used.
    run_btn.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, num_inference_steps],
        outputs=[video_output, seed],
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()