Spaces:

dream2589632147
/

Dream-MultiStyle-Video-Colorizer

Running on Zero

App Files Files Community

dream2589632147 committed on Nov 13

Commit

cecbfad

verified ·

1 Parent(s): 8fd1ad8

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -79

app.py CHANGED Viewed

@@ -6,24 +6,23 @@ from PIL import Image
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
 from controlnet_aux import CannyDetector
 from diffusers.utils import load_image
-from moviepy.editor import VideoFileClip
 import os
 import shutil
 import tempfile
 import datetime
 # 1. تهيئة النموذج
-# تحديد جهاز التشغيل (GPU إذا كان متاحًا)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
 try:
     print(f"Loading models on: {device}...")
-    # تحميل ControlNet (نموذج Canny)
     controlnet_model = ControlNetModel.from_pretrained(
         "lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype
     )
-    # تحميل الـ Pipeline الرئيسية
     model_id = "runwayml/stable-diffusion-v1-5"
     pipe = StableDiffusionControlNetPipeline.from_pretrained(
         model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
@@ -33,100 +32,106 @@ try:
     print("Models loaded successfully.")
 except Exception as e:
     print(f"Error loading models on CUDA: {e}. Switching to CPU.")
-    # محاولة التحميل على CPU إذا فشل CUDA
     controlnet_model = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
     pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id, controlnet=controlnet_model)
     pipe.to("cpu")
-# تهيئة مُعالِج Canny
 canny_processor = CannyDetector()
 # 2. دالة معالجة الفيديو والنموذج
 def colorize_video_multistyle(video_file, reference_image_path, prompt, style_choice, steps=25):
-    # 0. إنشاء اسم ملف فريد للناتج
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     final_output_name = f"colored_output_{timestamp}.mp4"
-    # استخدام مجلد مؤقت لضمان التنظيف
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # 1. استخراج الإطارات والصوت
-        clip = VideoFileClip(video_file)
-        audio_file = None
-        if clip.audio:
-            audio_file = os.path.join(temp_dir, "temp_audio.mp3")
-            clip.audio.write_audiofile(audio_file, verbose=False, logger=None)
-        fps = clip.fps
-        # 2. تجهيز المدخلات للنموذج
-        style_prompts = {
-            "Auto Color": "photorealistic color photo, cinematic, detailed, masterpiece",
-            "Vivid": "highly saturated, vibrant color photo, pop art colors",
-            "Vintage": "sepia tone, old film grain, 1940s vintage look",
-        }
-        final_prompt = f"{prompt}, {style_prompts.get(style_choice, '')}"
-        negative_prompt = "lowres, bad anatomy, bad hands, blurry, distorted, nsfw, frame, border, changed details, monochrome"
-        # تجهيز الصورة المرجعية (إذا تم تحميل IP-Adapter)
-        # ip_adapter_images = []
-        # if reference_image_path:
-        #     ref_image = load_image(reference_image_path).convert("RGB")
-        #     ip_adapter_images.append(ref_image)
-            # pipe.set_ip_adapter_images(ip_adapter_images)
-        colored_frames = []
-        # 3. معالجة الإطارات (التلوين باستخدام ControlNet)
-        for i, frame in enumerate(clip.iter_frames(fps=fps, dtype='uint8')):
-            pil_image = Image.fromarray(frame).convert("RGB")
-            # استخراج خريطة Canny للحفاظ على الهيكل
-            canny_image = canny_processor(pil_image)
-            # تمرير خريطة Canny للنموذج
-            image_out = pipe(
-                prompt=final_prompt,
-                negative_prompt=negative_prompt,
-                image=canny_image, # ControlNet Canny Input
-                num_inference_steps=steps,
-                guidance_scale=7.5
-            ).images[0]
-            colored_frames.append(np.array(image_out))
-        # 4. تجميع الإطارات في فيديو مؤقت (AVI)
-        # نستخدم ترميز MJPG و AVI كملف مؤقت لموثوقية OpenCV
-        output_video_path = os.path.join(temp_dir, "colored_temp_video.avi")
-        height, width, layers = colored_frames[0].shape
-        # MJPG هو ترميز موثوق به في بيئات Linux/Docker مع OpenCV
-        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
-        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
-        for frame in colored_frames:
-            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        out.release()
-        # 5. إضافة الصوت الأصلي وإعادة الترميز لـ MP4 باستخدام MoviePy
-        if audio_file and os.path.exists(audio_file):
-            video_clip = VideoFileClip(output_video_path)
-            final_clip = video_clip.set_audio(clip.audio)
-            # MoviePy تتولى إنشاء MP4 النهائي بترميز libx264 الموثوق
-            final_clip.write_videofile(final_output_name, codec='libx264', audio_codec='aac', verbose=False, logger=None)
-        else:
-            shutil.copy(output_video_path, final_output_name)
-        clip.close()
-        return final_output_name
-# 3. واجهة Gradio النهائية
 iface = gr.Interface(
     fn=colorize_video_multistyle,
     inputs=[

 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
 from controlnet_aux import CannyDetector
 from diffusers.utils import load_image
+# تم إزالة: from moviepy.editor import VideoFileClip
 import os
 import shutil
 import tempfile
 import datetime
+import ffmpeg # المكتبة الجديدة
 # 1. تهيئة النموذج
+# ... (كود التهيئة يبقى كما هو)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
 try:
     print(f"Loading models on: {device}...")
     controlnet_model = ControlNetModel.from_pretrained(
         "lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype
     )
     model_id = "runwayml/stable-diffusion-v1-5"
     pipe = StableDiffusionControlNetPipeline.from_pretrained(
         model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
     print("Models loaded successfully.")
 except Exception as e:
     print(f"Error loading models on CUDA: {e}. Switching to CPU.")
     controlnet_model = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
     pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id, controlnet=controlnet_model)
     pipe.to("cpu")
 canny_processor = CannyDetector()
 # 2. دالة معالجة الفيديو والنموذج
 def colorize_video_multistyle(video_file, reference_image_path, prompt, style_choice, steps=25):
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_temp_video_no_audio = os.path.join(tempfile.gettempdir(), f"temp_colored_{timestamp}_no_audio.mp4")
     final_output_name = f"colored_output_{timestamp}.mp4"
+    # === 1. استخراج الإطارات و الصوت (باستخدام FFMPEG مباشرةً عبر OpenCV) ===
+    # نستخدم FFMPEG-Python لاستخراج مسار ملف الصوت المؤقت
+    # 1.1 استخراج الصوت
+    audio_path = os.path.join(tempfile.gettempdir(), f"temp_audio_{timestamp}.aac")
+    try:
+        (
+            ffmpeg
+            .input(video_file)
+            .output(audio_path, acodec='copy')
+            .run(overwrite_output=True, quiet=True)
+        )
+        audio_exists = True
+    except ffmpeg.Error:
+        audio_exists = False
+        print("No audio found or extraction failed. Proceeding without audio.")
+    # 1.2 قراءة الفيديو للإطارات
+    cap = cv2.VideoCapture(video_file)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    # 2. تجهيز المدخلات للنموذج (كود الأنماط يبقى كما هو)
+    style_prompts = {
+        "Auto Color": "photorealistic color photo, cinematic, detailed, masterpiece",
+        "Vivid": "highly saturated, vibrant color photo, pop art colors",
+        "Vintage": "sepia tone, old film grain, 1940s vintage look",
+    }
+    final_prompt = f"{prompt}, {style_prompts.get(style_choice, '')}"
+    negative_prompt = "lowres, bad anatomy, bad hands, blurry, distorted, nsfw, frame, border, changed details, monochrome"
+    colored_frames = []
+    # 3. معالجة الإطارات (التلوين)
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        canny_image = canny_processor(pil_image)
+        image_out = pipe(
+            prompt=final_prompt,
+            negative_prompt=negative_prompt,
+            image=canny_image,
+            num_inference_steps=steps,
+            guidance_scale=7.5
+        ).images[0]
+        colored_frames.append(np.array(image_out))
+    cap.release()
+    # 4. تجميع الإطارات في فيديو مؤقت (MP4) باستخدام OpenCV
+    # نستخدم MP4V-2 لتجنب الاعتماد على الترميز الخارجي
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_temp_video_no_audio, fourcc, fps, (width, height))
+    for frame in colored_frames:
+        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
+    out.release()
+    # 5. دمج الفيديو الملون مع الصوت الأصلي باستخدام FFMPEG-Python
+    if audio_exists:
+        try:
+            (
+                ffmpeg
+                .input(output_temp_video_no_audio)
+                .output(ffmpeg.input(audio_path).audio, final_output_name, vcodec='copy', acodec='copy')
+                .run(overwrite_output=True, quiet=True)
+            )
+        except ffmpeg.Error as e:
+            print(f"FFMPEG merge failed: {e.stderr.decode('utf8')}")
+            shutil.copy(output_temp_video_no_audio, final_output_name) # العودة إلى الفيديو بدون صوت
+    else:
+        shutil.copy(output_temp_video_no_audio, final_output_name)
+    # 6. تنظيف الملفات المؤقتة
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+    return final_output_name
+# 3. واجهة Gradio النهائية (بدون تغيير)
 iface = gr.Interface(
     fn=colorize_video_multistyle,
     inputs=[