Spaces:

AlserFurma
/

LipSyncAI

Running

App Files Community

AlserFurma committed 15 days ago

Commit

80564a8

verified ·

1 Parent(s): 194447e

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -25

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from transformers import VitsModel, AutoTokenizer, pipeline
 import scipy.io.wavfile as wavfile
 import traceback
 import random
 # =========================
 # Параметры
@@ -88,7 +89,6 @@ def generate_quiz(text: str):
         raise ValueError(f"Ошибка генерации вопроса:\n{str(e)}\nМодель вернула: {out}")
 def synthesize_audio(text_ru: str):
     """Переводит русскую строку на казахский, синтезирует аудио и возвращает путь к файлу .wav"""
     translation = translator(text_ru, src_lang="rus_Cyrl", tgt_lang="kaz_Cyrl")
@@ -108,23 +108,50 @@ def synthesize_audio(text_ru: str):
     return tmpf.name
-def make_talking_head(image_path: str, audio_path: str):
     """Вызывает SkyReels/Talking Head space и возвращает путь или URL видео."""
-    client = Client(TALKING_HEAD_SPACE)
-    result = client.predict(
-        image_path=handle_file(image_path),
-        audio_path=handle_file(audio_path),
-        guidance_scale=3.0,
-        steps=10,
-        api_name="/process_image_audio"
-    )
-    if isinstance(result, dict) and "video" in result:
-        return result["video"]
-    elif isinstance(result, str):
-        return result
-    else:
-        raise ValueError(f"Unexpected talking head result: {type(result)}")
 # =========================
@@ -152,14 +179,16 @@ def start_lesson(image: Image.Image, text: str, state):
         state_data = {'image_path': image_path, 'correct': correct, 'options': options}
         # удаляем временный аудио файл
-        try: os.remove(audio_path)
-        except: pass
-        return video_path, question, options, state_data, state_data
     except Exception as e:
         traceback.print_exc()
-        return None, f"Ошибка: {e}", [], [], state
 def answer_selected(selected_option: str, state):
@@ -181,8 +210,10 @@ def answer_selected(selected_option: str, state):
         audio_path = synthesize_audio(reaction_ru)
         reaction_video = make_talking_head(image_path, audio_path)
-        try: os.remove(audio_path)
-        except: pass
         return reaction_video, display_message
@@ -227,10 +258,15 @@ with gr.Blocks() as demo:
         outputs=[out_video, out_question, btn_opt1, btn_opt2, lesson_state]
     )
-    btn_opt1.click(fn=answer_selected, inputs=[btn_opt1, lesson_state], outputs=[out_reaction_video, out_status])
-    btn_opt2.click(fn=answer_selected, inputs=[btn_opt2, lesson_state], outputs=[out_reaction_video, out_status])
     demo.load(lambda: "Готово", outputs=out_status)
 if __name__ == '__main__':
-    demo.launch()

 import scipy.io.wavfile as wavfile
 import traceback
 import random
+import time
 # =========================
 # Параметры
         raise ValueError(f"Ошибка генерации вопроса:\n{str(e)}\nМодель вернула: {out}")
 def synthesize_audio(text_ru: str):
     """Переводит русскую строку на казахский, синтезирует аудио и возвращает путь к файлу .wav"""
     translation = translator(text_ru, src_lang="rus_Cyrl", tgt_lang="kaz_Cyrl")
     return tmpf.name
def _extract_video_path(result):
    """Pull the video path or URL out of a talking-head prediction result.

    Accepted shapes, checked in this order:
      * a dict with a "video" key  -> the value stored under that key;
      * a plain string             -> the string itself;
      * a tuple                    -> the first element if it is one of the two
        shapes above, otherwise the first string item that ends in
        ".mp4"/".webm" or names an existing local file.

    Raises:
        ValueError: if no video can be located in ``result`` (including an
            empty tuple, which previously surfaced as a bare IndexError).
    """
    if isinstance(result, dict) and "video" in result:
        return result["video"]
    if isinstance(result, str):
        return result
    if isinstance(result, tuple):
        if result:
            first = result[0]
            if isinstance(first, dict) and "video" in first:
                return first["video"]
            if isinstance(first, str):
                return first
        # The first element was unusable: fall back to scanning the tuple for
        # anything that looks like a video file.
        for item in result:
            if isinstance(item, str) and (
                item.endswith((".mp4", ".webm")) or os.path.exists(item)
            ):
                return item
        raise ValueError(f"Не удалось найти видео в результате: {result}")
    raise ValueError(f"Unexpected talking head result: {type(result)}, value: {result}")


def make_talking_head(image_path: str, audio_path: str, max_retries=3):
    """Call the talking-head Space and return the path or URL of the video.

    A new client is created for every attempt. Any failure (connection error,
    prediction error, or an unrecognised result shape) triggers a retry after
    a 2-second pause until the attempts are exhausted.

    Args:
        image_path: Path to the source image, uploaded via ``handle_file``.
        audio_path: Path to the audio track, uploaded via ``handle_file``.
        max_retries: Total number of attempts. Values below 1 are treated as 1
            so that the function never silently returns ``None``.

    Returns:
        The video path or URL extracted from the prediction result.

    Raises:
        RuntimeError: when every attempt failed; the last underlying error is
            attached as ``__cause__``.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            client = Client(TALKING_HEAD_SPACE)
            result = client.predict(
                image_path=handle_file(image_path),
                audio_path=handle_file(audio_path),
                guidance_scale=3.0,
                steps=10,
                api_name="/process_image_audio"
            )
            # Debug output: the result shape returned by the Space varies.
            print(f"Result type: {type(result)}")
            print(f"Result content: {result}")
            return _extract_video_path(result)
        except Exception as e:
            if attempt == attempts:
                # Chain the cause so the original traceback is not lost.
                raise RuntimeError(f"Ошибка после {attempts} попыток: {e}") from e
            print(f"Попытка {attempt} не удалась: {e}. Повторяю через 2 секунды...")
            time.sleep(2)
 # =========================
         state_data = {'image_path': image_path, 'correct': correct, 'options': options}
         # удаляем временный аудио файл
+        try:
+            os.remove(audio_path)
+        except:
+            pass
+        return video_path, question, gr.Button(options[0], visible=True), gr.Button(options[1], visible=True), state_data
     except Exception as e:
         traceback.print_exc()
+        return None, f"Ошибка: {e}", gr.Button("Вариант 1", visible=True), gr.Button("Вариант 2", visible=True), state
 def answer_selected(selected_option: str, state):
         audio_path = synthesize_audio(reaction_ru)
         reaction_video = make_talking_head(image_path, audio_path)
+        try:
+            os.remove(audio_path)
+        except:
+            pass
         return reaction_video, display_message
         outputs=[out_video, out_question, btn_opt1, btn_opt2, lesson_state]
     )
+    btn_opt1.click(fn=lambda state: answer_selected(state.get('options', [''])[0] if state else '', state),
+                   inputs=[lesson_state],
+                   outputs=[out_reaction_video, out_status])
+    btn_opt2.click(fn=lambda state: answer_selected(state.get('options', [''])[1] if state and len(state.get('options', [])) > 1 else '', state),
+                   inputs=[lesson_state],
+                   outputs=[out_reaction_video, out_status])
     demo.load(lambda: "Готово", outputs=out_status)
 if __name__ == '__main__':
+    demo.launch(server_name="0.0.0.0", server_port=7860)