Spaces:

anaszil
/

whisper-darija

Running on Zero

App Files Files Community

anaszil committed on Oct 25

Commit

557d6ce

1 Parent(s): aa571e1

V1

Browse files

Files changed (7) hide show

.gitattributes +0 -1
.gitignore +2 -0
README.md +5 -5
app.py +185 -0
packages.txt +1 -0
record.py +29 -0
requirements.txt +7 -0

.gitattributes CHANGED Viewed

@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ gradio_cached_examples

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
 title: Whisper Darija
-emoji: 🐠
-colorFrom: red
-colorTo: green
 sdk: gradio
-sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Whisper Darija
+emoji: 🌍
+colorFrom: pink
+colorTo: pink
 sdk: gradio
+sdk_version: 3.29.0
 app_file: app.py
 pinned: false
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,185 @@

+import os
+from typing import Optional
+import gradio as gr
+import torch
+from dotenv import load_dotenv
+from huggingface_hub import login
+from peft import PeftModel
+from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
+from transformers.pipelines.base import Pipeline
# Load variables from a local .env file (via python-dotenv) before HF_TOKEN
# is read below.
load_dotenv()


def ensure_hf_login() -> None:
    """Log in to the Hugging Face Hub when an ``HF_TOKEN`` is available.

    The login is best-effort: a missing or empty token, or any failure
    raised by ``login``, is reported on stdout and otherwise ignored so the
    app can still start.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        try:
            login(token=hf_token)
        except Exception as err:  # deliberate: never block startup on auth
            print(f"Failed to login to Hugging Face Hub: {err}")
    else:
        print("HF_TOKEN not set; skipping Hugging Face login.")


# Authenticate once at import time, before any model download is attempted.
ensure_hf_login()
# Language name handed to the Whisper processor and to generate().
LANGUAGE = "Arabic"

# Batch size forwarded to the ASR pipeline on every call.
BATCH_SIZE = 1

# First CUDA device when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = 0
else:
    DEVICE = "cpu"

# Base checkpoint and the LoRA adapter applied on top of it.
BASE_MODEL_PATH = "openai/whisper-large-v3-turbo"
LORA_PATH = "anaszil/whisper-large-v3-turbo-darija"

# Lazily-built singleton; populated by get_pipeline() on first use.
PIPELINE: Optional[Pipeline] = None
def _build_pipeline() -> Pipeline:
    """Assemble the speech-recognition pipeline for the Darija LoRA model.

    Loads the processor and the base Whisper checkpoint from
    ``BASE_MODEL_PATH``, wraps the base model with the adapter stored at
    ``LORA_PATH``, pins the generation language/task, and returns a
    ``transformers`` ASR pipeline that chunks audio into 30-second windows.

    Returns:
        The ready-to-call automatic-speech-recognition pipeline on ``DEVICE``.
    """
    processor = WhisperProcessor.from_pretrained(
        BASE_MODEL_PATH,
        language=LANGUAGE,
        task="transcribe",
    )

    # Half precision only when CUDA is available; full precision otherwise.
    if torch.cuda.is_available():
        weights_dtype = torch.float16
    else:
        weights_dtype = torch.float32

    whisper = WhisperForConditionalGeneration.from_pretrained(
        BASE_MODEL_PATH,
        torch_dtype=weights_dtype,
    )
    model = PeftModel.from_pretrained(whisper, LORA_PATH)

    generation_config = model.generation_config
    generation_config.language = LANGUAGE
    generation_config.task = "transcribe"
    # Presumably cleared so the language/task above drive decoding instead of
    # any forced decoder ids shipped with the checkpoint -- TODO confirm.
    generation_config.forced_decoder_ids = None

    model.eval()

    return pipeline(
        task="automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        chunk_length_s=30,
        device=DEVICE,
    )
def get_pipeline() -> Pipeline:
    """Return the shared ASR pipeline, building it on the first call.

    The pipeline is cached in the module-level ``PIPELINE`` so later calls
    reuse it.
    """
    global PIPELINE
    if PIPELINE is not None:
        return PIPELINE
    print("Loading Darija LoRA model...")
    PIPELINE = _build_pipeline()
    return PIPELINE
def format_timestamp(
    seconds: Optional[float],
    always_include_hours: bool = False,
    decimal_marker: str = ".",
) -> Optional[str]:
    """Render a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Non-negative offset in seconds, or ``None`` when unknown.
        always_include_hours: Emit the ``HH:`` field even when it is zero.
        decimal_marker: Separator between whole seconds and milliseconds.

    Returns:
        The formatted timestamp, or ``None`` when ``seconds`` is ``None``.

    Raises:
        ValueError: If ``seconds`` is negative. Floor division would
            otherwise borrow from the higher unit and silently produce a
            wrong string (e.g. ``-1.5`` became ``"59:58.500"``).
    """
    if seconds is None:
        return None
    if seconds < 0:
        raise ValueError(f"non-negative timestamp expected, got {seconds!r}")

    # Work in integer milliseconds so rounding happens exactly once.
    total_ms = round(seconds * 1000.0)
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    whole_seconds, milliseconds = divmod(total_ms, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return (
        f"{hours_marker}{minutes:02d}:{whole_seconds:02d}"
        f"{decimal_marker}{milliseconds:03d}"
    )
def transcribe(audio_input, return_timestamps: bool):
    """Transcribe one audio input with the shared ASR pipeline.

    Args:
        audio_input: Audio accepted by the pipeline (the UI passes a file
            path), or ``None`` when nothing was provided.
        return_timestamps: When true, return one ``[start -> end] text`` line
            per chunk instead of the plain transcript.

    Returns:
        The transcript text, the timestamped lines, or a prompt asking for
        audio when ``audio_input`` is ``None``.
    """
    if audio_input is None:
        return "Please provide audio input either via microphone or file upload."

    asr_pipeline = get_pipeline()
    outputs = asr_pipeline(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe", "language": LANGUAGE},
        return_timestamps=return_timestamps,
    )
    text = outputs["text"]
    if not return_timestamps:
        return text

    chunks = outputs.get("chunks") or []
    if not chunks:
        # No chunk data came back: keep the plain transcript rather than
        # replacing it with an empty string.
        return text

    def _bound(value):
        # A chunk boundary can be None (e.g. Whisper predicted no closing
        # timestamp); show a placeholder instead of the literal "None".
        return "?" if value is None else format_timestamp(value)

    return "\n".join(
        f"[{_bound(chunk['timestamp'][0])} -> "
        f"{_bound(chunk['timestamp'][1])}] {chunk['text']}"
        for chunk in chunks
    )
def process_audio(audio_file, uploaded_file, timestamps):
    """Gradio click handler: choose an audio source and transcribe it.

    The microphone recording (``audio_file``) wins when it is truthy;
    otherwise the uploaded file is used.

    Returns:
        A ``(transcription, status)`` pair for the output textbox and the
        status line. Any exception from transcription is reported in both
        fields instead of being raised.
    """
    chosen = audio_file if audio_file else uploaded_file
    if chosen is None:
        return "Please provide audio input.", "No audio input detected."

    try:
        result = transcribe(chosen, timestamps)
    except Exception as err:
        return f"Error: {err}", f"Transcription failed: {err}"
    return result, "Transcription completed with the Darija LoRA model."
# Two-column UI: inputs and options on the left, results on the right.
with gr.Blocks(title="Darija Speech Transcription") as demo:
    gr.Markdown("# Darija Speech Transcription Demo")
    gr.Markdown("Transcribe Darija audio with the fine-tuned Whisper LoRA model.")
    with gr.Row():
        with gr.Column(scale=1):
            timestamps_checkbox = gr.Checkbox(
                label="Return timestamps",
                value=False,
            )
            # Microphone-only recorder. process_audio prefers this input over
            # the upload below when both are set.
            audio_input = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio",
            )
            file_input = gr.Audio(
                source="upload",
                type="filepath",
                label="Upload Audio File",
            )
            transcribe_button = gr.Button("Transcribe", variant="primary")
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Transcription Output",
                lines=10,
                show_copy_button=True,
            )
            status_message = gr.Markdown("")
    transcribe_button.click(
        fn=process_audio,
        inputs=[audio_input, file_input, timestamps_checkbox],
        outputs=[output_text, status_message],
    )
    # Build the model when the page loads so the first click does not pay
    # the full model-loading cost.
    demo.load(fn=get_pipeline, inputs=None, outputs=None)

# launch(enable_queue=...) is deprecated in Gradio 3.x; enable the request
# queue through .queue() instead.
demo.queue()
demo.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

record.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import sounddevice as sd
+import numpy as np
+import scipy.io.wavfile as wav
# Recording defaults used by record_audio().

# Length of the capture, in seconds.
DURATION = 5

# Sample rate in Hz; Whisper expects 16 kHz input.
SAMPLERATE = 16000

# Destination WAV file for the recording.
OUTPUT_FILENAME = "recorded_audio.wav"
def record_audio(duration=DURATION, samplerate=SAMPLERATE, filename=OUTPUT_FILENAME):
    """Record mono 16-bit audio from the microphone and write it to a WAV file.

    Args:
        duration: Recording length in seconds.
        samplerate: Sample rate in Hz used for capture and for the WAV file.
        filename: Path of the WAV file to write.
    """
    print(f"🎤 Recording for {duration} seconds... Speak now!")

    frame_count = int(duration * samplerate)
    samples = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype=np.int16,
    )
    # sd.rec returns immediately; wait until the capture has finished.
    sd.wait()

    wav.write(filename, samplerate, samples)
    print(f"✅ Recording complete! Audio saved as '{filename}'")


if __name__ == "__main__":
    record_audio()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch
+transformers
+gradio==3.29.0
+python-dotenv
+sounddevice
+scipy
+peft==0.14.0