Spaces:

darag
/

kurdish-kurmanci-to-text-srt

Sleeping

App Files Files Community

darag committed on Aug 27, 2024

Commit

37cc811

•

1 Parent(s): d7633c6

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -1

app.py CHANGED Viewed

@@ -1,3 +1,72 @@
 import gradio as gr
-gr.Interface.load("https://89879aa8bd9cacb387.gradio.live").launch()

+# -*- coding: utf-8 -*-
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import librosa
+import numpy as np
+from datetime import timedelta
 import gradio as gr
+import os
+def format_time(seconds):
+    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.
+
+    Args:
+        seconds: Offset from the start of the audio, in seconds (int or
+            float). Negative values are clamped to zero.
+
+    Returns:
+        The timestamp string. Precision finer than a millisecond is
+        truncated, and the hour field keeps growing past 24 instead of
+        wrapping.
+    """
+    td = timedelta(seconds=max(seconds, 0))
+    # ``td.seconds`` alone wraps every 24 hours, so fold the day count back in
+    # to keep the hour field correct for very long durations.
+    whole_seconds = td.days * 86400 + td.seconds
+    hours, remainder = divmod(whole_seconds, 3600)
+    minutes, secs = divmod(remainder, 60)
+    milliseconds = td.microseconds // 1000
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
+def estimate_word_timings(transcription, total_duration):
+    """Estimate start/end times for each word of a transcription.
+
+    The total duration is divided evenly across all non-whitespace
+    characters, and each word is given a span proportional to its length.
+    Words are laid out back to back starting at time 0. These are rough
+    estimates, not alignments derived from the audio.
+
+    Args:
+        transcription: The recognised text; split on whitespace into words.
+        total_duration: Length of the audio in seconds.
+
+    Returns:
+        A list of ``(word, start_time, end_time)`` tuples in word order, or
+        an empty list when the transcription contains no words.
+    """
+    words = transcription.split()
+    if not words:
+        # No words means no characters to divide the duration by; returning
+        # early avoids a ZeroDivisionError on empty transcriptions.
+        return []
+    total_chars = sum(len(word) for word in words)
+    char_duration = total_duration / total_chars
+    word_timings = []
+    current_time = 0
+    for word in words:
+        end_time = current_time + len(word) * char_duration
+        word_timings.append((word, current_time, end_time))
+        current_time = end_time
+    return word_timings
+# Checkpoint identifier passed to both loaders below
+# (presumably a Hugging Face Hub model id -- confirm).
+model_name = "Akashpb13/xlsr_kurmanji_kurdish"
+# Loaded once at import time and reused by every transcribe_audio() call.
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+def transcribe_audio(file):
+    """Transcribe an audio file and write a word-by-word SRT subtitle file.
+
+    The audio is loaded at 16 kHz, run through the module-level ``model``
+    and decoded greedily. Cue timings come from ``estimate_word_timings``
+    and are estimates proportional to word length.
+
+    Args:
+        file: Path to the audio file to transcribe.
+
+    Returns:
+        A ``(transcription, srt_path)`` tuple: the decoded text and the path
+        of the UTF-8 SRT file containing one cue per word.
+    """
+    import tempfile
+
+    speech, rate = librosa.load(file, sr=16000)
+    input_values = processor(speech, return_tensors="pt", sampling_rate=rate).input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+    # Greedy decoding: keep the highest-scoring token at every frame.
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)[0]
+    total_duration = len(speech) / rate
+    # An empty transcription has no characters to spread the duration over,
+    # so skip the timing estimate and emit an empty SRT file instead.
+    if transcription.strip():
+        word_timings = estimate_word_timings(transcription, total_duration)
+    else:
+        word_timings = []
+    srt_content = "".join(
+        f"{i}\n{format_time(start_time)} --> {format_time(end_time)}\n{word}\n\n"
+        for i, (word, start_time, end_time) in enumerate(word_timings, start=1)
+    )
+    # Write into a fresh directory per call so concurrent requests do not
+    # overwrite each other's file, while keeping the same download name.
+    # NOTE: these directories are not removed here; clean them up externally
+    # if disk usage matters.
+    output_filename = os.path.join(tempfile.mkdtemp(), "output_word_by_word.srt")
+    with open(output_filename, "w", encoding="utf-8") as f:
+        f.write(srt_content)
+    return transcription, output_filename
+# Gradio UI: one audio file path in, the transcription text and SRT file out.
+interface = gr.Interface(
+    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
+    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
+    article="By Derax Elî",
+    fn=transcribe_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs=[
+        gr.Textbox(label="Transcription"),
+        gr.File(label="Download SRT File"),
+    ],
+)
+# Start the app only when run as a script, not when imported.
+if __name__ == "__main__":
+    interface.launch()