darag committed on
Commit
cee1bbe
1 Parent(s): 5876c56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -92
app.py CHANGED
@@ -1,92 +1 @@
1
# -*- coding: utf-8 -*-
"""gradio_kurdi.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DFSu80KU5dHKmbbqb2bPA5R8hzPbTP76
"""

# NOTE: The notebook cell `!pip install torch transformers datasets librosa gradio`
# is IPython shell magic and is a SyntaxError in a plain Python file, so it must
# not appear here. Install the dependencies outside the script instead, e.g. by
# listing them in requirements.txt:
#   torch
#   transformers
#   datasets
#   librosa
#   gradio
11
-
12
- import torch
13
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
14
- import librosa
15
- import numpy as np
16
- from datetime import timedelta
17
- import gradio as gr
18
- import os
19
-
20
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            clamped to zero because SRT timestamps cannot be negative.

    Returns:
        The timestamp string. Hours are zero-padded to at least two digits
        and keep growing past 99 rather than wrapping.
    """
    td = max(timedelta(seconds=seconds), timedelta(0))
    # Work from the *total* duration: ``td.seconds`` excludes the ``days``
    # component, so reading it directly would wrap at 24 hours.
    total_ms = td // timedelta(milliseconds=1)
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
26
-
27
def estimate_word_timings(transcription, total_duration):
    """Estimate start/end times for each word of a transcription.

    The total duration is distributed across the words in proportion to
    their character counts; words are laid out back to back starting at 0.

    Args:
        transcription: The transcribed text; words are whitespace-separated.
        total_duration: Length of the recording in seconds.

    Returns:
        A list of ``(word, start_time, end_time)`` tuples in word order.
        An empty or whitespace-only transcription yields an empty list.
    """
    words = transcription.split()
    if not words:
        # No words means zero characters; bail out before dividing by zero.
        return []

    total_chars = sum(len(word) for word in words)
    char_duration = total_duration / total_chars

    word_timings = []
    current_time = 0

    for word in words:
        end_time = current_time + len(word) * char_duration
        word_timings.append((word, current_time, end_time))
        current_time = end_time

    return word_timings
43
-
44
# Checkpoint identifier shared by the CTC model and its processor; both are
# loaded once at import time and reused by every transcription request.
model_name = "Akashpb13/xlsr_kurmanji_kurdish"
model, processor = (
    Wav2Vec2ForCTC.from_pretrained(model_name),
    Wav2Vec2Processor.from_pretrained(model_name),
)
47
-
48
def transcribe_audio(file):
    """Transcribe an audio file and write a word-by-word SRT subtitle file.

    Args:
        file: Path to the audio file (the Gradio audio input is configured
            with ``type="filepath"``).

    Returns:
        A ``(transcription, srt_path)`` tuple: the decoded text and the path
        of the generated ``output_word_by_word.srt`` file.
    """
    # Local import so this function does not depend on a new file-level import.
    import tempfile

    # Load the audio file, requesting a 16 kHz sampling rate.
    speech, rate = librosa.load(file, sr=16000)

    # Prepare the model input.
    input_values = processor(
        speech, return_tensors="pt", sampling_rate=rate
    ).input_values

    # Run the model without tracking gradients.
    with torch.no_grad():
        logits = model(input_values).logits

    # Decode the text from the highest-scoring token at each position.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    # Total length of the recording in seconds.
    total_duration = len(speech) / rate

    # Estimate the timing of each word. A blank transcription has no words,
    # so skip the estimate rather than risk a division by zero.
    if transcription.strip():
        word_timings = estimate_word_timings(transcription, total_duration)
    else:
        word_timings = []

    # Build the SRT content, one numbered cue per word.
    srt_content = "".join(
        f"{i}\n{format_time(start_time)} --> {format_time(end_time)}\n{word}\n\n"
        for i, (word, start_time, end_time) in enumerate(word_timings, start=1)
    )

    # Save the SRT file in a fresh per-call directory: a fixed path in the
    # working directory would let concurrent requests overwrite each other.
    output_dir = tempfile.mkdtemp(prefix="srt_")
    output_filename = os.path.join(output_dir, "output_word_by_word.srt")
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(srt_content)

    return transcription, output_filename
82
-
83
# Gradio front end: one audio input (handed to the callback as a file path)
# and two outputs, the transcription text and the downloadable SRT file.
interface = gr.Interface(
    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
    article="By Derax Elî",
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download SRT File"),
    ],
)

# Start the web app.
interface.launch()
 
1
+ https://89879aa8bd9cacb387.gradio.live