srirama committed on
Commit
e1a10bd
1 Parent(s): 5454cd7

gradio demo

Browse files
Files changed (2) hide show
  1. app.py +46 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ from pydub import AudioSegment
4
+ import os
5
+
6
+ # Initialize the pipeline
7
+
8
# Hub id of the checkpoint to load; change to "your-username/the-name-you-picked".
MODEL_ID = "srirama/whisper-small-hi"

# Built once at import time so every request reuses the same pipeline object.
pipe = pipeline(model=MODEL_ID)
9
+
10
+
11
+
12
+ # Function to split audio into chunks
13
def split_audio(audio_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of at most ``chunk_length_ms``.

    Args:
        audio_path: Path of the audio file, handed to ``AudioSegment.from_file``.
        chunk_length_ms: Chunk length used as the slice step; callers in this
            file pass milliseconds. Must be a positive integer.

    Returns:
        A list of audio slices in playback order. The final slice may be
        shorter than ``chunk_length_ms``; an empty recording yields ``[]``.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before decoding: a negative step would otherwise silently
    # produce no chunks, and a zero step only fails deep inside range().
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    audio = AudioSegment.from_file(audio_path)
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
20
+
21
+ # Function to transcribe audio
22
def transcribe(audio_path):
    """Transcribe a recording by running the pipeline on 20-second chunks.

    Args:
        audio_path: Path of the recorded audio file. ``None`` or an empty
            string (nothing was recorded) is accepted.

    Returns:
        The per-chunk transcripts joined with single spaces and stripped of
        surrounding whitespace, or ``""`` when there is no audio to process.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Nothing recorded: return an empty transcript instead of crashing.
    if not audio_path:
        return ""

    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    texts = []
    # A private temporary directory per call prevents concurrent requests from
    # overwriting each other's chunk files, and guarantees cleanup even when
    # exporting or inference raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, chunk in enumerate(audio_chunks):
            temp_file = os.path.join(tmp_dir, f"temp_chunk_{i}.wav")
            chunk.export(temp_file, format="wav")
            texts.append(pipe(temp_file)["text"])

    return " ".join(texts).strip()
37
+
38
# Microphone input; the recording is handed to `transcribe` as a file path.
# NOTE(review): the `sources=[...]` keyword looks like the Gradio 4.x API, while
# requirements.txt pins gradio==2.9.1 -- confirm the installed version accepts it.
audio_input = gr.Audio(sources=["microphone"], type="filepath")

# NOTE(review): the title says "English" but the model id ends in "-hi";
# confirm which language the checkpoint actually targets.
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
)

# Start serving the demo.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers==4.28.1
2
+ pydub==0.25.1
3
+ gradio==2.9.1