Files changed (1) hide show
  1. app.py +13 -37
app.py CHANGED
@@ -1,42 +1,18 @@
1
-
2
- # import gradio as gr
3
- # from googletrans import Translator
4
- # import torch
5
- # # Initialize Translator
6
- # from transformers import pipeline
7
- # translator = Translator()
8
- # MODEL_NAME = "openai/whisper-base"
9
- # device = 0 if torch.cuda.is_available() else "cpu"
10
- # pipe = pipeline(
11
- # task="automatic-speech-recognition",
12
- # model=MODEL_NAME,
13
- # chunk_length_s=30,
14
- # device=device,
15
- # )
16
-
17
-
18
- # def transcribe_audio(audio):
19
- # text = pipe(audio)["text"]
20
- # return text
21
- # # return translated_text
22
-
23
- # audio_record = gr.inputs.Audio(source='microphone', label='Record Audio')
24
- # output_text = gr.outputs.Textbox(label='Transcription')
25
-
26
- # interface = gr.Interface(fn=transcribe_audio, inputs=audio_record, outputs=output_text)
27
- # interface.launch()
28
-
29
  import gradio as gr
30
  from transformers import pipeline
31
 
32
- modelo = pipeline("automatic-speech-recognition", model="openai/whisper-base")
 
 
 
 
 
 
33
 
34
- def transcribe(audio):
35
- text = modelo(audio)["text"]
36
- return text
37
 
38
- gr.Interface(
39
- fn=transcribe,
40
- inputs=[gr.Audio(source="microphone", type="filepath")],
41
- outputs=["textbox"]
42
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
 
4
# Build the speech-recognition pipeline once at module import; the
# transcription callback below reuses this single instance.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-960h",
)
6
+
7
def transcribe_audio(audio):
    """Transcribe a recorded audio clip to text.

    Args:
        audio: The audio input received from the Gradio audio component.
            It is forwarded unchanged to ``asr_pipeline``.

    Returns:
        The recognized text, or an empty string when ``audio`` is ``None``
        (i.e. nothing was recorded before submitting).
    """
    # Nothing to transcribe; avoid passing None into the pipeline.
    if audio is None:
        return ""
    # For a single input the ASR pipeline returns a dict whose recognized
    # text is stored under the "text" key (it is not a list of
    # {"transcription": ...} items).
    result = asr_pipeline(audio)
    return result["text"]
11
 
12
# Define the Gradio components.
# type="filepath" makes Gradio hand the microphone recording to
# transcribe_audio as a path to an audio file, which the ASR pipeline accepts
# as input; "auto" is not a supported Audio type.
audio_input = gr.Audio(source="microphone", type="filepath", label="Record Audio")
text_output = gr.Textbox(label="Transcription")

# Create the interface and launch it.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Speech to Text",
)
interface.launch()