File size: 890 Bytes
7d0471b
 
 
 
2f6c921
 
1e24584
00f4803
 
 
 
 
 
2f6c921
013bd79
 
2f6c921
013bd79
 
013ee0a
81b844f
013bd79
 
2f6c921
7d0471b
75fd29e
 
 
 
 
 
81b844f
7d0471b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
import subprocess
import sys

# Install Whisper from its GitHub repository at start-up, before the
# `import whisper` further down.
# - `sys.executable -m pip` installs into the interpreter that is running this
#   script, rather than whichever `pip` happens to be first on PATH.
# - Passing an argument list avoids going through a shell.
# - A failed install is reported but not fatal (the original call ignored the
#   exit status too): an already-installed Whisper can still be imported.
_whisper_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "git+https://github.com/openai/whisper.git"],
    check=False,
)
if _whisper_install.returncode != 0:
    print(
        f"warning: pip install of whisper exited with status {_whisper_install.returncode}",
        file=sys.stderr,
    )
import gradio as gr
import whisper
import torch


import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the GPT-2 tokenizer and language-model head from the "gpt2" checkpoint.
# Both are created once at module level and reused by every speech_to_text() call.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model2 = GPT2LMHeadModel.from_pretrained("gpt2")

# Load the Whisper "base" model; speech_to_text() uses it to transcribe audio.
model = whisper.load_model("base")

def speech_to_text(inp):
    """Transcribe an audio file with Whisper and continue the text with GPT-2.

    Args:
        inp: Path to the audio file to transcribe (the Gradio ``Audio``
            component in this file is configured with ``type="filepath"``).
            May be ``None`` or empty when the form is submitted without audio.

    Returns:
        The decoded GPT-2 output (the transcript followed by the generated
        continuation) with special tokens stripped, or an empty string when
        there is no audio or the transcript is empty.
    """
    # Submitting without a recording hands us None; there is nothing to do.
    if not inp:
        return ""

    transcript = model.transcribe(inp)["text"]
    # An empty prompt gives GPT-2 nothing to condition on and makes
    # generate() fail on a zero-length input.
    if not transcript.strip():
        return ""

    # Budget for generated tokens only. The previous `max_length=50` also
    # counted the prompt, so transcripts of 50+ tokens left no room to generate.
    max_new_tokens = 50

    encoded = tokenizer(transcript, return_tensors="pt")
    # Keep the tail of long transcripts so prompt + continuation fit inside
    # GPT-2's context window; the continuation follows the most recent words.
    prompt_budget = model2.config.n_positions - max_new_tokens
    input_ids = encoded["input_ids"][:, -prompt_budget:]
    attention_mask = encoded["attention_mask"][:, -prompt_budget:]

    # `early_stopping` was dropped: it only affects beam search, which is not
    # used here. GPT-2 has no pad token, so EOS is passed explicitly.
    output = model2.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Build the Gradio UI around speech_to_text() and start serving it.

# The audio component hands the recording to the callback as a file path.
audio_input = gr.Audio(type="filepath")
interface = gr.Interface(fn=speech_to_text, inputs=[audio_input], outputs="text")
# share=True requests a public share link from Gradio.
interface.launch(share=True)