ASR / app.py
JoshuaPD's picture
Update app.py
81b844f verified
raw
history blame contribute delete
890 Bytes
import os
import subprocess
import sys

# Install Whisper from GitHub at startup, before `import whisper` runs.
# pip is invoked as `<this interpreter> -m pip` with an argument list (no
# shell), so the package lands in the environment that is actually running
# this script rather than in whichever `pip` happens to be first on PATH.
# check=False keeps the original best-effort behavior: a failed install does
# not abort here; the subsequent `import whisper` decides whether the app
# can start.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)
import gradio as gr
import whisper
import torch
import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# GPT-2 tokenizer and language-model head, both taken from the same
# pretrained checkpoint so their vocabularies match.
_GPT2_CHECKPOINT = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(_GPT2_CHECKPOINT)
model2 = GPT2LMHeadModel.from_pretrained(_GPT2_CHECKPOINT)

# Whisper speech-recognition model ("base" checkpoint), loaded once at
# import time and reused for every transcription request.
model = whisper.load_model("base")
def speech_to_text(inp):
    """Transcribe an audio file with Whisper and continue the text with GPT-2.

    Args:
        inp: Path to the audio file to transcribe (the Gradio audio input is
            configured with ``type="filepath"``). May be ``None`` or empty
            when nothing was recorded or uploaded.

    Returns:
        The decoded GPT-2 output sequence (the transcription prompt followed
        by the generated continuation) with special tokens removed. An empty
        string when there is no audio or the transcription is blank.
    """
    # No audio submitted: there is nothing to transcribe.
    if not inp:
        return ""

    text = model.transcribe(inp)["text"]
    # A blank transcription leaves GPT-2 with no prompt to continue from.
    if not text.strip():
        return ""

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which is passed to generate() explicitly below.
    encoded = tokenizer(text, return_tensors="pt")
    output = model2.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # max_new_tokens bounds only the continuation. The previous
        # max_length=50 also counted the prompt, so a transcription of 50+
        # tokens left no room to generate anything.
        max_new_tokens=50,
        num_return_sequences=1,
        # GPT-2 defines no pad token; reuse EOS so generate() has one.
        pad_token_id=tokenizer.eos_token_id,
        # early_stopping was dropped: it only affects beam search, which is
        # not enabled here.
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Build the Gradio UI: a single audio input handed to speech_to_text as a
# file path, and a single text output; then start the app with share=True.
audio_input = gr.Audio(type="filepath")
interface = gr.Interface(fn=speech_to_text, inputs=[audio_input], outputs="text")
interface.launch(share=True)