|
import importlib
import os
import subprocess
import sys

# Whisper is installed from source at start-up, before it is imported below.
# Running pip as ``<this interpreter> -m pip`` with an argument list avoids a
# shell and guarantees the package lands in the environment that is actually
# executing this script (a bare ``pip`` on PATH may belong to another Python).
_whisper_install = subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,  # best-effort: an already-installed whisper can still be used
)
if _whisper_install.returncode != 0:
    print(
        "warning: installing whisper failed with exit code "
        f"{_whisper_install.returncode}; trying the existing installation",
        file=sys.stderr,
    )

# Make sure the import system notices packages added after interpreter start.
importlib.invalidate_caches()

# These imports must stay below the install step above.
import gradio as gr  # noqa: E402
import whisper  # noqa: E402
import torch  # noqa: E402
from transformers import GPT2Tokenizer, GPT2LMHeadModel  # noqa: E402
|
|
|
|
|
# Speech recogniser: Whisper "base" checkpoint, loaded once at start-up and
# reused by every request.
model = whisper.load_model("base")

# Text generator: the GPT-2 language model and its tokenizer, both taken from
# the same checkpoint so that token ids line up.
_GPT2_CHECKPOINT = "gpt2"
model2 = GPT2LMHeadModel.from_pretrained(_GPT2_CHECKPOINT)
tokenizer = GPT2Tokenizer.from_pretrained(_GPT2_CHECKPOINT)
|
|
|
def speech_to_text(inp):
    """Transcribe an audio file with Whisper and continue the text with GPT-2.

    Args:
        inp: Path of the audio file to transcribe. ``None`` or an empty path
            is treated as "no audio supplied".

    Returns:
        str: The transcript followed by the GPT-2 continuation, decoded with
        special tokens removed. An empty string when no audio was supplied;
        the bare transcript when it tokenizes to nothing.
    """
    # Nothing recorded or uploaded: there is nothing to transcribe.
    if not inp:
        return ""

    result = model.transcribe(inp)
    transcript = result["text"]

    input_ids = tokenizer.encode(transcript, return_tensors="pt")
    # An empty prompt gives the language model nothing to condition on.
    if input_ids.shape[-1] == 0:
        return transcript

    output = model2.generate(
        input_ids,
        # Budget the *generated* tokens only. ``max_length`` also counts the
        # prompt, so a transcript of 50+ tokens left no room to generate.
        max_new_tokens=50,
        num_return_sequences=1,
        # GPT-2 defines no pad token; name the EOS token explicitly instead of
        # relying on the library's implicit fallback.
        pad_token_id=tokenizer.eos_token_id,
        # ``early_stopping`` was dropped: it only affects beam search, and
        # this call decodes greedily.
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)
|
|
|
|
|
|
|
# Gradio front end: a single audio input, handed to ``speech_to_text`` as a
# file path, with the returned string rendered as plain text.
interface = gr.Interface(
    outputs="text",
    inputs=[gr.Audio(type="filepath")],
    fn=speech_to_text,
)

# Start the web app; ``share=True`` additionally requests a shareable link
# from Gradio.
interface.launch(share=True)
|
|