import os

# Install Whisper from source (useful in environments such as Hugging Face Spaces
# where the package is not preinstalled)
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio as gr
import torch
import whisper
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the GPT-2 tokenizer and language model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model2 = GPT2LMHeadModel.from_pretrained("gpt2")

# Load the Whisper speech-recognition model
model = whisper.load_model("base")

def speech_to_text(inp):
    # Transcribe the uploaded or recorded audio file with Whisper
    result = model.transcribe(inp)
    # Feed the transcript to GPT-2 and generate a short continuation
    input_ids = tokenizer.encode(result["text"], return_tensors="pt")
    output = model2.generate(
        input_ids,
        max_length=50,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; this avoids a warning
    )
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded_output

# Launch the UI with Gradio
interface = gr.Interface(
    fn=speech_to_text,
    inputs=[gr.Audio(type="filepath")],
    outputs="text",
)
interface.launch(share=True)