import whisper
import gradio as gr
from transformers import pipeline

# Load the openai-whisper model. The decoding helpers used in `transcribe`
# (load_audio, pad_or_trim, log_mel_spectrogram, decode) expect this model
# type, not a transformers checkpoint.
model = whisper.load_model("medium")

# Pipelines backing the helper functions further down
# (the sentiment model here is the pipeline default).
asr = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
classifier = pipeline("sentiment-analysis")
def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # decode the audio (fp16=False keeps this CPU-friendly)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
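
# For reference, the WhisperProcessor / WhisperForConditionalGeneration pair
# from transformers can also transcribe on its own. A minimal sketch, assuming
# a 16 kHz mono input; `transcribe_hf` is an illustrative name and is not
# wired into the app below:
#
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
# processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
# hf_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
#
# def transcribe_hf(path):
#     audio = whisper.load_audio(path)  # decodes and resamples to 16 kHz mono
#     inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
#     ids = hf_model.generate(inputs.input_features)
#     return processor.batch_decode(ids, skip_special_tokens=True)[0]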
# Alternative: a single-function interface instead of the Blocks app below.
# (gr.inputs.Audio and the `source` argument are deprecated; current Gradio
# uses gr.Audio with `sources`.)
# gr.Interface(
#     title="Talk to NP",
#     fn=transcribe,
#     inputs=[gr.Audio(sources=["microphone"], type="filepath")],
#     outputs=["textbox"],
#     live=True,
# ).launch()
def speech_to_text(speech):
    # Transcribe with the transformers ASR pipeline.
    text = asr(speech)["text"]
    return text

def text_to_sentiment(text):
    # Return just the predicted label, e.g. "POSITIVE" or "NEGATIVE".
    return classifier(text)[0]["label"]
with gr.Blocks() as demo:
    audio_file = gr.Audio(type="filepath")
    text1 = gr.Textbox(label="Transcription")
    text2 = gr.Textbox(label="Sentiment")

    b1 = gr.Button("Transcribe audio")
    b2 = gr.Button("Classify sentiment")

    # The first button fills the transcription box; the second classifies
    # whatever text is in it.
    b1.click(transcribe, inputs=audio_file, outputs=text1)
    b2.click(text_to_sentiment, inputs=text1, outputs=text2)

demo.launch()
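
# Deployment note: on a Hugging Face Space this file is typically saved as
# app.py, with openai-whisper, transformers, and torch listed in
# requirements.txt (plus ffmpeg available for whisper.load_audio). A missing
# dependency is a common cause of the "Runtime error" status on a Space page.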