# Spaces: Runtime error
"""Streamlit app that transcribes an uploaded audio file and analyses the text.

The page offers four buttons. Each one transcribes the uploaded audio with a
CTC speech-recognition model and then either shows the transcript, summarizes
it, runs sentiment analysis on it, or highlights the named entities in it.
"""
import librosa
import soundfile as sf
import spacy
import streamlit as st
import torch
from spacy import displacy
from transformers import (
    HubertForCTC,
    Wav2Vec2ForCTC,
    Wav2Vec2Processor,
    Wav2Vec2Tokenizer,
    pipeline,
)

# Processor and model are loaded from the SAME checkpoint so that feature
# extraction and the decoding vocabulary match what the model was tuned with.
ASR_CHECKPOINT = "facebook/hubert-large-ls960-ft"

# Sampling rate the audio is resampled to before it is fed to the model.
SAMPLE_RATE = 16000


def _transcribe(uploaded_file):
    """Return the decoded transcript(s) of *uploaded_file* as a list of strings.

    Args:
        uploaded_file: Seekable file-like object holding the audio, as
            returned by ``st.file_uploader``.

    Returns:
        The result of ``processor.batch_decode`` on the arg-max token ids.
    """
    # NOTE(review): both objects are re-created on every button click; consider
    # Streamlit's resource caching if the installed version provides it.
    processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
    model = HubertForCTC.from_pretrained(ASR_CHECKPOINT)

    # Rewind first: the same buffer may already have been consumed by an
    # earlier read, which would otherwise leave nothing to decode.
    uploaded_file.seek(0)
    speech, rate = librosa.load(uploaded_file, sr=SAMPLE_RATE)

    input_values = processor(
        speech,
        return_tensors="pt",
        padding="longest",
        sampling_rate=rate,
    ).input_values

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)


def _transcript_or_error(uploaded_file):
    """Transcribe *uploaded_file*, or show an error and return ``None``.

    Every button goes through this guard so that clicking one before a file
    has been uploaded yields a readable message instead of an exception.
    """
    if uploaded_file is None:
        st.error('please upload the audio file')
        return None
    return _transcribe(uploaded_file)


st.title('Audio-to-Text')
audio_file = st.file_uploader('Upload Audio', type=['wav', 'mp3', 'm4a'])

if st.button('Trascribe Audio'):
    text = _transcript_or_error(audio_file)
    if text is not None:
        st.write(text)

if st.button('Summarize'):
    text = _transcript_or_error(audio_file)
    if text is not None:
        summarize = pipeline("summarization")
        st.write(summarize(text))

if st.button('sentiment-analysis'):
    text = _transcript_or_error(audio_file)
    if text is not None:
        nlp_sa = pipeline("sentiment-analysis")
        st.write(nlp_sa(text))

if st.button('Name'):
    text = _transcript_or_error(audio_file)
    if text is not None:
        transcript = ''.join(text)
        trf = spacy.load('en_core_web_trf')
        doc = trf(transcript)
        # Ask displaCy for the markup as a string and render it in the page;
        # printing it would only write the HTML to the server's stdout.
        entity_html = displacy.render(doc, style='ent', jupyter=False)
        st.markdown(entity_html, unsafe_allow_html=True)