krishnasai99 committed
Commit cb23395
1 Parent(s): 27999b6

Update app.py

Files changed (1): app.py (+27 -8)
app.py CHANGED
@@ -12,16 +12,18 @@ import nltk
 from nltk import tokenize
 nltk.download('punkt')
 import spacy_streamlit
+from datasets import load_dataset
+from transformers import pipeline
 
 
 st.title('Audio-to-Text')
 
 audio_file = st.file_uploader('Upload Audio' , type=['wav' , 'mp3','m4a'])
 
-st.title( 'Please select any of the NLP tasks')
+st.subheader( 'Please select any of the NLP tasks')
 
 
-if st.button('Trascribe Audio'):
+if st.button('Audio Transcription'):
     if audio_file is not None:
         processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
         model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
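Each task button below reloads this same checkpoint pair and repeats the same decode sequence. As a reference, a minimal sketch of that shared step pulled into one helper (the transcribe name and module-level loading are illustrative assumptions, not part of the commit):

# Sketch only: the load-and-decode sequence repeated in each branch of
# app.py, factored into a reusable helper (name is hypothetical).
import librosa
import torch
from transformers import Wav2Vec2Processor, HubertForCTC

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")

def transcribe(audio_file):
    # Resample to the 16 kHz rate the checkpoint expects
    speech, rate = librosa.load(audio_file, sr=16000)
    input_values = processor(speech, return_tensors="pt",
                             padding="longest", sampling_rate=rate).input_values
    with torch.no_grad():
        logits = model(input_values).logits
    # Greedy CTC decoding: most probable token per frame
    predicted_ids = torch.argmax(logits, dim=-1)
    return ' '.join(processor.batch_decode(predicted_ids))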
@@ -37,7 +39,6 @@ if st.button('Trascribe Audio'):
         st.error('please upload the audio file')
 
 
-
 if st.button('Summarize'):
     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
     model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
@@ -48,10 +49,10 @@ if st.button('Summarize'):
     text = processor.batch_decode(predicted_ids)
     summary_list = [str(sentence) for sentence in text]
     result = ' '.join(summary_list)
-    summarize = pipeline("summarization")
+    summarize = pipeline("summarization" , model='facebook/bart-large-cnn')
     st.markdown(summarize(result)[0]['summary_text'])
 
-if st.button('sentiment-analysis'):
+if st.button('Sentiment Analysis'):
     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
     model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
     speech, rate = librosa.load(audio_file, sr=16000)
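Pinning the summarizer to facebook/bart-large-cnn avoids depending on the task's default checkpoint, which can change between transformers releases. That model reads roughly 1024 input tokens, so a long transcript is worth truncating explicitly; a sketch, where the truncation flag is an assumption rather than part of the commit:

# Sketch: pinned summarization pipeline with explicit truncation for
# transcripts longer than the model's input window (flag assumed).
from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(result, truncation=True)[0]["summary_text"]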
@@ -64,7 +65,25 @@ if st.button('sentiment-analysis'):
     nlp_sa = pipeline("sentiment-analysis")
     st.markdown(nlp_sa(result))
 
-if st.button('Name'):
+
+if st.button('Audio Classification'):
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+    model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
+    speech, rate = librosa.load(audio_file, sr=16000)
+    input_values = processor(speech, return_tensors="pt", padding="longest", sampling_rate=rate).input_values
+    logits = model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    text = processor.batch_decode(predicted_ids)
+    summary_list = [str(sentence) for sentence in text]
+    result = ' '.join(summary_list)
+    dataset = load_dataset("anton-l/superb_demo", "er", split="session1")
+    classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
+    labels = classifier(dataset[0]["file"], top_k=5)
+    st.markdown(labels)
+
+
+
+if st.button('Name Entity Recognition'):
     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
     model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
     speech, rate = librosa.load(audio_file, sr=16000)
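The new Audio Classification branch transcribes the upload but then classifies a fixed sample from anton-l/superb_demo rather than the uploaded file. If the intent is to label the user's own audio, the pipeline also accepts a raw waveform; a sketch under that assumption:

# Sketch: run the SUPERB emotion classifier on the uploaded audio
# itself. The audio-classification pipeline accepts a raw waveform;
# resampling to 16 kHz matches the checkpoint's training rate.
import librosa
from transformers import pipeline

classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
speech, _ = librosa.load(audio_file, sr=16000)
labels = classifier(speech, top_k=5)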
@@ -93,7 +112,7 @@ source_lang = st.selectbox("Source language",['English'])
 target_lang = st.selectbox("Target language",['German','French'])
 
 
-if st.button('Translate'):
+if st.button('Translate'):
     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
     model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
     speech, rate = librosa.load(audio_file, sr=16000)
@@ -106,7 +125,7 @@ if st.button('Translate'):
     prefix = 'translate '+str(source_lang)+' to '+str(target_lang)
     sentence_token = tokenize.sent_tokenize(result)
     output = tokenizer([prefix+sentence for sentence in sentence_token], padding=True, return_tensors="pt")
-    translated_id = model1.generate(output["input_ids"], attention_mask=output['attention_mask'], max_length=100)
+    translated_id = model1.generate(output["input_ids"], attention_mask=output['attention_mask'], max_length=10000)
     translated_word = tokenizer.batch_decode(translated_id, skip_special_tokens=True)
     st.subheader('Translated Text')
     st.write(' '.join(translated_word))
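With generate, max_length counts decoder tokens, so the old cap of 100 could truncate long sentences and the new 10000 effectively removes the limit. Since translation here runs sentence by sentence, a bounded per-sentence cap is another option, assuming a transformers release that supports max_new_tokens:

# Sketch: cap generated tokens per sentence instead of using a very
# large max_length (the 256 value is illustrative).
translated_id = model1.generate(output["input_ids"],
                                attention_mask=output["attention_mask"],
                                max_new_tokens=256)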
 