RexChan committed on
Commit
5195825
·
verified ·
1 Parent(s): 236dc97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
3
+ from transformers import pipeline
4
+ import librosa
5
+ import torch
6
+ from spleeter.separator import Separator
7
+ from pydub import AudioSegment
8
+ from IPython.display import Audio
9
+ import os
10
+ import accelerate
11
+
12
+
13
+
14
+
15
+
16
+
17
+ # preprocess and crop audio file
18
+ def audio_preprocess(file_name = '/test1/vocals.wav'):
19
+ # separate music and vocal
20
+ separator = Separator('spleeter:2stems')
21
+ separator.separate_to_file(input_file, output_file)
22
+
23
+
24
+ # Crop the audio
25
+ start_time = 60000 # e.g. 30 seconds, 30000
26
+ end_time = 110000 # e.g. 40 seconds, 40000
27
+
28
+
29
+
30
+
31
+ audio = AudioSegment.from_file(file_name)
32
+ cropped_audio = audio[start_time:end_time]
33
+ processed_audio = cropped_audio
34
+ # .export('cropped_vocals.wav', format='wav') # save vocal audio file
35
+ return processed_audio
36
+
37
+
38
+
39
+
40
+ # ASR transcription
41
+ def asr_model(processed_audio):
42
+ # load audio file
43
+ y, sr = librosa.load(processed_audio, sr=16000)
44
+
45
+
46
+ # ASR model
47
+ MODEL_NAME = "RexChan/ISOM5240-whisper-small-zhhk_1"
48
+ processor = WhisperProcessor.from_pretrained(MODEL_NAME)
49
+ model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
50
+
51
+
52
+ model.config.forced_decoder_ids = None
53
+ model.config.suppress_tokens = []
54
+ model.config.use_cache = False
55
+
56
+
57
+ processed_in = processor(y, sampling_rate=sr, return_tensors="pt")
58
+ gout = model.generate(
59
+ input_features=processed_in.input_features,
60
+ output_scores=True, return_dict_in_generate=True
61
+ )
62
+ transcription = processor.batch_decode(gout.sequences, skip_special_tokens=True)[0]
63
+
64
+
65
+ # print result
66
+ print(f"Song lyrics = {transcription}")
67
+
68
+
69
+ return transcription
70
+
71
+
72
+
73
+
74
+ # sentiment analysis
75
+ def senti_model(transcription):
76
+
77
+
78
+ pipe = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
79
+ final_result = pipe(transcription)
80
+ display = f"Sentiment Analysis shows that this song is {final_result[0]['label']}. Confident level of this analysis is {final_result[0]['score']*100:.1f}%."
81
+ print(display)
82
+ return display
83
+
84
+
85
+ # return final_result
86
+
87
+
88
+
89
+
90
+ # main
91
+ def main(input_file):
92
+
93
+
94
+ # processed_audio = audio_preprocess(input_file)
95
+ processed_audio = input_file
96
+
97
+
98
+ transcription = asr_model(processed_audio)
99
+ final_result = senti_model(transcription)
100
+ st.write(final_result)
101
+
102
+
103
+ if st.button("Play Audio"):
104
+ st.audio(audio_data['audio'],
105
+ format="audio/wav",
106
+ start_time=0,
107
+ sample_rate = audio_data['sampling_rate'])
108
+
109
+
110
+
111
+
112
+ if __name__ == '__main__':
113
+
114
+
115
+ # steamlit setup
116
+ st.set_page_config(page_title="Sentiment Analysis on Your Cantonese Song",)
117
+ st.header("Cantonese Song Sentiment Analyzer")
118
+ input_file = st.file_uploader("upload a song in mp3 format", type="mp3") # upload song
119
+ if input_file is not None:
120
+ st.write("File uploaded successfully!")
121
+ st.write(input_file)
122
+ else:
123
+ st.write("No file uploaded.")
124
+ button_click = st.button("Run Analysis", type="primary")
125
+
126
+
127
+ # load song
128
+ #input_file = os.path.isfile("test1.mp3")
129
+ # output_file = os.path.isdir("")
130
+
131
+
132
+ if button_click:
133
+ main(input_file=input_file)