yappeizhen committed on
Commit
7e3dd20
•
1 Parent(s): ec041ea

Upload 2 files

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +156 -0
  3. thmbnail.jpg +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ thmbnail.jpg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ from transformers import pipeline
4
+
5
+ model = whisper.load_model("base")
6
+ sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
7
+
8
+ def analyze_sentiment(text):
9
+ results = sentiment_analysis(text)
10
+ sentiment_results = {result['label']: result['score'] for result in results}
11
+ return sentiment_results
12
+
13
+ def get_sentiment_emoji(sentiment):
14
+ # Define the emojis corresponding to each sentiment
15
+ emoji_mapping = {
16
+ "disappointment": "😞",
17
+ "sadness": "😢",
18
+ "annoyance": "😠",
19
+ "neutral": "😐",
20
+ "disapproval": "👎",
21
+ "realization": "😮",
22
+ "nervousness": "😬",
23
+ "approval": "👍",
24
+ "joy": "😄",
25
+ "anger": "😡",
26
+ "embarrassment": "😳",
27
+ "caring": "🤗",
28
+ "remorse": "😔",
29
+ "disgust": "🤢",
30
+ "grief": "😥",
31
+ "confusion": "😕",
32
+ "relief": "😌",
33
+ "desire": "😍",
34
+ "admiration": "😌",
35
+ "optimism": "😊",
36
+ "fear": "😨",
37
+ "love": "❤️",
38
+ "excitement": "🎉",
39
+ "curiosity": "🤔",
40
+ "amusement": "😄",
41
+ "surprise": "😲",
42
+ "gratitude": "🙏",
43
+ "pride": "🦁"
44
+ }
45
+ return emoji_mapping.get(sentiment, "")
46
+
47
+ def display_sentiment_results(sentiment_results, option):
48
+ sentiment_text = ""
49
+ for sentiment, score in sentiment_results.items():
50
+ emoji = get_sentiment_emoji(sentiment)
51
+ if option == "Sentiment Only":
52
+ sentiment_text += f"{sentiment} {emoji}\n"
53
+ elif option == "Sentiment + Score":
54
+ sentiment_text += f"{sentiment} {emoji}: {score}\n"
55
+ return sentiment_text
56
+
57
+ def inference(audio, sentiment_option):
58
+ audio = whisper.load_audio(audio)
59
+ audio = whisper.pad_or_trim(audio)
60
+
61
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
62
+
63
+ _, probs = model.detect_language(mel)
64
+ lang = max(probs, key=probs.get)
65
+
66
+ options = whisper.DecodingOptions(fp16=False)
67
+ result = whisper.decode(model, mel, options)
68
+
69
+ sentiment_results = analyze_sentiment(result.text)
70
+ sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
71
+
72
+ return lang.upper(), result.text, sentiment_output
73
+
74
+ title = """<h1 align="center">🎤 Multilingual ASR 💬</h1>"""
75
+ image_path = "thmbnail.jpg"
76
+ description = """
77
+ 💻 This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
78
+ <br>
79
+ โš™๏ธ Components of the tool:<br>
80
+ <br>
81
+ &nbsp;&nbsp;&nbsp;&nbsp; - Real-time multilingual speech recognition<br>
82
+ &nbsp;&nbsp;&nbsp;&nbsp; - Language identification<br>
83
+ &nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis of the transcriptions<br>
84
+ <br>
85
+ 🎯 The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores.<br>
86
+ <br>
87
+
88
+ 😃 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
89
+ <br>
90
+
91
+ ✅ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
92
+ <br>
93
+
94
+ โ“ Use the microphone for real-time speech recognition.<br>
95
+ <br>
96
+
97
+ โšก๏ธ The model will transcribe the audio and perform sentiment analysis on the transcribed text.<br>
98
+
99
+ """
100
+
101
+ custom_css = """
102
+ #banner-image {
103
+ display: block;
104
+ margin-left: auto;
105
+ margin-right: auto;
106
+ }
107
+ #chat-message {
108
+ font-size: 14px;
109
+ min-height: 300px;
110
+ }
111
+ """
112
+
113
+ block = gr.Blocks(css=custom_css)
114
+
115
+ with block:
116
+ gr.HTML(title)
117
+
118
+ with gr.Row():
119
+ with gr.Column():
120
+ gr.Image(image_path, elem_id="banner-image", show_label=False)
121
+ with gr.Column():
122
+ gr.HTML(description)
123
+
124
+ with gr.Group():
125
+ with gr.Box():
126
+ audio = gr.Audio(
127
+ label="Input Audio",
128
+ show_label=False,
129
+ source="microphone",
130
+ type="filepath"
131
+ )
132
+
133
+ sentiment_option = gr.Radio(
134
+ choices=["Sentiment Only", "Sentiment + Score"],
135
+ label="Select an option",
136
+ default="Sentiment Only"
137
+ )
138
+
139
+ btn = gr.Button("Transcribe")
140
+
141
+ lang_str = gr.Textbox(label="Language")
142
+
143
+ text = gr.Textbox(label="Transcription")
144
+
145
+ sentiment_output = gr.Textbox(label="Sentiment Analysis Results", output=True)
146
+
147
+ btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])
148
+
149
+ gr.HTML('''
150
+ <div class="footer">
151
+ <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
152
+ </p>
153
+ </div>
154
+ ''')
155
+
156
+ block.launch()
thmbnail.jpg ADDED

Git LFS Details

  • SHA256: 7eee3957b750ba424660c193b0185bf53ccd0caeb209b465648574d86cb9e03a
  • Pointer size: 132 Bytes
  • Size of remote file: 2.03 MB