manasch committed on
Commit
8a68e19
·
verified ·
1 Parent(s): 12204b0

add sentiment analyser and refactor code

Browse files
.gitattributes CHANGED
@@ -33,8 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- *.wav filter=lfs diff=lfs merge=lfs -text
37
- *.jpeg filter=lfs diff=lfs merge=lfs -text
38
- *.jpg filter=lfs diff=lfs merge=lfs -text
39
- *.png filter=lfs diff=lfs merge=lfs -text
40
- *.mp4 filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
.gitignore CHANGED
@@ -8,9 +8,14 @@ __pycache__
8
 
9
  # Video
10
  *.mp4
 
11
 
12
  # Audio
13
  *.wav
14
  *.mp3
15
 
 
 
 
 
16
  *.log
 
8
 
9
  # Video
10
  *.mp4
11
+ *.mkv
12
 
13
  # Audio
14
  *.wav
15
  *.mp3
16
 
17
+ # Others
18
+ *.pdf
19
+ *.md
20
+
21
  *.log
app.py CHANGED
@@ -1,221 +1,26 @@
1
  import typing
2
  from pathlib import Path
3
 
4
- import numpy as np
5
  import gradio as gr
6
 
7
  import PIL
8
  from PIL import Image
9
  from moviepy.editor import *
10
 
11
- from lib.audio_generation import AudioGeneration
12
- from lib.image_captioning import ImageCaptioning
13
- from lib.pace_model import PaceModel
14
 
15
  pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
16
  resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
17
  height, width, channels = (224, 224, 3)
18
 
19
- class AudioPalette:
20
- def __init__(self):
21
- self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
22
- self.image_captioning = ImageCaptioning()
23
- self.audio_generation = AudioGeneration()
24
- self.pace_map = {
25
- "Fast": "high",
26
- "Medium": "medium",
27
- "Slow": "low"
28
- }
29
-
30
- def prompt_construction(self, caption: str, pace: str, instrument: typing.Union[str, None], first: bool = True):
31
- instrument = instrument if instrument is not None else ""
32
-
33
- if first:
34
- prompt = f"A {instrument} soundtrack for {caption} with {self.pace_map[pace]} beats per minute. High Quality"
35
- else:
36
- prompt = f"A {instrument} soundtrack for {caption} with {self.pace_map[pace]} beats per minute. High Quality. Transitions smoothely from the previous audio while sounding different."
37
-
38
- return prompt
39
-
40
- def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
41
- pace = self.pace_model.predict(input_image)
42
- print("Pace Prediction Done")
43
-
44
- generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
45
- print("Captioning Done")
46
- generated_text = generated_text if generated_text is not None else ""
47
-
48
- prompt = self.prompt_construction(generated_text, pace, instrument)
49
- print("Generated Prompt:", prompt)
50
-
51
- audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
52
- print("Audio Generation Done")
53
-
54
- outputs = [prompt, pace, generated_text, audio_file]
55
- return outputs
56
-
57
- def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str]):
58
- clips = [ImageClip(m).set_duration(5) for m in file_paths]
59
- audio_clips = [AudioFileClip(a) for a in audio_paths]
60
- concat_audio = concatenate_audioclips(audio_clips)
61
- new_audio = CompositeAudioClip([concat_audio])
62
-
63
- concat_clip = concatenate_videoclips(clips, method="compose")
64
- concat_clip.audio = new_audio
65
-
66
- file_name = "generated_video.mp4"
67
- concat_clip.write_videofile(file_name, fps=24)
68
- return file_name
69
-
70
- def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
71
- images = [Image.open(image_path) for image_path in file_paths]
72
- pace = []
73
- generated_text = []
74
- prompts = []
75
-
76
- # Extracting the pace for all the images
77
- for image in images:
78
- pace_prediction = self.pace_model.predict(image)
79
- pace.append(pace_prediction)
80
- print("Pace Prediction Done")
81
-
82
- # Generating the caption for all the images
83
- for image in images:
84
- caption = self.image_captioning.query(image)[0].get("generated_text")
85
- generated_text.append(caption)
86
- print("Captioning Done")
87
-
88
- first = True
89
- for generated_caption, pace_pred in zip(generated_text, pace):
90
- prompts.append(self.prompt_construction(generated_caption, pace_pred, instrument, first))
91
- first = False
92
- print("Generated Prompts: ", prompts)
93
-
94
- audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
95
- print("Audio Generation Done")
96
-
97
- video_file = self.stitch_images(file_paths, [audio_file])
98
- return video_file
99
-
100
- def single_image_interface(model: AudioPalette):
101
- demo = gr.Interface(
102
- fn=model.generate_single,
103
- inputs=[
104
- gr.Image(
105
- type="pil",
106
- label="Upload an image",
107
- show_label=True,
108
- container=True
109
- ),
110
- gr.Radio(
111
- choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
112
- label="Instrument",
113
- show_label=True,
114
- container=True
115
- ),
116
- gr.Textbox(
117
- lines=1,
118
- placeholder="ngrok endpoint",
119
- label="colab endpoint",
120
- show_label=True,
121
- container=True,
122
- type="text",
123
- visible=True
124
- )
125
- ],
126
- outputs=[
127
- gr.Textbox(
128
- lines=1,
129
- placeholder="Prompt",
130
- label="Generated Prompt",
131
- show_label=True,
132
- container=True,
133
- type="text",
134
- visible=False
135
- ),
136
- gr.Textbox(
137
- lines=1,
138
- placeholder="Pace of the image",
139
- label="Pace",
140
- show_label=True,
141
- container=True,
142
- type="text",
143
- visible=False
144
- ),
145
- gr.Textbox(
146
- lines=1,
147
- placeholder="Caption for the image",
148
- label="Caption",
149
- show_label=True,
150
- container=True,
151
- type="text",
152
- visible=False
153
- ),
154
- gr.Audio(
155
- label="Generated Audio",
156
- show_label=True,
157
- container=True,
158
- visible=True,
159
- format="wav",
160
- autoplay=False,
161
- show_download_button=True,
162
- )
163
- ],
164
- cache_examples=False,
165
- live=False,
166
- description="Provide an image to generate an appropriate background soundtrack",
167
- )
168
-
169
- return demo
170
-
171
- def multi_image_interface(model: AudioPalette):
172
- demo = gr.Interface(
173
- fn=model.generate_multiple,
174
- inputs=[
175
- gr.File(
176
- file_count="multiple",
177
- file_types=["image"],
178
- type="filepath",
179
- label="Upload images",
180
- show_label=True,
181
- container=True,
182
- visible=True
183
- ),
184
- gr.Radio(
185
- choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
186
- label="Instrument",
187
- show_label=True,
188
- container=True
189
- ),
190
- gr.Textbox(
191
- lines=1,
192
- placeholder="ngrok endpoint",
193
- label="colab endpoint",
194
- show_label=True,
195
- container=True,
196
- type="text",
197
- visible=True
198
- )
199
- ],
200
- outputs=[
201
- gr.Video(
202
- format="mp4",
203
- label="Generated Video",
204
- show_label=True,
205
- container=True,
206
- visible=True,
207
- autoplay=False,
208
- )
209
- ],
210
- cache_examples=False,
211
- live=False,
212
- description="Provide images to generate an a slideshow of the images with appropriate music as background",
213
- )
214
-
215
- return demo
216
-
217
  def main():
218
- model = AudioPalette()
 
 
 
 
 
 
219
 
220
  tab_1 = single_image_interface(model)
221
  tab_2 = multi_image_interface(model)
 
1
  import typing
2
  from pathlib import Path
3
 
 
4
  import gradio as gr
5
 
6
  import PIL
7
  from PIL import Image
8
  from moviepy.editor import *
9
 
10
+ from utils import *
 
 
11
 
12
  pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
13
  resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
14
  height, width, channels = (224, 224, 3)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def main():
17
+ model = AudioPalette(
18
+ pace_model_weights_path,
19
+ resnet50_tf_model_weights_path,
20
+ height,
21
+ width,
22
+ channels
23
+ )
24
 
25
  tab_1 = single_image_interface(model)
26
  tab_2 = multi_image_interface(model)
lib/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .audio_generation import AudioGeneration
2
+ from .image_captioning import ImageCaptioning
3
+ from .pace_model import PaceModel
4
+ from .sentiment_analyser import SentimentAnalyser
lib/audio_generation.py CHANGED
@@ -33,5 +33,4 @@ class AudioGeneration:
33
  stored_file_path = self.request_generation(prompt)
34
 
35
  audio_file = self.request_download(stored_file_path)
36
- print(audio_file)
37
  return audio_file
 
33
  stored_file_path = self.request_generation(prompt)
34
 
35
  audio_file = self.request_download(stored_file_path)
 
36
  return audio_file
lib/image_captioning.py CHANGED
@@ -26,5 +26,4 @@ class ImageCaptioning:
26
  headers=self.headers,
27
  data=self.convert_to_bytes(image)
28
  )
29
- print(response.json())
30
  return response.json()
 
26
  headers=self.headers,
27
  data=self.convert_to_bytes(image)
28
  )
 
29
  return response.json()
lib/pace_model.py CHANGED
@@ -52,5 +52,5 @@ class PaceModel:
52
  image = np.expand_dims(resized_image, axis=0)
53
 
54
  prediction = self.resnet_model.predict(image)
55
- print(prediction, np.argmax(prediction))
56
  return self.class_names[np.argmax(prediction)]
 
52
  image = np.expand_dims(resized_image, axis=0)
53
 
54
  prediction = self.resnet_model.predict(image)
55
+ # print(prediction, np.argmax(prediction))
56
  return self.class_names[np.argmax(prediction)]
lib/sentiment_analyser.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import string
3
+ from collections import Counter
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
10
+ from nltk.stem import WordNetLemmatizer
11
+ from nltk.tokenize import word_tokenize
12
+
13
+ from utils import *
14
+
15
# strftime pattern used for the timestamps that prefix log lines.
datetime_format = "%d/%m/%Y %H:%M:%S"


def now():
    """Return the current local time rendered with ``datetime_format``."""
    current = datetime.now()
    return current.strftime(datetime_format)
19
+
20
class SentimentAnalyser:
    """Pick a dominant emotion for a piece of text from a keyword lexicon.

    The text is lower-cased, stripped of punctuation, tokenized, filtered for
    English stop words and lemmatized with NLTK. Every lexicon entry whose
    word occurs among the resulting lemmas contributes its emotion, and the
    most frequent emotion is returned.
    """

    # Characters deleted from each lexicon line before it is split on ":".
    # Spaces are deleted as well, so multi-word entries collapse
    # (e.g. "singled out" becomes "singledout").
    _LEXICON_STRIP = str.maketrans("", "", "\n,' ")

    def __init__(self):
        # Fetch the NLTK resources that sentiment() relies on.
        nltk.download('punkt')
        nltk.download('stopwords')
        nltk.download('wordnet')
        # A relative path: it is resolved against the current working directory.
        self.emotions = Path("utils/emotions.txt").resolve()

    @staticmethod
    def _parse_emotion_line(line):
        """Parse one lexicon line of the form ``'word': 'emotion',``.

        Returns a ``(word, emotion)`` tuple, or ``None`` when the line does
        not contain exactly one ``:`` separator (blank or malformed line).
        """
        cleaned = line.translate(SentimentAnalyser._LEXICON_STRIP).strip()
        if cleaned.count(":") != 1:
            return None
        word, emotion = cleaned.split(":")
        return word, emotion

    def sentiment(self, text):
        """Return the most common lexicon emotion found in *text*.

        Args:
            text: The text to analyse. ``None`` or an empty string is accepted.

        Returns:
            The emotion label that matched most often, or ``None`` when the
            text is empty or no lexicon word occurs in it.
        """
        if not text:
            # Nothing to analyse (e.g. the captioning step produced no text).
            return None

        cleaned_text = text.lower().translate(str.maketrans('', '', string.punctuation))
        tokenized_words = word_tokenize(cleaned_text, "english")

        # Build the stop-word set and the lemmatizer once per call instead of
        # once per token; set membership is also O(1) rather than a list scan.
        stop_words = set(stopwords.words("english"))
        lemmatizer = WordNetLemmatizer()

        # Default lemmatization treats words as nouns (e.g. plural -> singular).
        lemma_words = {
            lemmatizer.lemmatize(word)
            for word in tokenized_words
            if word not in stop_words
        }

        # Walk the lexicon in file order so duplicate keys each contribute and
        # ties in the count below resolve the same way on every run.
        emotion_list = []
        with open(self.emotions, encoding="utf-8") as f:
            for line in f:
                entry = self._parse_emotion_line(line)
                if entry is not None and entry[0] in lemma_words:
                    emotion_list.append(entry[1])

        print(f"[{now()}] Emotion List:", emotion_list)
        if not emotion_list:
            print(f"[{now()}] No emotion could be extracted.")
            return None

        emotions_count = Counter(emotion_list)
        print(f"[{now()}] Emotions Count:", emotions_count)

        common = emotions_count.most_common(1)
        print(f"[{now()}] Common Emotions:", common)

        dominant = common[0][0]
        print(f"[{now()}] Emotion:", dominant)
        return dominant
utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .gradio_helper import single_image_interface, multi_image_interface
2
+ from .audio_palette import AudioPalette
utils/audio_palette.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import typing
2
+ from datetime import datetime
3
+
4
+ import PIL
5
+ from PIL import Image
6
+ from moviepy.editor import *
7
+
8
+ from lib import *
9
+
10
# strftime pattern shared by every timestamped log line in this module.
datetime_format = "%d/%m/%Y %H:%M:%S"


def now():
    """Format the current local time according to ``datetime_format``."""
    return format(datetime.now(), datetime_format)
13
+
14
class AudioPalette:
    """Turn images into a soundtrack, or into a slideshow video with music.

    For each image the pipeline predicts a pace, asks the captioning service
    for a caption, runs the sentiment analyser over that caption, builds a
    text prompt and hands it to the audio generation service.
    """

    def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
        self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
        self.image_captioning = ImageCaptioning()
        self.audio_generation = AudioGeneration()
        self.sentiment_analyser = SentimentAnalyser()
        # Maps the pace model's class name to the wording used in the prompt.
        self.pace_map = {
            "Fast": "high",
            "Medium": "medium",
            "Slow": "low"
        }

    def prompt_construction(self, caption: str, pace: str, sentiment: typing.Union[str, None], instrument: typing.Union[str, None], first: bool = True):
        """Build the text prompt sent to the audio generator.

        Args:
            caption: Caption describing the image.
            pace: Pace class name; must be a key of ``self.pace_map``.
            sentiment: Emotion extracted from the caption, or ``None``.
            instrument: Selected instrument, or ``None`` when none was chosen.
            first: ``False`` for every prompt after the first of a sequence,
                which appends a request for a smooth transition.

        Returns:
            The prompt string.

        Raises:
            KeyError: If ``pace`` is not a key of ``self.pace_map``.
        """
        # Mention the instrument only when one was chosen, so that a missing
        # selection does not leave a double space ("A  soundtrack").
        subject = f"{instrument} soundtrack" if instrument else "soundtrack"
        prompt = f"A {subject} for {caption} with {self.pace_map[pace]} beats per minute. High Quality."

        if not first:
            prompt += " Transitions smoothely from the previous audio while sounding different."

        # NOTE: `sentiment` is accepted but not yet part of the prompt; the
        # suffix f" As a {sentiment} music." is currently disabled.

        return prompt

    def generate_single(self, input_image: "PIL.Image.Image", instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate a soundtrack for one image.

        Returns:
            A list ``[prompt, pace, caption, audio_file]``.
        """
        pace = self.pace_model.predict(input_image)
        print(f"[{now()}]", pace)
        print(f"[{now()}] Pace Prediction Done")

        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
        print(f"[{now()}]", generated_text)
        print(f"[{now()}] Captioning Done")
        # The captioning response may lack "generated_text"; fall back to an
        # empty caption so later steps never receive None.
        generated_text = generated_text if generated_text is not None else ""

        sentiment = self.sentiment_analyser.sentiment(generated_text)
        print(f"[{now()}] Sentiment Analysis Done")

        prompt = self.prompt_construction(generated_text, pace, sentiment, instrument)
        print(f"[{now()}] Generated Prompt:", prompt)

        audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
        print(f"[{now()}]", audio_file)
        print(f"[{now()}] Audio Generation Done")

        return [prompt, pace, generated_text, audio_file]

    def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str]):
        """Write a slideshow of the images (5 seconds each) with the audio.

        Returns:
            The name of the written video file, ``"generated_video.mp4"``.
        """
        clips = [ImageClip(m).set_duration(5) for m in file_paths]
        audio_clips = [AudioFileClip(a) for a in audio_paths]
        try:
            concat_audio = concatenate_audioclips(audio_clips)
            new_audio = CompositeAudioClip([concat_audio])

            concat_clip = concatenate_videoclips(clips, method="compose")
            concat_clip.audio = new_audio

            file_name = "generated_video.mp4"
            concat_clip.write_videofile(file_name, fps=24)
        finally:
            # Release the audio file readers once the video has been written
            # (or writing has failed).
            for audio_clip in audio_clips:
                audio_clip.close()
        return file_name

    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate one continuous soundtrack for several images and stitch
        images and audio into a video.

        Returns:
            The name of the generated video file.
        """
        images = [Image.open(image_path) for image_path in file_paths]
        try:
            # Extracting the pace for all the images
            pace = [self.pace_model.predict(image) for image in images]
            print(f"[{now()}]", pace)
            print(f"[{now()}] Pace Prediction Done")

            # Generating the caption for all the images
            generated_text = []
            for image in images:
                caption = self.image_captioning.query(image)[0].get("generated_text")
                # A missing caption becomes "" so later steps never see None.
                generated_text.append(caption if caption is not None else "")
            print(f"[{now()}]", generated_text)
            print(f"[{now()}] Captioning Done")
        finally:
            # The opened image files are only needed by the two steps above.
            for image in images:
                image.close()

        # Extracting the sentiments from the generated captions
        sentiments = [self.sentiment_analyser.sentiment(text) for text in generated_text]
        print(f"[{now()}] Sentiment Analysis Done:", sentiments)

        # Only the first prompt omits the "transition" instruction.
        prompts = [
            self.prompt_construction(generated_caption, pace_pred, senti, instrument, index == 0)
            for index, (generated_caption, senti, pace_pred) in enumerate(zip(generated_text, sentiments, pace))
        ]
        print(f"[{now()}] Generated Prompts:", prompts)

        audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
        print(f"[{now()}]", audio_file)
        print(f"[{now()}] Audio Generation Done")

        video_file = self.stitch_images(file_paths, [audio_file])
        return video_file
utils/emotions.txt ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'victimized': 'cheated',
2
+ 'accused': 'cheated',
3
+ 'acquitted': 'singled out',
4
+ 'adorable': 'loved',
5
+ 'adored': 'loved',
6
+ 'affected': 'attracted',
7
+ 'afflicted': 'sad',
8
+ 'aghast': 'fearful',
9
+ 'agog': 'attracted',
10
+ 'agonized': 'sad',
11
+ 'alarmed': 'fearful',
12
+ 'amused': 'happy',
13
+ 'angry': 'angry',
14
+ 'anguished': 'sad',
15
+ 'animated': 'happy',
16
+ 'annoyed': 'angry',
17
+ 'anxious': 'attracted',
18
+ 'apathetic': 'bored',
19
+ 'appalled': 'angry',
20
+ 'appeased': 'singled out',
21
+ 'appreciated': 'esteemed',
22
+ 'apprehensive': 'fearful',
23
+ 'approved of': 'loved',
24
+ 'ardent': 'lustful',
25
+ 'aroused': 'lustful',
26
+ 'attached': 'attached',
27
+ 'attracted': 'attracted',
28
+ 'autonomous': 'independent',
29
+ 'awed': 'fearful',
30
+ 'awkward': 'embarrassed',
31
+ 'beaten down': 'powerless',
32
+ 'beatific': 'happy',
33
+ 'belonging': 'attached',
34
+ 'bereaved': 'sad',
35
+ 'betrayed': 'cheated',
36
+ 'bewildered': 'surprise',
37
+ 'bitter': 'angry',
38
+ 'blissful': 'happy',
39
+ 'blithe': 'happy',
40
+ 'blocked': 'powerless',
41
+ 'boiling': 'angry',
42
+ 'bold': 'fearless',
43
+ 'bored': 'bored',
44
+ 'brave': 'fearless',
45
+ 'bright': 'happy',
46
+ 'brisk': 'happy',
47
+ 'calm': 'safe',
48
+ 'capable': 'adequate',
49
+ 'captivated': 'attached',
50
+ 'careless': 'powerless',
51
+ 'categorized': 'singled out',
52
+ 'cautious': 'fearful',
53
+ 'certain': 'fearless',
54
+ 'chagrined': 'belittled',
55
+ 'challenged': 'attracted',
56
+ 'chastised': 'hated',
57
+ 'cheated': 'cheated',
58
+ 'cheerful': 'happy',
59
+ 'cheerless': 'sad',
60
+ 'cheery': 'happy',
61
+ 'cherished': 'attached',
62
+ 'chicken': 'fearful',
63
+ 'cocky': 'independent',
64
+ 'codependent': 'codependent',
65
+ 'coerced': 'cheated',
66
+ 'comfortable': 'happy',
67
+ 'common': 'average',
68
+ 'competent': 'adequate',
69
+ 'complacent': 'apathetic',
70
+ 'composed': 'adequate',
71
+ 'concerned': 'attracted',
72
+ 'confident': 'adequate',
73
+ 'confused': 'surprise',
74
+ 'connected': 'attached',
75
+ 'conned': 'cheated',
76
+ 'consumed': 'obsessed',
77
+ 'contented': 'happy',
78
+ 'controlled': 'powerless',
79
+ 'convivial': 'happy',
80
+ 'cornered': 'entitled',
81
+ 'courageous': 'fearless',
82
+ 'cowardly': 'fearful',
83
+ 'craving': 'attracted',
84
+ 'crestfallen': 'sad',
85
+ 'criticized': 'hated',
86
+ 'cross': 'angry',
87
+ 'cross-examined': 'singled out',
88
+ 'crushed': 'sad',
89
+ 'curious': 'attracted',
90
+ 'cut off': 'alone',
91
+ 'daring': 'fearless',
92
+ 'dark': 'sad',
93
+ 'dedicated': 'attracted',
94
+ 'defeated': 'powerless',
95
+ 'defenseless': 'fearful',
96
+ 'degraded': 'belittled',
97
+ 'dejected': 'sad',
98
+ 'depressed': 'sad',
99
+ 'deserted': 'hated',
100
+ 'desirable': 'loved',
101
+ 'despondent': 'sad',
102
+ 'detached': 'alone',
103
+ 'determined': 'focused',
104
+ 'diminished': 'belittled',
105
+ 'disappointed': 'demoralized',
106
+ 'discarded': 'hated',
107
+ 'disconsolate': 'sad',
108
+ 'discontented': 'sad',
109
+ 'discounted': 'belittled',
110
+ 'discouraged': 'powerless',
111
+ 'disgraced': 'belittled',
112
+ 'disgusted': 'angry',
113
+ 'disheartened': 'demoralized',
114
+ 'disillusioned': 'demoralized',
115
+ 'disjointed': 'derailed',
116
+ 'dismal': 'sad',
117
+ 'dismayed': 'fearful',
118
+ 'disoriented': 'derailed',
119
+ 'disparaged': 'cheated',
120
+ 'displeased': 'sad',
121
+ 'disrespected': 'belittled',
122
+ 'distressed': 'sad',
123
+ 'distrustful': 'anxious',
124
+ 'dolorous': 'sad',
125
+ 'doubtful': 'fearful',
126
+ 'down': 'sad',
127
+ 'downhearted': 'sad',
128
+ 'dreadful': 'sad',
129
+ 'dreary': 'sad',
130
+ 'dubious': 'anxious',
131
+ 'dull': 'sad',
132
+ 'duped': 'cheated',
133
+ 'eager': 'attracted',
134
+ 'earnest': 'attracted',
135
+ 'ecstatic': 'happy',
136
+ 'elated': 'happy',
137
+ 'embarrassed': 'embarrassed',
138
+ 'empathetic': 'attached',
139
+ 'enchanted': 'attracted',
140
+ 'encouraged': 'adequate',
141
+ 'engrossed': 'attracted',
142
+ 'enraged': 'angry',
143
+ 'enterprising': 'fearless',
144
+ 'enthusiastic': 'happy',
145
+ 'entrusted': 'loved',
146
+ 'esteemed': 'esteemed',
147
+ 'excited': 'happy',
148
+ 'excluded': 'alone',
149
+ 'exempt': 'entitled',
150
+ 'exhausted hopeless': 'powerless',
151
+ 'exhilarated': 'happy',
152
+ 'exploited': 'cheated',
153
+ 'exposed': 'fearful',
154
+ 'fabulous': 'ecstatic',
155
+ 'fainthearted': 'fearful',
156
+ 'fantastic': 'ecstatic',
157
+ 'fascinated': 'attracted',
158
+ 'favored': 'entitled',
159
+ 'fearful': 'fearful',
160
+ 'fervent': 'attracted',
161
+ 'fervid': 'attracted',
162
+ 'festive': 'happy',
163
+ 'flat': 'sad',
164
+ 'focused': 'focused',
165
+ 'forced': 'powerless',
166
+ 'forsaken': 'hated',
167
+ 'framed': 'cheated',
168
+ 'free': 'free',
169
+ 'free & easy': 'happy',
170
+ 'frightened': 'fearful',
171
+ 'frisky': 'happy',
172
+ 'frustrated': 'angry',
173
+ 'full of anticipation': 'attracted',
174
+ 'full of ennui': 'apathetic',
175
+ 'fuming': 'angry',
176
+ 'funereal': 'sad',
177
+ 'furious': 'angry',
178
+ 'gallant': 'fearless',
179
+ 'genial': 'happy',
180
+ 'glad': 'happy',
181
+ 'gleeful': 'happy',
182
+ 'gloomy': 'sad',
183
+ 'glum': 'sad',
184
+ 'grass': 'happy',
185
+ 'grief-stricken': 'sad',
186
+ 'grieved': 'sad',
187
+ 'guilt': 'sad',
188
+ 'guilty': 'singled out',
189
+ 'happy': 'happy',
190
+ 'hardy': 'fearless',
191
+ 'heartbroken': 'sad',
192
+ 'heavyhearted': 'sad',
193
+ 'hesitant': 'fearful',
194
+ 'high-spirited': 'happy',
195
+ 'hilarious': 'happy',
196
+ 'hopeful': 'attracted',
197
+ 'horny': 'lustful',
198
+ 'horrified': 'fearful',
199
+ 'hot and bothered': 'lustful',
200
+ 'humiliated': 'sad',
201
+ 'humorous': 'happy',
202
+ 'hurt': 'sad',
203
+ 'hysterical': 'fearful',
204
+ 'ignored': 'hated',
205
+ 'ill at ease': 'sad',
206
+ 'immobilized': 'apathetic',
207
+ 'immune': 'entitled',
208
+ 'important': 'happy',
209
+ 'impotent': 'powerless',
210
+ 'imprisoned': 'entitled',
211
+ 'in a huff': 'angry',
212
+ 'in a stew': 'angry',
213
+ 'in control': 'adequate',
214
+ 'in fear': 'fearful',
215
+ 'in pain': 'sad',
216
+ 'in the dumps': 'sad',
217
+ 'in the zone': 'focused',
218
+ 'incensed': 'angry',
219
+ 'included': 'attached',
220
+ 'indecisive': 'anxious',
221
+ 'independent': 'free',
222
+ 'indignant': 'angry',
223
+ 'infatuated': 'lustful',
224
+ 'inflamed': 'angry',
225
+ 'injured': 'sad',
226
+ 'inquisitive': 'attracted',
227
+ 'insecure': 'codependent',
228
+ 'insignificant': 'belittled',
229
+ 'intent': 'attracted',
230
+ 'interested': 'attracted',
231
+ 'interrogated': 'singled out',
232
+ 'intrigued': 'attracted',
233
+ 'irate': 'angry',
234
+ 'irresolute': 'fearful',
235
+ 'irresponsible': 'powerless',
236
+ 'irritated': 'angry',
237
+ 'isolated': 'alone',
238
+ 'jaunty': 'happy',
239
+ 'jocular': 'happy',
240
+ 'jolly': 'happy',
241
+ 'jovial': 'happy',
242
+ 'joyful': 'happy',
243
+ 'joyless': 'sad',
244
+ 'joyous': 'happy',
245
+ 'jubilant': 'happy',
246
+ 'justified': 'singled out',
247
+ 'keen': 'attracted',
248
+ 'labeled': 'singled out',
249
+ 'lackadaisical': 'bored',
250
+ 'lazy': 'apathetic',
251
+ 'left out': 'hated',
252
+ 'let down': 'hated',
253
+ 'lethargic': 'apathetic',
254
+ 'lied to': 'cheated',
255
+ 'lighthearted': 'happy',
256
+ 'liked': 'attached',
257
+ 'lively': 'happy',
258
+ 'livid': 'angry',
259
+ 'lonely': 'alone',
260
+ 'lonesome': 'alone',
261
+ 'lost': 'lost',
262
+ 'loved': 'attached',
263
+ 'low': 'sad',
264
+ 'lucky': 'happy',
265
+ 'lugubrious': 'sad',
266
+ 'macho': 'independent',
267
+ 'mad': 'angry',
268
+ 'melancholy': 'sad',
269
+ 'menaced': 'fearful',
270
+ 'merry': 'happy',
271
+ 'mirthful': 'happy',
272
+ 'misgiving': 'fearful',
273
+ 'misunderstood': 'alone',
274
+ 'moody': 'sad',
275
+ 'moping': 'sad',
276
+ 'motivated': 'attracted',
277
+ 'mournful': 'sad',
278
+ 'needed': 'attracted',
279
+ 'needy': 'codependent',
280
+ 'nervous': 'fearful',
281
+ 'obligated': 'powerless',
282
+ 'obsessed': 'obsessed',
283
+ 'offended': 'angry',
284
+ 'oppressed': 'sad',
285
+ 'optionless': 'entitled',
286
+ 'ordinary': 'average',
287
+ 'organized': 'adequate',
288
+ 'out of control': 'powerless',
289
+ 'out of sorts': 'sad',
290
+ 'outmaneuvered': 'entitled',
291
+ 'outraged': 'angry',
292
+ 'overjoyed': 'happy',
293
+ 'overlooked': 'hated',
294
+ 'overwhelmed': 'powerless',
295
+ 'panicked': 'fearful',
296
+ 'passionate': 'lustful',
297
+ 'passive': 'apathetic',
298
+ 'pathetic': 'sad',
299
+ 'peaceful': 'safe',
300
+ 'pensive': 'anxious',
301
+ 'perplexed': 'anxious',
302
+ 'phobic': 'fearful',
303
+ 'playful': 'happy',
304
+ 'pleased': 'happy',
305
+ 'powerless': 'powerless',
306
+ 'pressured': 'burdened',
307
+ 'privileged': 'entitled',
308
+ 'proud': 'happy',
309
+ 'provoked': 'angry',
310
+ 'punished': 'hated',
311
+ 'put upon': 'burdened',
312
+ 'quaking': 'fearful',
313
+ 'quiescent': 'apathetic',
314
+ 'rageful': 'angry',
315
+ 'rapturous': 'happy',
316
+ 'rated': 'singled out',
317
+ 'reassured': 'fearless',
318
+ 'reckless': 'powerless',
319
+ 'redeemed': 'singled out',
320
+ 'regretful': 'sad',
321
+ 'rejected': 'alone',
322
+ 'released': 'free',
323
+ 'remorse': 'sad',
324
+ 'replaced': 'hated',
325
+ 'repulsed': 'demoralized',
326
+ 'resentful': 'angry',
327
+ 'resolute': 'fearless',
328
+ 'respected': 'esteemed',
329
+ 'responsible': 'adequate',
330
+ 'restful': 'fearful',
331
+ 'revered': 'esteemed',
332
+ 'rueful': 'sad',
333
+ 'sad': 'sad',
334
+ 'satisfied': 'happy',
335
+ 'saucy': 'happy',
336
+ 'scared': 'fearful',
337
+ 'secure': 'fearless',
338
+ 'self-reliant': 'fearless',
339
+ 'serene': 'happy',
340
+ 'shaky': 'fearful',
341
+ 'shamed': 'sad',
342
+ 'shocked': 'surprise',
343
+ 'significant': 'esteemed',
344
+ 'singled out': 'singled out',
345
+ 'skeptical': 'anxious',
346
+ 'snoopy': 'attracted',
347
+ 'somber': 'sad',
348
+ 'sparkling': 'happy',
349
+ 'spirited': 'happy',
350
+ 'spiritless': 'sad',
351
+ 'sprightly': 'happy',
352
+ 'startled': 'surprise',
353
+ 'stereotyped': 'singled out',
354
+ 'stifled': 'powerless',
355
+ 'stout hearted': 'fearless',
356
+ 'strong': 'independent',
357
+ 'suffering': 'sad',
358
+ 'sulky': 'sad',
359
+ 'sullen': 'angry',
360
+ 'sunny': 'happy',
361
+ 'surprised': 'surprise',
362
+ 'suspicious': 'anxious',
363
+ 'sympathetic': 'codependent',
364
+ 'tense': 'anxious',
365
+ 'terrified': 'fearful',
366
+ 'terrorized': 'fearful',
367
+ 'thankful': 'happy',
368
+ 'threatened': 'fearful',
369
+ 'thwarted': 'powerless',
370
+ 'timid': 'fearful',
371
+ 'timorous': 'fearful',
372
+ 'torn': 'derailed',
373
+ 'tortured': 'sad',
374
+ 'tragic': 'sad',
375
+ 'tranquil': 'happy',
376
+ 'transported': 'happy',
377
+ 'trapped': 'entitled',
378
+ 'tremulous': 'fearful',
379
+ 'tricked': 'entitled',
380
+ 'turned on': 'lustful',
381
+ 'unapproved of': 'hated',
382
+ 'unbelieving': 'anxious',
383
+ 'uncertain': 'anxious',
384
+ 'unconcerned': 'apathetic',
385
+ 'understood': 'attached',
386
+ 'unfocussed': 'lost',
387
+ 'unlovable': 'hated',
388
+ 'unloved': 'hated',
389
+ 'unmotivated': 'apathetic',
390
+ 'unshackled': 'free',
391
+ 'unsupported': 'belittled',
392
+ 'up in arms': 'angry',
393
+ 'upset': 'fearful',
394
+ 'validated': 'loved',
395
+ 'valued': 'esteemed',
396
+ 'victimized': 'sad',
397
+ 'violated': 'cheated',
398
+ 'virulent': 'angry',
399
+ 'vivacious': 'happy',
400
+ 'vulnerable': 'powerless',
401
+ 'wavering': 'anxious',
402
+ 'weak': 'powerless',
403
+ 'welcomed': 'loved',
404
+ 'woebegone': 'sad',
405
+ 'woeful': 'sad',
406
+ 'worn down': 'powerless',
407
+ 'worn out': 'powerless',
408
+ 'worried': 'fearful',
409
+ 'worshiped': 'esteemed',
410
+ 'wrathful': 'angry',
411
+ 'wronged': 'singled out',
412
+ 'wrought up': 'angry',
413
+ 'yearning': 'lustful',
414
+ 'zealous': 'attracted',
415
+ 'abandoned': 'hated',
416
+ 'absolved': 'singled out',
417
+ 'absorbed': 'attracted',
418
+ 'abused': 'powerless',
419
+ 'accepted': 'loved',
420
+ 'aching': 'sad',
421
+ 'acrimonious': 'angry',
422
+ 'addicted': 'codependent',
423
+ 'adequate': 'adequate',
424
+ 'admired': 'esteemed',
425
+ 'affectionate': 'attached',
426
+ 'affronted': 'singled out',
427
+ 'afraid': 'fearful',
428
+ 'airy': 'happy',
429
+ 'alone': 'alone',
430
+ 'ambivalent': 'bored',
431
+ 'apathetic': 'apathetic',
432
+ 'apprehensive': 'anxious',
433
+ 'arrogant': 'independent',
434
+ 'ashamed': 'embarrassed',
435
+ 'astonished': 'surprise',
436
+ 'at ease': 'safe',
437
+ 'attacked': 'fearful',
438
+ 'audacious': 'fearless',
439
+ 'autonomous': 'free',
440
+ 'average': 'average',
441
+ 'avid': 'attracted',
442
+ 'baffled': 'lost',
443
+ 'bashful': 'powerless',
444
+ 'belittled': 'belittled',
445
+ 'buoyant': 'happy',
446
+ 'burdened': 'burdened',
447
+ 'clouded': 'sad',
448
+ 'committed': 'focused',
449
+ 'compassionate': 'attached',
450
+ 'compelled': 'obsessed',
451
+ 'dauntless': 'fearless',
452
+ 'debonair': 'happy',
453
+ 'deceived': 'entitled',
454
+ 'delighted': 'ecstatic',
455
+ 'demoralized': 'demoralized',
456
+ 'derailed': 'derailed',
457
+ 'desirous': 'attracted',
458
+ 'despairing': 'sad',
459
+ 'devastated': 'angry',
460
+ 'diffident': 'fearful',
461
+ 'discredited': 'belittled',
462
+ 'disheartened': 'sad',
463
+ 'disinclined': 'demoralized',
464
+ 'disorganized': 'powerless',
465
+ 'downcast': 'sad',
466
+ 'entitled': 'entitled',
467
+ 'excited': 'adequate',
468
+ 'exultant': 'happy',
469
+ 'fidgety': 'fearful',
470
+ 'frowning': 'sad',
471
+ 'full of misgiving': 'anxious',
472
+ 'great': 'happy',
473
+ 'hapless': 'sad',
474
+ 'hated': 'hated',
475
+ 'heroic': 'fearless',
476
+ 'hostile': 'angry',
477
+ 'in despair': 'sad',
478
+ 'indifferent': 'bored',
479
+ 'infuriated': 'angry',
480
+ 'insecure': 'fearful',
481
+ 'inspired': 'happy',
482
+ 'inspiring': 'attracted',
483
+ 'judged': 'singled out',
484
+ 'justified': 'singled out',
485
+ 'laughing': 'happy',
486
+ 'loved': 'loved',
487
+ 'loving': 'attached',
488
+ 'low': 'sad',
489
+ 'lustful': 'lustful',
490
+ 'manipulated': 'cheated',
491
+ 'mumpish': 'sad',
492
+ 'nosey': 'attracted',
493
+ 'numb': 'apathetic',
494
+ 'obliterated': 'powerless',
495
+ 'peaceful': 'happy',
496
+ 'petrified': 'fearful',
497
+ 'piqued': 'angry',
498
+ 'piteous': 'sad',
499
+ 'powerless': 'powerless',
500
+ 'questioning': 'anxious',
501
+ 'rejected': 'hated',
502
+ 'self-satisfied': 'happy',
503
+ 'set up': 'entitled',
504
+ 'shut out': 'alone',
505
+ 'sorrowful': 'sad',
506
+ 'spirited': 'sad',
507
+ 'supported': 'esteemed',
508
+ 'suspicious': 'fearful',
509
+ 'terrific': 'happy',
510
+ 'trapped': 'entitled',
511
+ 'trembling': 'fearful',
512
+ 'uncomfortable': 'anxious',
513
+ 'underestimated': 'belittled',
514
+ 'unhappy': 'sad',
515
+ 'vindicated': 'singled out',
516
+ 'worked up': 'angry',
517
+ 'airborne': 'excited',
518
+ 'grass': 'happy',
519
+ 'mountain': 'calm',
520
+ 'dog': 'happy',
521
+ 'umbrella': 'sad',
522
+ 'train': 'sorrow',
523
+ 'lightning': 'ominous',
524
+ 'rocket': 'energetic',
525
+ 'elevator': 'relaxed',
526
+ 'slides': 'happy',
527
+ 'mountains': 'relaxed',
528
+ 'dog': 'excited',
529
+ 'trees': 'relaxed',
530
+ 'people': 'happy',
531
+ 'old': 'sad',
532
+ 'men': 'happy',
533
+ 'women': 'happy',
534
+ 'humans': 'happy',
535
+ 'persons': 'happy',
536
+ 'person': 'happy'
utils/gradio_helper.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from .audio_palette import AudioPalette
4
+
5
def single_image_interface(model: AudioPalette):
    """Build the Gradio interface for the single-image workflow.

    The interface is wired to ``model.generate_single`` and declares three
    inputs (image, instrument choice, endpoint text) and four outputs
    (three hidden textboxes and one visible audio player).

    Args:
        model: Object whose ``generate_single`` attribute is used as the
            interface callback.

    Returns:
        The configured ``gr.Interface`` instance (not launched).
    """

    def _hidden_textbox(placeholder, label):
        # The three text outputs share every setting except these two strings.
        return gr.Textbox(
            lines=1,
            placeholder=placeholder,
            label=label,
            show_label=True,
            container=True,
            type="text",
            visible=False,
        )

    # Input components, in the order they are handed to the callback.
    image_input = gr.Image(
        type="pil",
        label="Upload an image",
        show_label=True,
        container=True,
    )
    instrument_input = gr.Radio(
        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
        label="Instrument",
        show_label=True,
        container=True,
    )
    endpoint_input = gr.Textbox(
        lines=1,
        placeholder="ngrok endpoint",
        label="colab endpoint",
        show_label=True,
        container=True,
        type="text",
        visible=True,
    )

    # Output components: the textboxes are declared but hidden; only the
    # audio player is visible.
    prompt_output = _hidden_textbox("Prompt", "Generated Prompt")
    pace_output = _hidden_textbox("Pace of the image", "Pace")
    caption_output = _hidden_textbox("Caption for the image", "Caption")
    audio_output = gr.Audio(
        label="Generated Audio",
        show_label=True,
        container=True,
        visible=True,
        format="wav",
        autoplay=False,
        show_download_button=True,
    )

    return gr.Interface(
        fn=model.generate_single,
        inputs=[image_input, instrument_input, endpoint_input],
        outputs=[prompt_output, pace_output, caption_output, audio_output],
        cache_examples=False,
        live=False,
        description="Provide an image to generate an appropriate background soundtrack",
    )
75
+
76
def multi_image_interface(model: AudioPalette):
    """Build the Gradio interface for the multi-image (slideshow) workflow.

    The interface is wired to ``model.generate_multiple`` and declares three
    inputs (a multi-file image upload delivered as file paths, an instrument
    choice, and an endpoint text field) and a single MP4 video output.

    Args:
        model: Object whose ``generate_multiple`` attribute is used as the
            interface callback.

    Returns:
        The configured ``gr.Interface`` instance (not launched).
    """
    # Input components, in the order they are handed to the callback.
    images_input = gr.File(
        file_count="multiple",
        file_types=["image"],
        type="filepath",
        label="Upload images",
        show_label=True,
        container=True,
        visible=True,
    )
    instrument_input = gr.Radio(
        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
        label="Instrument",
        show_label=True,
        container=True,
    )
    endpoint_input = gr.Textbox(
        lines=1,
        placeholder="ngrok endpoint",
        label="colab endpoint",
        show_label=True,
        container=True,
        type="text",
        visible=True,
    )

    video_output = gr.Video(
        format="mp4",
        label="Generated Video",
        show_label=True,
        container=True,
        visible=True,
        autoplay=False,
    )

    return gr.Interface(
        fn=model.generate_multiple,
        inputs=[images_input, instrument_input, endpoint_input],
        outputs=[video_output],
        cache_examples=False,
        live=False,
        # Fixed user-facing typo: the original read "generate an a slideshow".
        description="Provide images to generate a slideshow of the images with appropriate music as background",
    )