Spaces:
Runtime error
Runtime error
add sentiment analyser and refactor code
Browse files- .gitattributes +0 -5
- .gitignore +5 -0
- app.py +8 -203
- lib/__init__.py +4 -0
- lib/audio_generation.py +0 -1
- lib/image_captioning.py +0 -1
- lib/pace_model.py +1 -1
- lib/sentiment_analyser.py +69 -0
- utils/__init__.py +2 -0
- utils/audio_palette.py +112 -0
- utils/emotions.txt +536 -0
- utils/gradio_helper.py +120 -0
.gitattributes
CHANGED
@@ -33,8 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
-
*.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
-
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
38 |
-
*.jpg filter=lfs diff=lfs merge=lfs -text
|
39 |
-
*.png filter=lfs diff=lfs merge=lfs -text
|
40 |
-
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
@@ -8,9 +8,14 @@ __pycache__
|
|
8 |
|
9 |
# Video
|
10 |
*.mp4
|
|
|
11 |
|
12 |
# Audio
|
13 |
*.wav
|
14 |
*.mp3
|
15 |
|
|
|
|
|
|
|
|
|
16 |
*.log
|
|
|
8 |
|
9 |
# Video
|
10 |
*.mp4
|
11 |
+
*.mkv
|
12 |
|
13 |
# Audio
|
14 |
*.wav
|
15 |
*.mp3
|
16 |
|
17 |
+
# Others
|
18 |
+
*.pdf
|
19 |
+
*.md
|
20 |
+
|
21 |
*.log
|
app.py
CHANGED
@@ -1,221 +1,26 @@
|
|
1 |
import typing
|
2 |
from pathlib import Path
|
3 |
|
4 |
-
import numpy as np
|
5 |
import gradio as gr
|
6 |
|
7 |
import PIL
|
8 |
from PIL import Image
|
9 |
from moviepy.editor import *
|
10 |
|
11 |
-
from
|
12 |
-
from lib.image_captioning import ImageCaptioning
|
13 |
-
from lib.pace_model import PaceModel
|
14 |
|
15 |
# Weight files are looked up under "models/" in the current working directory.
# Both paths are resolved so the two values are normalised the same way.
pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5").resolve()
# Image dimensions passed to PaceModel when the pipeline is constructed.
height, width, channels = (224, 224, 3)
|
18 |
|
19 |
-
class AudioPalette:
    """Generate soundtrack prompts, audio and slideshow videos from images.

    Chains a pace model, an image-captioning client and an audio-generation
    client: image -> pace + caption -> text prompt -> generated audio.
    """

    def __init__(self):
        # Dimensions and weight paths come from the module-level constants.
        self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
        self.image_captioning = ImageCaptioning()
        self.audio_generation = AudioGeneration()
        # Maps the label returned by the pace model to the wording used in prompts.
        self.pace_map = {
            "Fast": "high",
            "Medium": "medium",
            "Slow": "low"
        }

    def prompt_construction(self, caption: str, pace: str, instrument: typing.Union[str, None], first: bool = True):
        """Build the text prompt sent to the audio generator.

        Args:
            caption: Image caption; ``None`` is treated as an empty caption.
            pace: Key of ``self.pace_map`` ("Fast", "Medium" or "Slow").
            instrument: Instrument name, or ``None``/empty for no instrument.
            first: ``False`` appends a sentence asking for a smooth transition
                from the previous audio.

        Returns:
            The prompt string.

        Raises:
            KeyError: If ``pace`` is not a key of ``self.pace_map``.
        """
        caption = caption if caption is not None else ""
        # Without an instrument, avoid the double space of "A  soundtrack".
        subject = f"A {instrument} soundtrack" if instrument else "A soundtrack"
        prompt = f"{subject} for {caption} with {self.pace_map[pace]} beats per minute. High Quality"

        if not first:
            prompt += ". Transitions smoothely from the previous audio while sounding different."

        return prompt

    def generate_single(self, input_image: "PIL.Image.Image", instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate audio for one image.

        Returns:
            ``[prompt, pace, generated_text, audio_file]``.
        """
        pace = self.pace_model.predict(input_image)
        print("Pace Prediction Done")

        # NOTE(review): assumes query() returns a list of dicts; an error
        # payload from the captioning service would fail here - confirm.
        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
        print("Captioning Done")
        generated_text = generated_text if generated_text is not None else ""

        prompt = self.prompt_construction(generated_text, pace, instrument)
        print("Generated Prompt:", prompt)

        audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
        print("Audio Generation Done")

        return [prompt, pace, generated_text, audio_file]

    def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str],
                      seconds_per_image: float = 5, file_name: str = "generated_video.mp4"):
        """Render the images as a slideshow with the given audio as soundtrack.

        Args:
            file_paths: Image files, one slide each, in order.
            audio_paths: Audio files, concatenated in order.
            seconds_per_image: How long each image is shown.
            file_name: Output video path.

        Returns:
            ``file_name``, after the video has been written at 24 fps.
        """
        clips = [ImageClip(m).set_duration(seconds_per_image) for m in file_paths]
        audio_clips = [AudioFileClip(a) for a in audio_paths]
        try:
            concat_audio = concatenate_audioclips(audio_clips)
            new_audio = CompositeAudioClip([concat_audio])

            concat_clip = concatenate_videoclips(clips, method="compose")
            concat_clip.audio = new_audio

            concat_clip.write_videofile(file_name, fps=24)
            return file_name
        finally:
            # Release the readers opened for the source clips.
            for clip in audio_clips + clips:
                clip.close()

    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate a slideshow video with a soundtrack for several images.

        Returns:
            Path of the written video file.

        Raises:
            ValueError: If ``file_paths`` is empty or ``None``.
        """
        if not file_paths:
            raise ValueError("At least one image is required to generate a video.")

        images = [Image.open(image_path) for image_path in file_paths]
        try:
            # Extracting the pace for all the images
            pace = [self.pace_model.predict(image) for image in images]
            print("Pace Prediction Done")

            # Generating the caption for all the images
            generated_text = [
                self.image_captioning.query(image)[0].get("generated_text")
                for image in images
            ]
            print("Captioning Done")
        finally:
            # The video is stitched from file_paths, so the open images are no longer needed.
            for image in images:
                image.close()

        # Only the first prompt omits the "transition" sentence.
        prompts = [
            self.prompt_construction(generated_caption, pace_pred, instrument, index == 0)
            for index, (generated_caption, pace_pred) in enumerate(zip(generated_text, pace))
        ]
        print("Generated Prompts: ", prompts)

        audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
        print("Audio Generation Done")

        video_file = self.stitch_images(file_paths, [audio_file])
        return video_file
|
99 |
-
|
100 |
-
def single_image_interface(model: AudioPalette):
    """Create the Gradio interface that turns one image into a soundtrack.

    The interface calls ``model.generate_single`` with the uploaded PIL image,
    the selected instrument and the endpoint text. Its outputs are three
    hidden textboxes (prompt, pace, caption) followed by a WAV audio player.
    """
    # Inputs, in the positional order of generate_single's parameters.
    image_input = gr.Image(
        type="pil",
        label="Upload an image",
        show_label=True,
        container=True,
    )
    instrument_input = gr.Radio(
        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
        label="Instrument",
        show_label=True,
        container=True,
    )
    endpoint_input = gr.Textbox(
        lines=1,
        placeholder="ngrok endpoint",
        label="colab endpoint",
        show_label=True,
        container=True,
        type="text",
        visible=True,
    )

    # The three textual results share every option except placeholder and label.
    hidden_textbox_options = dict(
        lines=1,
        show_label=True,
        container=True,
        type="text",
        visible=False,
    )
    prompt_output = gr.Textbox(placeholder="Prompt", label="Generated Prompt", **hidden_textbox_options)
    pace_output = gr.Textbox(placeholder="Pace of the image", label="Pace", **hidden_textbox_options)
    caption_output = gr.Textbox(placeholder="Caption for the image", label="Caption", **hidden_textbox_options)
    audio_output = gr.Audio(
        label="Generated Audio",
        show_label=True,
        container=True,
        visible=True,
        format="wav",
        autoplay=False,
        show_download_button=True,
    )

    return gr.Interface(
        fn=model.generate_single,
        inputs=[image_input, instrument_input, endpoint_input],
        outputs=[prompt_output, pace_output, caption_output, audio_output],
        cache_examples=False,
        live=False,
        description="Provide an image to generate an appropriate background soundtrack",
    )
|
170 |
-
|
171 |
-
def multi_image_interface(model: AudioPalette):
    """Create the Gradio interface that turns several images into a video.

    The interface calls ``model.generate_multiple`` with the uploaded image
    file paths, the selected instrument and the endpoint text, and shows the
    returned MP4 in a video player.
    """
    # Inputs, in the positional order of generate_multiple's parameters.
    files_input = gr.File(
        file_count="multiple",
        file_types=["image"],
        type="filepath",
        label="Upload images",
        show_label=True,
        container=True,
        visible=True,
    )
    instrument_input = gr.Radio(
        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
        label="Instrument",
        show_label=True,
        container=True,
    )
    endpoint_input = gr.Textbox(
        lines=1,
        placeholder="ngrok endpoint",
        label="colab endpoint",
        show_label=True,
        container=True,
        type="text",
        visible=True,
    )

    video_output = gr.Video(
        format="mp4",
        label="Generated Video",
        show_label=True,
        container=True,
        visible=True,
        autoplay=False,
    )

    return gr.Interface(
        fn=model.generate_multiple,
        inputs=[files_input, instrument_input, endpoint_input],
        outputs=[video_output],
        cache_examples=False,
        live=False,
        description="Provide images to generate an a slideshow of the images with appropriate music as background",
    )
|
216 |
-
|
217 |
def main():
|
218 |
-
model = AudioPalette(
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
tab_1 = single_image_interface(model)
|
221 |
tab_2 = multi_image_interface(model)
|
|
|
1 |
import typing
|
2 |
from pathlib import Path
|
3 |
|
|
|
4 |
import gradio as gr
|
5 |
|
6 |
import PIL
|
7 |
from PIL import Image
|
8 |
from moviepy.editor import *
|
9 |
|
10 |
+
from utils import *
|
|
|
|
|
11 |
|
12 |
# Weight files are looked up under "models/" in the current working directory.
# Both paths are resolved so the two values are normalised the same way.
pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5").resolve()
# Image dimensions passed to AudioPalette in main().
height, width, channels = (224, 224, 3)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def main():
|
17 |
+
model = AudioPalette(
|
18 |
+
pace_model_weights_path,
|
19 |
+
resnet50_tf_model_weights_path,
|
20 |
+
height,
|
21 |
+
width,
|
22 |
+
channels
|
23 |
+
)
|
24 |
|
25 |
tab_1 = single_image_interface(model)
|
26 |
tab_2 = multi_image_interface(model)
|
lib/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .audio_generation import AudioGeneration
|
2 |
+
from .image_captioning import ImageCaptioning
|
3 |
+
from .pace_model import PaceModel
|
4 |
+
from .sentiment_analyser import SentimentAnalyser
|
lib/audio_generation.py
CHANGED
@@ -33,5 +33,4 @@ class AudioGeneration:
|
|
33 |
stored_file_path = self.request_generation(prompt)
|
34 |
|
35 |
audio_file = self.request_download(stored_file_path)
|
36 |
-
print(audio_file)
|
37 |
return audio_file
|
|
|
33 |
stored_file_path = self.request_generation(prompt)
|
34 |
|
35 |
audio_file = self.request_download(stored_file_path)
|
|
|
36 |
return audio_file
|
lib/image_captioning.py
CHANGED
@@ -26,5 +26,4 @@ class ImageCaptioning:
|
|
26 |
headers=self.headers,
|
27 |
data=self.convert_to_bytes(image)
|
28 |
)
|
29 |
-
print(response.json())
|
30 |
return response.json()
|
|
|
26 |
headers=self.headers,
|
27 |
data=self.convert_to_bytes(image)
|
28 |
)
|
|
|
29 |
return response.json()
|
lib/pace_model.py
CHANGED
@@ -52,5 +52,5 @@ class PaceModel:
|
|
52 |
image = np.expand_dims(resized_image, axis=0)
|
53 |
|
54 |
prediction = self.resnet_model.predict(image)
|
55 |
-
print(prediction, np.argmax(prediction))
|
56 |
return self.class_names[np.argmax(prediction)]
|
|
|
52 |
image = np.expand_dims(resized_image, axis=0)
|
53 |
|
54 |
prediction = self.resnet_model.predict(image)
|
55 |
+
# print(prediction, np.argmax(prediction))
|
56 |
return self.class_names[np.argmax(prediction)]
|
lib/sentiment_analyser.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import string
|
3 |
+
from collections import Counter
|
4 |
+
from datetime import datetime
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import nltk
|
8 |
+
from nltk.corpus import stopwords
|
9 |
+
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
10 |
+
from nltk.stem import WordNetLemmatizer
|
11 |
+
from nltk.tokenize import word_tokenize
|
12 |
+
|
13 |
+
from utils import *
|
14 |
+
|
15 |
+
# Timestamp layout used as the prefix of this module's log lines.
datetime_format = "%d/%m/%Y %H:%M:%S"


def now():
    """Return the current local time formatted with ``datetime_format``."""
    current = datetime.now()
    return current.strftime(datetime_format)
|
19 |
+
|
20 |
+
class SentimentAnalyser:
    """Keyword-based emotion lookup backed by the ``utils/emotions.txt`` lexicon.

    A text is lower-cased, stripped of punctuation, tokenized, filtered for
    English stop words and lemmatized; every lexicon word found among the
    resulting lemmas contributes its emotion, and the most frequent emotion
    is returned.
    """

    # Characters trimmed from both ends of each half of a lexicon line.
    _LEXICON_TRIM = " \t\r\n,'"

    def __init__(self):
        # NLTK resources needed by word_tokenize, stopwords and WordNetLemmatizer.
        nltk.download('punkt')
        nltk.download('stopwords')
        nltk.download('wordnet')
        # Prefer the lexicon that sits next to the package (lib/ and utils/ are
        # siblings) so the lookup does not depend on the working directory;
        # fall back to the original cwd-relative location.
        bundled = Path(__file__).resolve().parent.parent / "utils" / "emotions.txt"
        self.emotions = bundled if bundled.is_file() else Path("utils/emotions.txt").resolve()

    @staticmethod
    def _parse_emotion_line(line):
        """Split one ``'word': 'emotion',`` lexicon line into ``(word, emotion)``.

        Surrounding whitespace, commas and quotes are removed; inner spaces are
        kept, so ``'singled out'`` stays ``singled out``. Returns ``None`` for
        blank or malformed lines.
        """
        word, separator, emotion = line.partition(":")
        if not separator:
            return None
        word = word.strip(SentimentAnalyser._LEXICON_TRIM)
        emotion = emotion.strip(SentimentAnalyser._LEXICON_TRIM)
        if not word or not emotion:
            return None
        return word, emotion

    def sentiment(self, text):
        """Return the dominant emotion found in ``text``.

        Args:
            text: Text to analyse; ``None`` or an empty string yields ``None``.

        Returns:
            The most common emotion label among matched lexicon words (ties go
            to the label whose word appears first in the lexicon file), or
            ``None`` when no lexicon word matches.
        """
        if not text:
            print(f"[{now()}] No emotion could be extracted.")
            return None

        lower_case = text.lower()
        cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))

        # word_tokenize splits on NLTK's English tokenization rules.
        tokenized_words = word_tokenize(cleaned_text, "english")

        # Load the stop-word list and build the lemmatizer once per call
        # instead of once per token.
        stop_words = set(stopwords.words("english"))
        lemmatizer = WordNetLemmatizer()

        # Lemmatization maps inflected forms to a base form; only membership is
        # tested below, so a set is sufficient.
        lemma_words = {
            lemmatizer.lemmatize(word)
            for word in tokenized_words
            if word not in stop_words
        }

        # Walk the lexicon in file order so each lexicon word counts at most once.
        emotion_list = []
        with open(self.emotions, encoding="utf-8") as f:
            for line in f:
                entry = self._parse_emotion_line(line)
                if entry is not None and entry[0] in lemma_words:
                    emotion_list.append(entry[1])

        print(f"[{now()}] Emotion List:", emotion_list)
        if not emotion_list:
            print(f"[{now()}] No emotion could be extracted.")
            return None

        emotions_count = Counter(emotion_list)
        print(f"[{now()}] Emotions Count:", emotions_count)

        common = emotions_count.most_common(1)
        print(f"[{now()}] Common Emotions:", common)

        top_emotion, _count = common[0]
        print(f"[{now()}] Emotion:", top_emotion)
        return top_emotion
|
utils/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .gradio_helper import single_image_interface, multi_image_interface
|
2 |
+
from .audio_palette import AudioPalette
|
utils/audio_palette.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import typing
|
2 |
+
from datetime import datetime
|
3 |
+
|
4 |
+
import PIL
|
5 |
+
from PIL import Image
|
6 |
+
from moviepy.editor import *
|
7 |
+
|
8 |
+
from lib import *
|
9 |
+
|
10 |
+
# Timestamp layout used as the prefix of this module's log lines.
datetime_format = "%d/%m/%Y %H:%M:%S"


def now():
    """Return the current local time formatted with ``datetime_format``."""
    current = datetime.now()
    return current.strftime(datetime_format)
|
13 |
+
|
14 |
+
class AudioPalette:
    """Generate soundtrack prompts, audio and slideshow videos from images.

    Chains a pace model, an image-captioning client, a sentiment analyser and
    an audio-generation client:
    image -> pace + caption (+ sentiment) -> text prompt -> generated audio.
    """

    def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
        self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
        self.image_captioning = ImageCaptioning()
        self.audio_generation = AudioGeneration()
        self.sentiment_analyser = SentimentAnalyser()
        # Maps the label returned by the pace model to the wording used in prompts.
        self.pace_map = {
            "Fast": "high",
            "Medium": "medium",
            "Slow": "low"
        }

    def prompt_construction(self, caption: str, pace: str, sentiment: typing.Union[str, None], instrument: typing.Union[str, None], first: bool = True):
        """Build the text prompt sent to the audio generator.

        Args:
            caption: Image caption; ``None`` is treated as an empty caption.
            pace: Key of ``self.pace_map`` ("Fast", "Medium" or "Slow").
            sentiment: Emotion label for the caption; accepted but not yet
                added to the prompt text.
            instrument: Instrument name, or ``None``/empty for no instrument.
            first: ``False`` appends a sentence asking for a smooth transition
                from the previous audio.

        Returns:
            The prompt string.

        Raises:
            KeyError: If ``pace`` is not a key of ``self.pace_map``.
        """
        caption = caption if caption is not None else ""
        # Without an instrument, avoid the double space of "A  soundtrack".
        subject = f"A {instrument} soundtrack" if instrument else "A soundtrack"
        prompt = f"{subject} for {caption} with {self.pace_map[pace]} beats per minute. High Quality."

        if not first:
            prompt += " Transitions smoothely from the previous audio while sounding different."

        return prompt

    def generate_single(self, input_image: "PIL.Image.Image", instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate audio for one image.

        Returns:
            ``[prompt, pace, generated_text, audio_file]``.
        """
        pace = self.pace_model.predict(input_image)
        print(f"[{now()}]", pace)
        print(f"[{now()}] Pace Prediction Done")

        # NOTE(review): assumes query() returns a list of dicts; an error
        # payload from the captioning service would fail here - confirm.
        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
        # A missing caption must not reach the sentiment analyser as None.
        generated_text = generated_text if generated_text is not None else ""
        print(f"[{now()}]", generated_text)
        print(f"[{now()}] Captioning Done")

        sentiment = self.sentiment_analyser.sentiment(generated_text)
        print(f"[{now()}] Sentiment Analysis Done")

        prompt = self.prompt_construction(generated_text, pace, sentiment, instrument)
        print(f"[{now()}] Generated Prompt:", prompt)

        audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
        print(f"[{now()}]", audio_file)
        print(f"[{now()}] Audio Generation Done")

        return [prompt, pace, generated_text, audio_file]

    def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str],
                      seconds_per_image: float = 5, file_name: str = "generated_video.mp4"):
        """Render the images as a slideshow with the given audio as soundtrack.

        Args:
            file_paths: Image files, one slide each, in order.
            audio_paths: Audio files, concatenated in order.
            seconds_per_image: How long each image is shown.
            file_name: Output video path.

        Returns:
            ``file_name``, after the video has been written at 24 fps.
        """
        clips = [ImageClip(m).set_duration(seconds_per_image) for m in file_paths]
        audio_clips = [AudioFileClip(a) for a in audio_paths]
        try:
            concat_audio = concatenate_audioclips(audio_clips)
            new_audio = CompositeAudioClip([concat_audio])

            concat_clip = concatenate_videoclips(clips, method="compose")
            concat_clip.audio = new_audio

            concat_clip.write_videofile(file_name, fps=24)
            return file_name
        finally:
            # Release the readers opened for the source clips.
            for clip in audio_clips + clips:
                clip.close()

    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
        """Generate a slideshow video with a soundtrack for several images.

        Returns:
            Path of the written video file.

        Raises:
            ValueError: If ``file_paths`` is empty or ``None``.
        """
        if not file_paths:
            raise ValueError("At least one image is required to generate a video.")

        images = [Image.open(image_path) for image_path in file_paths]
        try:
            # Extracting the pace for all the images
            pace = [self.pace_model.predict(image) for image in images]
            print(f"[{now()}]", pace)
            print(f"[{now()}] Pace Prediction Done")

            # Generating the caption for all the images
            generated_text = []
            for image in images:
                caption = self.image_captioning.query(image)[0].get("generated_text")
                # A missing caption must not reach the sentiment analyser as None.
                generated_text.append(caption if caption is not None else "")
            print(f"[{now()}]", generated_text)
            print(f"[{now()}] Captioning Done")
        finally:
            # The video is stitched from file_paths, so the open images are no longer needed.
            for image in images:
                image.close()

        # Extracting the sentiments from the generated captions
        sentiments = [self.sentiment_analyser.sentiment(text) for text in generated_text]
        print(f"[{now()}] Sentiment Analysis Done:", sentiments)

        # Only the first prompt omits the "transition" sentence.
        prompts = [
            self.prompt_construction(generated_caption, pace_pred, senti, instrument, index == 0)
            for index, (generated_caption, senti, pace_pred) in enumerate(zip(generated_text, sentiments, pace))
        ]
        print(f"[{now()}] Generated Prompts:", prompts)

        audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
        print(f"[{now()}]", audio_file)
        print(f"[{now()}] Audio Generation Done")

        video_file = self.stitch_images(file_paths, [audio_file])
        return video_file
|
utils/emotions.txt
ADDED
@@ -0,0 +1,536 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'victimized': 'cheated',
|
2 |
+
'accused': 'cheated',
|
3 |
+
'acquitted': 'singled out',
|
4 |
+
'adorable': 'loved',
|
5 |
+
'adored': 'loved',
|
6 |
+
'affected': 'attracted',
|
7 |
+
'afflicted': 'sad',
|
8 |
+
'aghast': 'fearful',
|
9 |
+
'agog': 'attracted',
|
10 |
+
'agonized': 'sad',
|
11 |
+
'alarmed': 'fearful',
|
12 |
+
'amused': 'happy',
|
13 |
+
'angry': 'angry',
|
14 |
+
'anguished': 'sad',
|
15 |
+
'animated': 'happy',
|
16 |
+
'annoyed': 'angry',
|
17 |
+
'anxious': 'attracted',
|
18 |
+
'apathetic': 'bored',
|
19 |
+
'appalled': 'angry',
|
20 |
+
'appeased': 'singled out',
|
21 |
+
'appreciated': 'esteemed',
|
22 |
+
'apprehensive': 'fearful',
|
23 |
+
'approved of': 'loved',
|
24 |
+
'ardent': 'lustful',
|
25 |
+
'aroused': 'lustful',
|
26 |
+
'attached': 'attached',
|
27 |
+
'attracted': 'attracted',
|
28 |
+
'autonomous': 'independent',
|
29 |
+
'awed': 'fearful',
|
30 |
+
'awkward': 'embarrassed',
|
31 |
+
'beaten down': 'powerless',
|
32 |
+
'beatific': 'happy',
|
33 |
+
'belonging': 'attached',
|
34 |
+
'bereaved': 'sad',
|
35 |
+
'betrayed': 'cheated',
|
36 |
+
'bewildered': 'surprise',
|
37 |
+
'bitter': 'angry',
|
38 |
+
'blissful': 'happy',
|
39 |
+
'blithe': 'happy',
|
40 |
+
'blocked': 'powerless',
|
41 |
+
'boiling': 'angry',
|
42 |
+
'bold': 'fearless',
|
43 |
+
'bored': 'bored',
|
44 |
+
'brave': 'fearless',
|
45 |
+
'bright': 'happy',
|
46 |
+
'brisk': 'happy',
|
47 |
+
'calm': 'safe',
|
48 |
+
'capable': 'adequate',
|
49 |
+
'captivated': 'attached',
|
50 |
+
'careless': 'powerless',
|
51 |
+
'categorized': 'singled out',
|
52 |
+
'cautious': 'fearful',
|
53 |
+
'certain': 'fearless',
|
54 |
+
'chagrined': 'belittled',
|
55 |
+
'challenged': 'attracted',
|
56 |
+
'chastised': 'hated',
|
57 |
+
'cheated': 'cheated',
|
58 |
+
'cheerful': 'happy',
|
59 |
+
'cheerless': 'sad',
|
60 |
+
'cheery': 'happy',
|
61 |
+
'cherished': 'attached',
|
62 |
+
'chicken': 'fearful',
|
63 |
+
'cocky': 'independent',
|
64 |
+
'codependent': 'codependent',
|
65 |
+
'coerced': 'cheated',
|
66 |
+
'comfortable': 'happy',
|
67 |
+
'common': 'average',
|
68 |
+
'competent': 'adequate',
|
69 |
+
'complacent': 'apathetic',
|
70 |
+
'composed': 'adequate',
|
71 |
+
'concerned': 'attracted',
|
72 |
+
'confident': 'adequate',
|
73 |
+
'confused': 'surprise',
|
74 |
+
'connected': 'attached',
|
75 |
+
'conned': 'cheated',
|
76 |
+
'consumed': 'obsessed',
|
77 |
+
'contented': 'happy',
|
78 |
+
'controlled': 'powerless',
|
79 |
+
'convivial': 'happy',
|
80 |
+
'cornered': 'entitled',
|
81 |
+
'courageous': 'fearless',
|
82 |
+
'cowardly': 'fearful',
|
83 |
+
'craving': 'attracted',
|
84 |
+
'crestfallen': 'sad',
|
85 |
+
'criticized': 'hated',
|
86 |
+
'cross': 'angry',
|
87 |
+
'cross-examined': 'singled out',
|
88 |
+
'crushed': 'sad',
|
89 |
+
'curious': 'attracted',
|
90 |
+
'cut off': 'alone',
|
91 |
+
'daring': 'fearless',
|
92 |
+
'dark': 'sad',
|
93 |
+
'dedicated': 'attracted',
|
94 |
+
'defeated': 'powerless',
|
95 |
+
'defenseless': 'fearful',
|
96 |
+
'degraded': 'belittled',
|
97 |
+
'dejected': 'sad',
|
98 |
+
'depressed': 'sad',
|
99 |
+
'deserted': 'hated',
|
100 |
+
'desirable': 'loved',
|
101 |
+
'despondent': 'sad',
|
102 |
+
'detached': 'alone',
|
103 |
+
'determined': 'focused',
|
104 |
+
'diminished': 'belittled',
|
105 |
+
'disappointed': 'demoralized',
|
106 |
+
'discarded': 'hated',
|
107 |
+
'disconsolate': 'sad',
|
108 |
+
'discontented': 'sad',
|
109 |
+
'discounted': 'belittled',
|
110 |
+
'discouraged': 'powerless',
|
111 |
+
'disgraced': 'belittled',
|
112 |
+
'disgusted': 'angry',
|
113 |
+
'disheartened': 'demoralized',
|
114 |
+
'disillusioned': 'demoralized',
|
115 |
+
'disjointed': 'derailed',
|
116 |
+
'dismal': 'sad',
|
117 |
+
'dismayed': 'fearful',
|
118 |
+
'disoriented': 'derailed',
|
119 |
+
'disparaged': 'cheated',
|
120 |
+
'displeased': 'sad',
|
121 |
+
'disrespected': 'belittled',
|
122 |
+
'distressed': 'sad',
|
123 |
+
'distrustful': 'anxious',
|
124 |
+
'dolorous': 'sad',
|
125 |
+
'doubtful': 'fearful',
|
126 |
+
'down': 'sad',
|
127 |
+
'downhearted': 'sad',
|
128 |
+
'dreadful': 'sad',
|
129 |
+
'dreary': 'sad',
|
130 |
+
'dubious': 'anxious',
|
131 |
+
'dull': 'sad',
|
132 |
+
'duped': 'cheated',
|
133 |
+
'eager': 'attracted',
|
134 |
+
'earnest': 'attracted',
|
135 |
+
'ecstatic': 'happy',
|
136 |
+
'elated': 'happy',
|
137 |
+
'embarrassed': 'embarrassed',
|
138 |
+
'empathetic': 'attached',
|
139 |
+
'enchanted': 'attracted',
|
140 |
+
'encouraged': 'adequate',
|
141 |
+
'engrossed': 'attracted',
|
142 |
+
'enraged': 'angry',
|
143 |
+
'enterprising': 'fearless',
|
144 |
+
'enthusiastic': 'happy',
|
145 |
+
'entrusted': 'loved',
|
146 |
+
'esteemed': 'esteemed',
|
147 |
+
'excited': 'happy',
|
148 |
+
'excluded': 'alone',
|
149 |
+
'exempt': 'entitled',
|
150 |
+
'exhausted hopeless': 'powerless',
|
151 |
+
'exhilarated': 'happy',
|
152 |
+
'exploited': 'cheated',
|
153 |
+
'exposed': 'fearful',
|
154 |
+
'fabulous': 'ecstatic',
|
155 |
+
'fainthearted': 'fearful',
|
156 |
+
'fantastic': 'ecstatic',
|
157 |
+
'fascinated': 'attracted',
|
158 |
+
'favored': 'entitled',
|
159 |
+
'fearful': 'fearful',
|
160 |
+
'fervent': 'attracted',
|
161 |
+
'fervid': 'attracted',
|
162 |
+
'festive': 'happy',
|
163 |
+
'flat': 'sad',
|
164 |
+
'focused': 'focused',
|
165 |
+
'forced': 'powerless',
|
166 |
+
'forsaken': 'hated',
|
167 |
+
'framed': 'cheated',
|
168 |
+
'free': 'free',
|
169 |
+
'free & easy': 'happy',
|
170 |
+
'frightened': 'fearful',
|
171 |
+
'frisky': 'happy',
|
172 |
+
'frustrated': 'angry',
|
173 |
+
'full of anticipation': 'attracted',
|
174 |
+
'full of ennui': 'apathetic',
|
175 |
+
'fuming': 'angry',
|
176 |
+
'funereal': 'sad',
|
177 |
+
'furious': 'angry',
|
178 |
+
'gallant': 'fearless',
|
179 |
+
'genial': 'happy',
|
180 |
+
'glad': 'happy',
|
181 |
+
'gleeful': 'happy',
|
182 |
+
'gloomy': 'sad',
|
183 |
+
'glum': 'sad',
|
184 |
+
'grass': 'happy',
|
185 |
+
'grief-stricken': 'sad',
|
186 |
+
'grieved': 'sad',
|
187 |
+
'guilt': 'sad',
|
188 |
+
'guilty': 'singled out',
|
189 |
+
'happy': 'happy',
|
190 |
+
'hardy': 'fearless',
|
191 |
+
'heartbroken': 'sad',
|
192 |
+
'heavyhearted': 'sad',
|
193 |
+
'hesitant': 'fearful',
|
194 |
+
'high-spirited': 'happy',
|
195 |
+
'hilarious': 'happy',
|
196 |
+
'hopeful': 'attracted',
|
197 |
+
'horny': 'lustful',
|
198 |
+
'horrified': 'fearful',
|
199 |
+
'hot and bothered': 'lustful',
|
200 |
+
'humiliated': 'sad',
|
201 |
+
'humorous': 'happy',
|
202 |
+
'hurt': 'sad',
|
203 |
+
'hysterical': 'fearful',
|
204 |
+
'ignored': 'hated',
|
205 |
+
'ill at ease': 'sad',
|
206 |
+
'immobilized': 'apathetic',
|
207 |
+
'immune': 'entitled',
|
208 |
+
'important': 'happy',
|
209 |
+
'impotent': 'powerless',
|
210 |
+
'imprisoned': 'entitled',
|
211 |
+
'in a huff': 'angry',
|
212 |
+
'in a stew': 'angry',
|
213 |
+
'in control': 'adequate',
|
214 |
+
'in fear': 'fearful',
|
215 |
+
'in pain': 'sad',
|
216 |
+
'in the dumps': 'sad',
|
217 |
+
'in the zone': 'focused',
|
218 |
+
'incensed': 'angry',
|
219 |
+
'included': 'attached',
|
220 |
+
'indecisive': 'anxious',
|
221 |
+
'independent': 'free',
|
222 |
+
'indignant': 'angry',
|
223 |
+
'infatuated': 'lustful',
|
224 |
+
'inflamed': 'angry',
|
225 |
+
'injured': 'sad',
|
226 |
+
'inquisitive': 'attracted',
|
227 |
+
'insecure': 'codependent',
|
228 |
+
'insignificant': 'belittled',
|
229 |
+
'intent': 'attracted',
|
230 |
+
'interested': 'attracted',
|
231 |
+
'interrogated': 'singled out',
|
232 |
+
'intrigued': 'attracted',
|
233 |
+
'irate': 'angry',
|
234 |
+
'irresolute': 'fearful',
|
235 |
+
'irresponsible': 'powerless',
|
236 |
+
'irritated': 'angry',
|
237 |
+
'isolated': 'alone',
|
238 |
+
'jaunty': 'happy',
|
239 |
+
'jocular': 'happy',
|
240 |
+
'jolly': 'happy',
|
241 |
+
'jovial': 'happy',
|
242 |
+
'joyful': 'happy',
|
243 |
+
'joyless': 'sad',
|
244 |
+
'joyous': 'happy',
|
245 |
+
'jubilant': 'happy',
|
246 |
+
'justified': 'singled out',
|
247 |
+
'keen': 'attracted',
|
248 |
+
'labeled': 'singled out',
|
249 |
+
'lackadaisical': 'bored',
|
250 |
+
'lazy': 'apathetic',
|
251 |
+
'left out': 'hated',
|
252 |
+
'let down': 'hated',
|
253 |
+
'lethargic': 'apathetic',
|
254 |
+
'lied to': 'cheated',
|
255 |
+
'lighthearted': 'happy',
|
256 |
+
'liked': 'attached',
|
257 |
+
'lively': 'happy',
|
258 |
+
'livid': 'angry',
|
259 |
+
'lonely': 'alone',
|
260 |
+
'lonesome': 'alone',
|
261 |
+
'lost': 'lost',
|
262 |
+
'loved': 'attached',
|
263 |
+
'low': 'sad',
|
264 |
+
'lucky': 'happy',
|
265 |
+
'lugubrious': 'sad',
|
266 |
+
'macho': 'independent',
|
267 |
+
'mad': 'angry',
|
268 |
+
'melancholy': 'sad',
|
269 |
+
'menaced': 'fearful',
|
270 |
+
'merry': 'happy',
|
271 |
+
'mirthful': 'happy',
|
272 |
+
'misgiving': 'fearful',
|
273 |
+
'misunderstood': 'alone',
|
274 |
+
'moody': 'sad',
|
275 |
+
'moping': 'sad',
|
276 |
+
'motivated': 'attracted',
|
277 |
+
'mournful': 'sad',
|
278 |
+
'needed': 'attracted',
|
279 |
+
'needy': 'codependent',
|
280 |
+
'nervous': 'fearful',
|
281 |
+
'obligated': 'powerless',
|
282 |
+
'obsessed': 'obsessed',
|
283 |
+
'offended': 'angry',
|
284 |
+
'oppressed': 'sad',
|
285 |
+
'optionless': 'entitled',
|
286 |
+
'ordinary': 'average',
|
287 |
+
'organized': 'adequate',
|
288 |
+
'out of control': 'powerless',
|
289 |
+
'out of sorts': 'sad',
|
290 |
+
'outmaneuvered': 'entitled',
|
291 |
+
'outraged': 'angry',
|
292 |
+
'overjoyed': 'happy',
|
293 |
+
'overlooked': 'hated',
|
294 |
+
'overwhelmed': 'powerless',
|
295 |
+
'panicked': 'fearful',
|
296 |
+
'passionate': 'lustful',
|
297 |
+
'passive': 'apathetic',
|
298 |
+
'pathetic': 'sad',
|
299 |
+
'peaceful': 'safe',
|
300 |
+
'pensive': 'anxious',
|
301 |
+
'perplexed': 'anxious',
|
302 |
+
'phobic': 'fearful',
|
303 |
+
'playful': 'happy',
|
304 |
+
'pleased': 'happy',
|
305 |
+
'powerless': 'powerless',
|
306 |
+
'pressured': 'burdened',
|
307 |
+
'privileged': 'entitled',
|
308 |
+
'proud': 'happy',
|
309 |
+
'provoked': 'angry',
|
310 |
+
'punished': 'hated',
|
311 |
+
'put upon': 'burdened',
|
312 |
+
'quaking': 'fearful',
|
313 |
+
'quiescent': 'apathetic',
|
314 |
+
'rageful': 'angry',
|
315 |
+
'rapturous': 'happy',
|
316 |
+
'rated': 'singled out',
|
317 |
+
'reassured': 'fearless',
|
318 |
+
'reckless': 'powerless',
|
319 |
+
'redeemed': 'singled out',
|
320 |
+
'regretful': 'sad',
|
321 |
+
'rejected': 'alone',
|
322 |
+
'released': 'free',
|
323 |
+
'remorse': 'sad',
|
324 |
+
'replaced': 'hated',
|
325 |
+
'repulsed': 'demoralized',
|
326 |
+
'resentful': 'angry',
|
327 |
+
'resolute': 'fearless',
|
328 |
+
'respected': 'esteemed',
|
329 |
+
'responsible': 'adequate',
|
330 |
+
'restful': 'fearful',
|
331 |
+
'revered': 'esteemed',
|
332 |
+
'rueful': 'sad',
|
333 |
+
'sad': 'sad',
|
334 |
+
'satisfied': 'happy',
|
335 |
+
'saucy': 'happy',
|
336 |
+
'scared': 'fearful',
|
337 |
+
'secure': 'fearless',
|
338 |
+
'self-reliant': 'fearless',
|
339 |
+
'serene': 'happy',
|
340 |
+
'shaky': 'fearful',
|
341 |
+
'shamed': 'sad',
|
342 |
+
'shocked': 'surprise',
|
343 |
+
'significant': 'esteemed',
|
344 |
+
'singled out': 'singled out',
|
345 |
+
'skeptical': 'anxious',
|
346 |
+
'snoopy': 'attracted',
|
347 |
+
'somber': 'sad',
|
348 |
+
'sparkling': 'happy',
|
349 |
+
'spirited': 'happy',
|
350 |
+
'spiritless': 'sad',
|
351 |
+
'sprightly': 'happy',
|
352 |
+
'startled': 'surprise',
|
353 |
+
'stereotyped': 'singled out',
|
354 |
+
'stifled': 'powerless',
|
355 |
+
'stout hearted': 'fearless',
|
356 |
+
'strong': 'independent',
|
357 |
+
'suffering': 'sad',
|
358 |
+
'sulky': 'sad',
|
359 |
+
'sullen': 'angry',
|
360 |
+
'sunny': 'happy',
|
361 |
+
'surprised': 'surprise',
|
362 |
+
'suspicious': 'anxious',
|
363 |
+
'sympathetic': 'codependent',
|
364 |
+
'tense': 'anxious',
|
365 |
+
'terrified': 'fearful',
|
366 |
+
'terrorized': 'fearful',
|
367 |
+
'thankful': 'happy',
|
368 |
+
'threatened': 'fearful',
|
369 |
+
'thwarted': 'powerless',
|
370 |
+
'timid': 'fearful',
|
371 |
+
'timorous': 'fearful',
|
372 |
+
'torn': 'derailed',
|
373 |
+
'tortured': 'sad',
|
374 |
+
'tragic': 'sad',
|
375 |
+
'tranquil': 'happy',
|
376 |
+
'transported': 'happy',
|
377 |
+
'trapped': 'entitled',
|
378 |
+
'tremulous': 'fearful',
|
379 |
+
'tricked': 'entitled',
|
380 |
+
'turned on': 'lustful',
|
381 |
+
'unapproved of': 'hated',
|
382 |
+
'unbelieving': 'anxious',
|
383 |
+
'uncertain': 'anxious',
|
384 |
+
'unconcerned': 'apathetic',
|
385 |
+
'understood': 'attached',
|
386 |
+
'unfocussed': 'lost',
|
387 |
+
'unlovable': 'hated',
|
388 |
+
'unloved': 'hated',
|
389 |
+
'unmotivated': 'apathetic',
|
390 |
+
'unshackled': 'free',
|
391 |
+
'unsupported': 'belittled',
|
392 |
+
'up in arms': 'angry',
|
393 |
+
'upset': 'fearful',
|
394 |
+
'validated': 'loved',
|
395 |
+
'valued': 'esteemed',
|
396 |
+
'victimized': 'sad',
|
397 |
+
'violated': 'cheated',
|
398 |
+
'virulent': 'angry',
|
399 |
+
'vivacious': 'happy',
|
400 |
+
'vulnerable': 'powerless',
|
401 |
+
'wavering': 'anxious',
|
402 |
+
'weak': 'powerless',
|
403 |
+
'welcomed': 'loved',
|
404 |
+
'woebegone': 'sad',
|
405 |
+
'woeful': 'sad',
|
406 |
+
'worn down': 'powerless',
|
407 |
+
'worn out': 'powerless',
|
408 |
+
'worried': 'fearful',
|
409 |
+
'worshiped': 'esteemed',
|
410 |
+
'wrathful': 'angry',
|
411 |
+
'wronged': 'singled out',
|
412 |
+
'wrought up': 'angry',
|
413 |
+
'yearning': 'lustful',
|
414 |
+
'zealous': 'attracted',
|
415 |
+
'abandoned': 'hated',
|
416 |
+
'absolved': 'singled out',
|
417 |
+
'absorbed': 'attracted',
|
418 |
+
'abused': 'powerless',
|
419 |
+
'accepted': 'loved',
|
420 |
+
'aching': 'sad',
|
421 |
+
'acrimonious': 'angry',
|
422 |
+
'addicted': 'codependent',
|
423 |
+
'adequate': 'adequate',
|
424 |
+
'admired': 'esteemed',
|
425 |
+
'affectionate': 'attached',
|
426 |
+
'affronted': 'singled out',
|
427 |
+
'afraid': 'fearful',
|
428 |
+
'airy': 'happy',
|
429 |
+
'alone': 'alone',
|
430 |
+
'ambivalent': 'bored',
|
431 |
+
'apathetic': 'apathetic',
|
432 |
+
'apprehensive': 'anxious',
|
433 |
+
'arrogant': 'independent',
|
434 |
+
'ashamed': 'embarrassed',
|
435 |
+
'astonished': 'surprise',
|
436 |
+
'at ease': 'safe',
|
437 |
+
'attacked': 'fearful',
|
438 |
+
'audacious': 'fearless',
|
439 |
+
'autonomous': 'free',
|
440 |
+
'average': 'average',
|
441 |
+
'avid': 'attracted',
|
442 |
+
'baffled': 'lost',
|
443 |
+
'bashful': 'powerless',
|
444 |
+
'belittled': 'belittled',
|
445 |
+
'buoyant': 'happy',
|
446 |
+
'burdened': 'burdened',
|
447 |
+
'clouded': 'sad',
|
448 |
+
'committed': 'focused',
|
449 |
+
'compassionate': 'attached',
|
450 |
+
'compelled': 'obsessed',
|
451 |
+
'dauntless': 'fearless',
|
452 |
+
'debonair': 'happy',
|
453 |
+
'deceived': 'entitled',
|
454 |
+
'delighted': 'ecstatic',
|
455 |
+
'demoralized': 'demoralized',
|
456 |
+
'derailed': 'derailed',
|
457 |
+
'desirous': 'attracted',
|
458 |
+
'despairing': 'sad',
|
459 |
+
'devastated': 'angry',
|
460 |
+
'diffident': 'fearful',
|
461 |
+
'discredited': 'belittled',
|
462 |
+
'disheartened': 'sad',
|
463 |
+
'disinclined': 'demoralized',
|
464 |
+
'disorganized': 'powerless',
|
465 |
+
'downcast': 'sad',
|
466 |
+
'entitled': 'entitled',
|
467 |
+
'excited': 'adequate',
|
468 |
+
'exultant': 'happy',
|
469 |
+
'fidgety': 'fearful',
|
470 |
+
'frowning': 'sad',
|
471 |
+
'full of misgiving': 'anxious',
|
472 |
+
'great': 'happy',
|
473 |
+
'hapless': 'sad',
|
474 |
+
'hated': 'hated',
|
475 |
+
'heroic': 'fearless',
|
476 |
+
'hostile': 'angry',
|
477 |
+
'in despair': 'sad',
|
478 |
+
'indifferent': 'bored',
|
479 |
+
'infuriated': 'angry',
|
480 |
+
'insecure': 'fearful',
|
481 |
+
'inspired': 'happy',
|
482 |
+
'inspiring': 'attracted',
|
483 |
+
'judged': 'singled out',
|
484 |
+
'justified': 'singled out',
|
485 |
+
'laughing': 'happy',
|
486 |
+
'loved': 'loved',
|
487 |
+
'loving': 'attached',
|
488 |
+
'low': 'sad',
|
489 |
+
'lustful': 'lustful',
|
490 |
+
'manipulated': 'cheated',
|
491 |
+
'mumpish': 'sad',
|
492 |
+
'nosey': 'attracted',
|
493 |
+
'numb': 'apathetic',
|
494 |
+
'obliterated': 'powerless',
|
495 |
+
'peaceful': 'happy',
|
496 |
+
'petrified': 'fearful',
|
497 |
+
'piqued': 'angry',
|
498 |
+
'piteous': 'sad',
|
499 |
+
'powerless': 'powerless',
|
500 |
+
'questioning': 'anxious',
|
501 |
+
'rejected': 'hated',
|
502 |
+
'self-satisfied': 'happy',
|
503 |
+
'set up': 'entitled',
|
504 |
+
'shut out': 'alone',
|
505 |
+
'sorrowful': 'sad',
|
506 |
+
'spirited': 'sad',
|
507 |
+
'supported': 'esteemed',
|
508 |
+
'suspicious': 'fearful',
|
509 |
+
'terrific': 'happy',
|
510 |
+
'trapped': 'entitled',
|
511 |
+
'trembling': 'fearful',
|
512 |
+
'uncomfortable': 'anxious',
|
513 |
+
'underestimated': 'belittled',
|
514 |
+
'unhappy': 'sad',
|
515 |
+
'vindicated': 'singled out',
|
516 |
+
'worked up': 'angry',
|
517 |
+
'airborne': 'excited',
|
518 |
+
'grass': 'happy',
|
519 |
+
'mountain': 'calm',
|
520 |
+
'dog': 'happy',
|
521 |
+
'umbrella': 'sad',
|
522 |
+
'train': 'sorrow',
|
523 |
+
'lightning': 'ominous',
|
524 |
+
'rocket': 'energetic',
|
525 |
+
'elevator': 'relaxed',
|
526 |
+
'slides': 'happy',
|
527 |
+
'mountains': 'relaxed',
|
528 |
+
'dog': 'excited',
|
529 |
+
'trees': 'relaxed',
|
530 |
+
'people': 'happy',
|
531 |
+
'old': 'sad',
|
532 |
+
'men': 'happy',
|
533 |
+
'women': 'happy',
|
534 |
+
'humans': 'happy',
|
535 |
+
'persons': 'happy',
|
536 |
+
'person': 'happy'
|
utils/gradio_helper.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from .audio_palette import AudioPalette
|
4 |
+
|
5 |
+
def single_image_interface(model: AudioPalette):
    """Build the Gradio interface for the single-image workflow.

    The interface calls ``model.generate_single`` with three inputs, in
    this order: an uploaded image (delivered as a PIL image), the selected
    instrument, and the text typed into the "colab endpoint" box.

    Four output components are declared, in this order: three hidden
    textboxes ("Generated Prompt", "Pace", "Caption") and a visible audio
    player ("Generated Audio").
    NOTE(review): presumably ``generate_single`` returns four values in
    that same order -- confirm against ``AudioPalette``.

    Args:
        model: Object exposing the ``generate_single`` callable.

    Returns:
        The configured ``gr.Interface`` (not launched here).
    """
    image_input = gr.Image(
        type="pil",
        label="Upload an image",
        show_label=True,
        container=True,
    )
    instrument_input = gr.Radio(
        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
        label="Instrument",
        show_label=True,
        container=True,
    )
    endpoint_input = gr.Textbox(
        lines=1,
        placeholder="ngrok endpoint",
        label="colab endpoint",
        show_label=True,
        container=True,
        type="text",
        visible=True,
    )

    # The three text outputs differ only in placeholder and label; all of
    # them are created hidden (visible=False).
    hidden_text_outputs = [
        gr.Textbox(
            lines=1,
            placeholder=placeholder,
            label=label,
            show_label=True,
            container=True,
            type="text",
            visible=False,
        )
        for placeholder, label in (
            ("Prompt", "Generated Prompt"),
            ("Pace of the image", "Pace"),
            ("Caption for the image", "Caption"),
        )
    ]
    audio_output = gr.Audio(
        label="Generated Audio",
        show_label=True,
        container=True,
        visible=True,
        format="wav",
        autoplay=False,
        show_download_button=True,
    )

    return gr.Interface(
        fn=model.generate_single,
        inputs=[image_input, instrument_input, endpoint_input],
        outputs=[*hidden_text_outputs, audio_output],
        cache_examples=False,
        live=False,
        description="Provide an image to generate an appropriate background soundtrack",
    )
|
75 |
+
|
76 |
+
def multi_image_interface(model: AudioPalette):
    """Build the Gradio interface for the multi-image (slideshow) workflow.

    The interface calls ``model.generate_multiple`` with three inputs, in
    this order: the uploaded image files (multiple files, passed as file
    paths), the selected instrument, and the text typed into the
    "colab endpoint" box.

    A single output component is declared: an MP4 video player labelled
    "Generated Video".

    Args:
        model: Object exposing the ``generate_multiple`` callable.

    Returns:
        The configured ``gr.Interface`` (not launched here).
    """
    demo = gr.Interface(
        fn=model.generate_multiple,
        inputs=[
            gr.File(
                file_count="multiple",
                file_types=["image"],
                type="filepath",
                label="Upload images",
                show_label=True,
                container=True,
                visible=True
            ),
            gr.Radio(
                choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
                label="Instrument",
                show_label=True,
                container=True
            ),
            gr.Textbox(
                lines=1,
                placeholder="ngrok endpoint",
                label="colab endpoint",
                show_label=True,
                container=True,
                type="text",
                visible=True
            )
        ],
        outputs=[
            gr.Video(
                format="mp4",
                label="Generated Video",
                show_label=True,
                container=True,
                visible=True,
                autoplay=False,
            )
        ],
        cache_examples=False,
        live=False,
        # User-facing text: fixed the "an a slideshow" grammatical slip.
        description="Provide images to generate a slideshow of the images with appropriate music as background",
    )

    return demo
|