Surn committed on
Commit
5d66b58
1 Parent(s): 0ec9f7b

Add Text and description to video

Browse files
Files changed (3) hide show
  1. app.py +15 -6
  2. audiocraft/utils/extend.py +86 -1
  3. requirements.txt +1 -0
app.py CHANGED
@@ -13,7 +13,7 @@ import gradio as gr
13
  import os
14
  from audiocraft.models import MusicGen
15
  from audiocraft.data.audio import audio_write
16
- from audiocraft.utils.extend import generate_music_segments
17
  import numpy as np
18
 
19
  MODEL = None
@@ -25,7 +25,7 @@ def load_model(version):
25
  return MusicGen.get_pretrained(version)
26
 
27
 
28
- def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
29
  global MODEL
30
  output_segments = None
31
  topk = int(topk)
@@ -75,6 +75,10 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
75
  else:
76
  output = output.detach().cpu().float()[0]
77
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
 
 
 
 
78
  audio_write(
79
  file.name, output, MODEL.sample_rate, strategy="loudness",
80
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
@@ -102,12 +106,17 @@ def ui(**kwargs):
102
  with gr.Row():
103
  with gr.Column():
104
  with gr.Row():
105
- text = gr.Text(label="Input Text", interactive=True)
106
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
107
  with gr.Row():
108
  submit = gr.Button("Submit")
109
  with gr.Row():
110
  background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
 
 
 
 
 
111
  with gr.Row():
112
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
113
  with gr.Row():
@@ -116,11 +125,11 @@ def ui(**kwargs):
116
  with gr.Row():
117
  topk = gr.Number(label="Top-k", value=250, interactive=True)
118
  topp = gr.Number(label="Top-p", value=0, interactive=True)
119
- temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
120
- cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
121
  with gr.Column():
122
  output = gr.Video(label="Generated Music")
123
- submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
124
  gr.Examples(
125
  fn=predict,
126
  examples=[
 
13
  import os
14
  from audiocraft.models import MusicGen
15
  from audiocraft.data.audio import audio_write
16
+ from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, sanitize_file_name
17
  import numpy as np
18
 
19
  MODEL = None
 
25
  return MusicGen.get_pretrained(version)
26
 
27
 
28
+ def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color):
29
  global MODEL
30
  output_segments = None
31
  topk = int(topk)
 
75
  else:
76
  output = output.detach().cpu().float()[0]
77
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
78
+ if include_settings:
79
+ video_description = f"{text}\n Duration: {str(duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef}"
80
+ background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
81
+ #filename = sanitize_file_name(title) if title != "" else file.name
82
  audio_write(
83
  file.name, output, MODEL.sample_rate, strategy="loudness",
84
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
 
106
  with gr.Row():
107
  with gr.Column():
108
  with gr.Row():
109
+ text = gr.Text(label="Input Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
110
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
111
  with gr.Row():
112
  submit = gr.Button("Submit")
113
  with gr.Row():
114
  background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
115
+ include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
116
+ with gr.Row():
117
+ title = gr.Textbox(label="Title", value="MusicGen", interactive=True)
118
+ settings_font = gr.Text(label="Settings Font", value="arial.ttf", interactive=True)
119
+ settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#ffffff", interactive=True)
120
  with gr.Row():
121
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
122
  with gr.Row():
 
125
  with gr.Row():
126
  topk = gr.Number(label="Top-k", value=250, interactive=True)
127
  topp = gr.Number(label="Top-p", value=0, interactive=True)
128
+ temperature = gr.Number(label="Randomness Temperature", value=1.0, precision=2, interactive=True)
129
+ cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, precision=2, interactive=True)
130
  with gr.Column():
131
  output = gr.Video(label="Generated Music")
132
+ submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color], outputs=[output])
133
  gr.Examples(
134
  fn=predict,
135
  examples=[
audiocraft/utils/extend.py CHANGED
@@ -2,7 +2,11 @@ import torch
2
  import math
3
  from audiocraft.models import MusicGen
4
  import numpy as np
5
-
 
 
 
 
6
 
7
  def separate_audio_segments(audio, segment_duration=30):
8
  sr, audio_data = audio[0], audio[1]
@@ -106,6 +110,87 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_durati
106
 
107
  # return output_segments
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
 
111
 
 
2
  import math
3
  from audiocraft.models import MusicGen
4
  import numpy as np
5
+ from PIL import Image, ImageDraw, ImageFont, ImageColor
6
+ import string
7
+ import tempfile
8
+ import os
9
+ import textwrap
10
 
11
  def separate_audio_segments(audio, segment_duration=30):
12
  sr, audio_data = audio[0], audio[1]
 
110
 
111
  # return output_segments
112
 
113
def save_image(image):
    """
    Saves a PIL image to a temporary file and returns the file path.

    Parameters:
    - image: PIL.Image
        The PIL image object to be saved (anything exposing ``save(path)``).

    Returns:
    - str or None: The file path where the image was saved,
        or None if there was an error saving the image.

    """
    # Reserve a unique ".png" path and close the handle immediately so that
    # image.save() can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".png", dir=tempfile.gettempdir(), delete=False) as temp_file:
        file_path = temp_file.name

    try:
        image.save(file_path)
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        # (A `finally: return file_path` here would override this return.)
        print("Unable to save image:", str(e))
        try:
            # Do not leave the empty placeholder file behind.
            os.remove(file_path)
        except OSError:
            pass
        return None
    return file_path
139
+
140
def hex_to_rgba(hex_color):
    """
    Convert a color string (e.g. "#ffffff") into an RGBA tuple.

    Returns the tuple produced by ImageColor.getcolor(..., "RGBA"); when that
    call rejects the value with ValueError, opaque yellow is returned instead.
    """
    fallback_yellow = (255, 255, 0, 255)
    try:
        return ImageColor.getcolor(hex_color, "RGBA")
    except ValueError:
        # Invalid color specification: use the fallback color.
        return fallback_yellow
148
+
149
def _wrap_preserving_newlines(text, max_chars):
    """Wrap every line of *text* to *max_chars* columns, keeping explicit line breaks."""
    # textwrap.wrap() on the whole string replaces "\n" with spaces, which
    # would discard line breaks the caller put in on purpose; wrap per line.
    wrapped = []
    for line in text.split("\n"):
        wrapped.extend(textwrap.wrap(line, max_chars) or [""])
    return "\n".join(wrapped)


def _text_extent(draw, text, font):
    """Return (width, bottom) in pixels of multi-line *text* drawn at (0, 0)."""
    if not text:
        return 0, 0
    left, _top, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, align="center")
    return right - left, bottom


def add_settings_to_image(title: str = "title", description: str = "", width: int = 768, height: int = 512, background_path: str = "", font: str = "arial.ttf", font_color: str = "#ffffff"):
    """
    Render a title and a description onto a background image and save the result.

    Parameters:
    - title: text drawn in a 26 pt font, 10 pixels below the top edge.
    - description: text drawn in a 16 pt font, 20 pixels below the title.
    - width, height: size of the transparent text layer; also the size of the
        plain white background used when no background image is given.
    - background_path: path of the background image; empty or None means a
        plain white background.
    - font: font file name/path passed to ImageFont.truetype.
    - font_color: color string converted with hex_to_rgba.

    Returns:
    - The value returned by save_image() for the composed image.

    Raises:
    - Whatever ImageFont.truetype / Image.open raise when the font or the
        background image cannot be loaded.
    """
    # Transparent layer that receives the text and is later pasted on the background
    overlay = Image.new("RGBA", (width, height), (255, 255, 255, 0))
    if not background_path:
        background = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    else:
        # convert() returns an independent image, so the source file can be closed
        with Image.open(background_path) as source:
            background = source.convert("RGBA")

    # Convert font color to RGBA tuple
    font_color = hex_to_rgba(font_color)

    draw = ImageDraw.Draw(overlay)
    center_x = width // 2
    # Rough characters-per-line budget; never below 1 so textwrap accepts it
    max_chars = max(1, width // 12)

    # Title: horizontally centered, 10 pixels padding from the top
    title_font = ImageFont.truetype(font, 26)
    title_text = _wrap_preserving_newlines(title, max_chars)
    title_width, title_bottom = _text_extent(draw, title_text, title_font)
    title_x = max(center_x - title_width // 2, 0)
    title_y = 10
    # Draw the wrapped text (the same string that was measured)
    draw.multiline_text((title_x, title_y), title_text, fill=font_color, font=title_font, align="center")

    # Description: horizontally centered, 20 pixels below the title block
    description_font = ImageFont.truetype(font, 16)
    description_text = _wrap_preserving_newlines(description, max_chars)
    description_width, _ = _text_extent(draw, description_text, description_font)
    description_x = max(center_x - description_width // 2, 0)
    description_y = title_y + title_bottom + 20
    draw.multiline_text((description_x, description_y), description_text, fill=font_color, font=description_font, align="center")

    # Center the text layer on the background and composite using its alpha
    bg_w, bg_h = background.size
    offset = ((bg_w - width) // 2, (bg_h - height) // 2)
    background.paste(overlay, offset, mask=overlay)

    # Save the image and return the file path
    return save_image(background)
188
+
189
+
190
def sanitize_file_name(filename):
    """
    Return *filename* with every character outside the whitelist removed.

    Kept characters: ASCII letters, digits, space and the symbols "-_.()".
    """
    allowed = set("-_.() ") | set(string.ascii_letters) | set(string.digits)
    kept = [ch for ch in filename if ch in allowed]
    return "".join(kept)
194
 
195
 
196
 
requirements.txt CHANGED
@@ -18,3 +18,4 @@ xformers
18
  demucs
19
  librosa
20
  gradio
 
 
18
  demucs
19
  librosa
20
  gradio
21
+ textwrap