UnlimitedMusicGen

Running

App Files Files Community

Surn committed on Jun 12, 2023

Commit

5d66b58

1 Parent(s): 0ec9f7b

Add Text and description to video

Browse files

Files changed (3) hide show

app.py +15 -6
audiocraft/utils/extend.py +86 -1
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ import gradio as gr
 import os
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
-from audiocraft.utils.extend import generate_music_segments
 import numpy as np
 MODEL = None
@@ -25,7 +25,7 @@ def load_model(version):
     return MusicGen.get_pretrained(version)
-def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
     global MODEL
     output_segments = None
     topk = int(topk)
@@ -75,6 +75,10 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
     else:
         output = output.detach().cpu().float()[0]
     with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
@@ -102,12 +106,17 @@ def ui(**kwargs):
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    text = gr.Text(label="Input Text", interactive=True)
                     melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
                 with gr.Row():
                     submit = gr.Button("Submit")
                 with gr.Row():
                     background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
                 with gr.Row():
                     model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
@@ -116,11 +125,11 @@ def ui(**kwargs):
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, interactive=True)
                     topp = gr.Number(label="Top-p", value=0, interactive=True)
-                    temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
-                    cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
                 output = gr.Video(label="Generated Music")
-        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
         gr.Examples(
             fn=predict,
             examples=[

 import os
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
+from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, sanitize_file_name
 import numpy as np
 MODEL = None
     return MusicGen.get_pretrained(version)
+def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color):
     global MODEL
     output_segments = None
     topk = int(topk)
     else:
         output = output.detach().cpu().float()[0]
     with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        if include_settings:
+            video_description = f"{text}\n Duration: {str(duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef}"
+            background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
+        #filename = sanitize_file_name(title) if title != "" else file.name
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
         with gr.Row():
             with gr.Column():
                 with gr.Row():
+                    text = gr.Text(label="Input Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
                     melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
                 with gr.Row():
                     submit = gr.Button("Submit")
                 with gr.Row():
                     background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
+                    include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
+                with gr.Row():
+                    title = gr.Textbox(label="Title", value="MusicGen", interactive=True)
+                    settings_font = gr.Text(label="Settings Font", value="arial.ttf", interactive=True)
+                    settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#ffffff", interactive=True)
                 with gr.Row():
                     model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, interactive=True)
                     topp = gr.Number(label="Top-p", value=0, interactive=True)
+                    temperature = gr.Number(label="Randomness Temperature", value=1.0, precision=2, interactive=True)
+                    cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, precision=2, interactive=True)
             with gr.Column():
                 output = gr.Video(label="Generated Music")
+        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color], outputs=[output])
         gr.Examples(
             fn=predict,
             examples=[

audiocraft/utils/extend.py CHANGED Viewed

@@ -2,7 +2,11 @@ import torch
 import math
 from audiocraft.models import MusicGen
 import numpy as np
 def separate_audio_segments(audio, segment_duration=30):
     sr, audio_data = audio[0], audio[1]
@@ -106,6 +110,87 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_durati
 #    return output_segments

 import math
 from audiocraft.models import MusicGen
 import numpy as np
+from PIL import Image, ImageDraw, ImageFont, ImageColor
+import string
+import tempfile
+import os
+import textwrap
 def separate_audio_segments(audio, segment_duration=30):
     sr, audio_data = audio[0], audio[1]
 #    return output_segments
def save_image(image):
    """
    Saves a PIL image to a temporary file and returns the file path.
    Parameters:
    - image: PIL.Image
        The PIL image object to be saved (any object exposing a
        ``save(path)`` method works).
    Returns:
    - str or None: The file path where the image was saved,
        or None if there was an error saving the image.
    """
    # Reserve a unique ".png" path in the system temp directory. The handle is
    # closed immediately so that image.save() can open the path on its own.
    with tempfile.NamedTemporaryFile(suffix=".png", dir=tempfile.gettempdir(), delete=False) as temp_file:
        file_path = temp_file.name
    try:
        image.save(file_path)
    except Exception as e:
        print("Unable to save image:", str(e))
        # Do not leave the empty placeholder file behind after a failed save.
        try:
            os.remove(file_path)
        except OSError:
            pass
        return None
    # NOTE: the previous version returned from a `finally` clause, which
    # silently replaced the `None` above with the path of an unusable file.
    return file_path
def hex_to_rgba(hex_color):
    """
    Converts a color string to an RGBA tuple.
    Parameters:
    - hex_color: str
        A color specifier understood by PIL.ImageColor (e.g. "#ffffff").
    Returns:
    - tuple: The color as returned by ImageColor.getcolor(..., "RGBA"),
        or opaque yellow (255, 255, 0, 255) if the value cannot be parsed.
    """
    try:
        # Convert hex color to RGBA tuple
        return ImageColor.getcolor(hex_color, "RGBA")
    except (ValueError, TypeError, AttributeError):
        # ValueError: unknown color string. TypeError/AttributeError: the
        # value is not a string at all (e.g. None from an empty UI field).
        # In every case default to yellow rather than aborting.
        return (255, 255, 0, 255)
def _load_font(font, size):
    """Loads the TrueType font `font` at `size`, falling back to Pillow's built-in font if it cannot be opened."""
    try:
        return ImageFont.truetype(font, size)
    except OSError as e:
        # The requested font file is frequently missing (e.g. "arial.ttf" on
        # Linux hosts); degrade to the default font instead of failing.
        print(f"Unable to load font {font!r}:", str(e))
        return ImageFont.load_default()


def _draw_centered_text(draw, text, font, top, canvas_width, fill):
    """Draws (possibly multi-line) `text` horizontally centered with its origin at y=`top`; returns the y coordinate of its bottom edge."""
    if not text:
        return top
    # multiline_textbbox returns (left, top, right, bottom) for the whole
    # block, taking line breaks into account.
    left, _, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, align="center")
    x = max((canvas_width - (right - left)) // 2 - left, 0)
    draw.multiline_text((x, top), text, fill=fill, font=font, align="center")
    return top + bottom


def add_settings_to_image(title: str = "title", description: str = "", width: int = 768, height: int = 512, background_path: str = "", font: str = "arial.ttf", font_color: str = "#ffffff"):
    """
    Renders a title and a description onto a background image and saves the result.
    Parameters:
    - title: str
        Text drawn at the top of the overlay (26pt), wrapped to width // 12 characters.
    - description: str
        Text drawn 20 pixels below the title (16pt), wrapped the same way.
    - width, height: int
        Size in pixels of the transparent text overlay.
    - background_path: str
        Path of the background image. When empty, a white width x height
        background is used.
    - font: str
        Font file name or path passed to ImageFont.truetype. If it cannot be
        loaded, Pillow's default font is used.
    - font_color: str
        Text color; converted with hex_to_rgba.
    Returns:
    - The value returned by save_image for the composed image (the path of
      the saved file).
    """
    # Transparent layer that receives the text
    overlay = Image.new("RGBA", (width, height), (255, 255, 255, 0))
    # Use the supplied background image, or a plain white one if none is given
    if not background_path:
        background = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    else:
        background = Image.open(background_path).convert("RGBA")
    # Convert font color to RGBA tuple
    font_color = hex_to_rgba(font_color)
    # textwrap rejects a non-positive width, so never go below one column
    wrap_width = max(width // 12, 1)
    draw = ImageDraw.Draw(overlay)
    # Draw the wrapped title at the center top, 10 pixels padding from the top
    title_text = '\n'.join(textwrap.wrap(title or "", wrap_width))
    title_bottom = _draw_centered_text(draw, title_text, _load_font(font, 26), 10, width, font_color)
    # Draw the description with 20 pixels spacing between title and description
    description_text = '\n'.join(textwrap.wrap(description or "", wrap_width))
    _draw_centered_text(draw, description_text, _load_font(font, 16), title_bottom + 20, width, font_color)
    # Calculate the offset to center the overlay on the background
    bg_w, bg_h = background.size
    offset = ((bg_w - width) // 2, (bg_h - height) // 2)
    # Paste the overlay onto the background, using its own alpha as the mask
    background.paste(overlay, offset, mask=overlay)
    # Save the image and return the file path
    return save_image(background)
def sanitize_file_name(filename):
    """
    Removes every character that is not allowed in a file name.
    Parameters:
    - filename: str
        The raw name to clean up.
    Returns:
    - str: `filename` restricted to ASCII letters, digits, spaces and the
        characters "-", "_", ".", "(" and ")". Order is preserved.
    """
    allowed = set("-_.() ") | set(string.ascii_letters) | set(string.digits)
    kept = [ch for ch in filename if ch in allowed]
    return "".join(kept)

requirements.txt CHANGED Viewed

@@ -18,3 +18,4 @@ xformers
 demucs
 librosa
 gradio

 demucs
 librosa
 gradio
+textwrap