Spaces:

Shad0ws
/

imagetomusic

Runtime error

App Files Files Community

Shad0ws

doevent committed on Dec 12, 2022

Commit

e1b51d8

•

0 Parent(s):

Duplicate from doevent/msk

Browse files

Co-authored-by: Max Skobeev <doevent@users.noreply.huggingface.co>

Files changed (6) hide show

.gitattributes +33 -0
README.md +14 -0
app.py +145 -0
constants.py +7 -0
requirements.txt +3 -0
utils.py +50 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,33 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Img to Music Video
+emoji: ⚡
+colorFrom: red
+colorTo: green
+sdk: gradio
+sdk_version: 3.10.1
+app_file: app.py
+pinned: false
+license: unknown
+duplicated_from: doevent/msk
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import time
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import httpx
+import json
+from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
+#import subprocess
+import os
+import uuid
+from tempfile import gettempdir
+from PIL import Image
+import cv2
+from pprint import pprint
# Sentence-embedding model shared by the tag lookup and the prompt encoder.
minilm = SentenceTransformer("all-MiniLM-L6-v2")
# Encode the Mubert tag vocabulary once at import time; requests reuse it.
mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)
# image_to_text = gr.Interface.load("spaces/doevent/image_to_text", api_key=os.environ['HF_TOKEN'])
# Remote Space loaded as a callable; generate_track_by_prompt calls it to turn
# the uploaded image into a text prompt.
image_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
def center_crop(img, dim: tuple = (512, 512)):
    """Return a center crop of ``img``.

    Args:
        img: image array indexed as ``img[row, column, ...]``.
        dim: requested crop size as ``(width, height)``; each side is clamped
            to the corresponding side of the image.

    Returns:
        A slice of ``img`` that is ``min(dim[0], width)`` columns wide and
        ``min(dim[1], height)`` rows tall, centered in the image.
    """
    height, width = img.shape[0], img.shape[1]
    crop_width = int(min(dim[0], width))
    crop_height = int(min(dim[1], height))
    # Compute the end as start + size instead of mid +/- half: halving an odd
    # size on both sides drops a pixel and yields a crop smaller than asked.
    left = width // 2 - crop_width // 2
    top = height // 2 - crop_height // 2
    return img[top:top + crop_height, left:left + crop_width]
def scale_image(img, factor=1):
    """Resize an image by applying the same scale factor to both sides.

    Args:
        img: image array to resize.
        factor: multiplier applied to the width and to the height; the
            results are truncated to integers.

    Returns:
        The resized image returned by ``cv2.resize``.
    """
    src_height, src_width = img.shape[0], img.shape[1]
    # cv2.resize takes the target size as (width, height).
    target_size = (int(src_width * factor), int(src_height * factor))
    return cv2.resize(img, target_size)
def get_track_by_tags(tags, pat, duration, maxit=20, loop=False):
    """Request a generated track from the Mubert API and wait until it is ready.

    Args:
        tags: tags describing the requested music.
        pat: access token as returned by ``get_pat``.
        duration: track duration sent to the API.
        maxit: maximum number of readiness polls, one second apart.
        loop: request a ``"loop"`` instead of a ``"track"``.

    Returns:
        The download URL of the generated track.

    Raises:
        RuntimeError: the API response does not report ``status == 1``.
        TimeoutError: the download URL did not answer 200 within ``maxit`` polls.
    """
    mode = "loop" if loop else "track"
    r = httpx.post('https://api-b2b.mubert.com/v2/RecordTrackTTM',
                   json={
                       "method": "RecordTrackTTM",
                       "params": {
                           "pat": pat,
                           "duration": duration,
                           "tags": tags,
                           "mode": mode
                       }
                   })
    pprint(r.text)
    rdata = json.loads(r.text)
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if rdata.get('status') != 1:
        error = rdata.get('error') or {}
        raise RuntimeError(error.get('text', 'Mubert API request failed'))
    trackurl = rdata['data']['tasks'][0]['download_link']
    # The link is handed out before the file exists; poll until it is served.
    for _ in range(maxit):
        if httpx.get(trackurl).status_code == 200:
            return trackurl
        time.sleep(1)
    # Previously the function fell through and returned None here, which made
    # callers fail later with an unrelated error.
    raise TimeoutError(f"Track was not ready after {maxit} attempts: {trackurl}")
def generate_track_by_prompt(image, email, duration, loop=False):
    """Turn an image into a text prompt, generate matching music and render a video.

    Steps: convert the image to a temporary PNG (downscaling large images),
    obtain a prompt from ``image_to_text``, map it to Mubert tags, request a
    track, download it and overlay its waveform on the image with ffmpeg.

    Args:
        image: path of the input image file.
        email: e-mail address passed to ``get_pat``.
        duration: track length in seconds; converted with ``int()``.
        loop: forwarded to ``get_track_by_tags``.

    Returns:
        ``(video_path, track_url, prompt, tags)``.

    Raises:
        gr.Error: on any failure, carrying the underlying error message.
    """
    import subprocess  # local import: the module-level one is commented out

    tmp_dir = gettempdir()
    filepath_png = f"{tmp_dir}/{uuid.uuid4().hex}.png"
    filepath_mp3 = None
    try:
        with Image.open(image) as im:
            src_width, src_height = im.size
            longest_side = max(src_width, src_height)
            if longest_side > 3501:
                # The message now states the limit that is actually enforced.
                raise gr.Error("Image width and height must not exceed 3501 px.")
            im.convert("RGB").save(filepath_png)
        # NOTE(review): only a side of exactly 3501 px reaches the 0.2 factor;
        # confirm the intended upper limit.
        if longest_side > 3500:
            factor = 0.2
        elif longest_side > 1800:
            factor = 0.3
        elif longest_side > 900:
            factor = 0.5
        else:
            factor = None
        if factor is not None:
            image_g = cv2.imread(image)
            if image_g is None:
                raise gr.Error("Could not read the image for resizing.")
            cv2.imwrite(filepath_png, scale_image(image_g, factor=factor))
        # prompt = image_to_text(filepath_png, "Image Captioning", "", "Nucleus sampling")
        prompt = image_to_text(filepath_png, "ViT-L (best for Stable Diffusion 1.*)", "Fast",  fn_index=1)[0]
        print(f"PROMPT: {prompt}")
        pat = get_pat(email)
        _, tags = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt, ])[0]
        track_url = get_track_by_tags(tags, pat, int(duration), loop=loop)
        if not track_url:
            raise gr.Error("The track was not ready in time, please try again.")
        filename_mp3 = track_url.split("/")[-1]
        filepath_mp3 = f"{tmp_dir}/{filename_mp3}"
        filepath_mp4 = f"{tmp_dir}/{uuid.uuid4().hex}.mp4"
        # Argument lists instead of shell strings: the remote-supplied URL and
        # the file paths are never parsed by a shell, and check=True surfaces
        # a failed download or encode instead of silently continuing.
        subprocess.run(["wget", track_url, "-P", tmp_dir], check=True)
        # The waveform overlay is sized to the (possibly downscaled) PNG.
        with Image.open(filepath_png) as im:
            width, height = im.size
        print(f"{width}x{height}")
        waveform_filter = (
            f"[0:a]showwaves=s={width}x{height}:colors=0xffffff:mode=cline,format=rgba[v];"
            "[1:v][v]overlay[outv]"
        )
        subprocess.run(
            [
                "ffmpeg", "-hide_banner", "-loglevel", "warning", "-y",
                "-i", filepath_mp3,
                "-loop", "1", "-i", filepath_png,
                "-filter_complex", waveform_filter,
                "-map", "[outv]", "-map", "0:a",
                "-c:v", "libx264", "-r", "15",
                "-c:a", "copy", "-pix_fmt", "yuv420p",
                "-shortest", filepath_mp4,
            ],
            check=True,
        )
        return filepath_mp4, track_url, prompt, tags
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
    finally:
        # Remove the intermediates on success and on failure alike.
        for leftover in (filepath_png, filepath_mp3):
            if leftover is not None and os.path.exists(leftover):
                os.remove(leftover)
# Gradio UI: image, e-mail ("text") and duration slider feed
# generate_track_by_prompt; its four return values fill the outputs in order.
iface = gr.Interface(
    fn=generate_track_by_prompt,
    inputs=[
        gr.Image(type="filepath"),
        "text",
        gr.Slider(label="duration (seconds)", value=30, minimum=10, maximum=60),
    ],
    outputs=[
        gr.Video(label="Video"),
        gr.Audio(label="Audio"),
        gr.Text(label="Prompt"),
        gr.Text(label="Tags"),
    ],
)
# Enable request queuing, then start the app.
iface.queue().launch()

constants.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import numpy as np
# Comma-separated Mubert tag vocabulary. Kept on a single line: a line break
# inside the literal is a syntax error and would corrupt the "disco house" tag.
MUBERT_TAGS_STRING = 'tribal,action,kids,neo-classic,run 130,pumped,jazz / funk,ethnic,dubtechno,reggae,acid jazz,liquidfunk,funk,witch house,tech house,underground,artists,mystical,disco,sensorium,r&b,agender,psychedelic trance / psytrance,peaceful,run 140,piano,run 160,setting,meditation,christmas,ambient,horror,cinematic,electro house,idm,bass,minimal,underscore,drums,glitchy,beautiful,technology,tribal house,country pop,jazz & funk,documentary,space,classical,valentines,chillstep,experimental,trap,new jack swing,drama,post-rock,tense,corporate,neutral,happy,analog,funky,spiritual,sberzvuk special,chill hop,dramatic,catchy,holidays,fitness 90,optimistic,orchestra,acid techno,energizing,romantic,minimal house,breaks,hyper pop,warm up,dreamy,dark,urban,microfunk,dub,nu disco,vogue,keys,hardcore,aggressive,indie,electro funk,beauty,relaxing,trance,pop,hiphop,soft,acoustic,chillrave / ethno-house,deep techno,angry,dance,fun,dubstep,tropical,latin pop,heroic,world music,inspirational,uplifting,atmosphere,art,epic,advertising,chillout,scary,spooky,slow ballad,saxophone,summer,erotic,jazzy,energy 100,kara mar,xmas,atmospheric,indie pop,hip-hop,yoga,reggaeton,lounge,travel,running,folk,chillrave & ethno-house,detective,darkambient,chill,fantasy,minimal techno,special,night,tropical house,downtempo,lullaby,meditative,upbeat,glitch hop,fitness,neurofunk,sexual,indie rock,future pop,jazz,cyberpunk,melancholic,happy hardcore,family / kids,synths,electric guitar,comedy,psychedelic trance & psytrance,edm,psychedelic rock,calm,zen,bells,podcast,melodic house,ethnic percussion,nature,heavy,bassline,indie dance,techno,drumnbass,synth pop,vaporwave,sad,8-bit,chillgressive,deep,orchestral,futuristic,hardtechno,nostalgic,big room,sci-fi,tutorial,joyful,pads,minimal 170,drill,ethnic 108,amusing,sleepy ambient,psychill,italo disco,lofi,house,acoustic guitar,bassline house,rock,k-pop,synthwave,deep house,electronica,gabber,nightlife,sport & fitness,road trip,celebration,electro,disco house,electronic'
# Tag vocabulary as an array so it can be indexed with an array of positions.
MUBERT_TAGS = np.array(MUBERT_TAGS_STRING.split(','))
# SECURITY NOTE(review): the license and token below are credentials committed
# in plain text; consider loading them from the environment and rotating them.
MUBERT_LICENSE = "ttmmubertlicense#f0acYBenRcfeFpNT4wpYGaTQIyDI4mJGv5MfIhBFz97NXDwDNFHmMRsBSzmGsJwbTpP1A6i07AXcIeAHo5"
MUBERT_MODE = "loop"
MUBERT_TOKEN = "4951f6428e83172a4f39de05d5b3ab10d58560b8"

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+httpx
+sentence-transformers
+opencv-python

utils.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import json
+import numpy as np
+import httpx
+from constants import MUBERT_TAGS, MUBERT_LICENSE, MUBERT_MODE, MUBERT_TOKEN
def get_mubert_tags_embeddings(w2v_model):
    """Encode the whole Mubert tag vocabulary with ``w2v_model``.

    Args:
        w2v_model: model exposing an ``encode`` method.

    Returns:
        Whatever ``w2v_model.encode`` returns for ``MUBERT_TAGS``.
    """
    tag_embeddings = w2v_model.encode(MUBERT_TAGS)
    return tag_embeddings
def get_pat(email: str):
    """Request a Mubert access token (PAT) for the given e-mail address.

    Args:
        email: e-mail address sent to the ``GetServiceAccess`` endpoint.

    Returns:
        The ``pat`` value from the API response.

    Raises:
        ValueError: the API response does not report ``status == 1``.
    """
    r = httpx.post('https://api-b2b.mubert.com/v2/GetServiceAccess',
                   json={
                       "method": "GetServiceAccess",
                       "params": {
                           "email": email,
                           "license": MUBERT_LICENSE,
                           "token": MUBERT_TOKEN,
                           "mode": MUBERT_MODE,
                       }
                   })
    rdata = json.loads(r.text)
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if rdata.get('status') != 1:
        raise ValueError("probably incorrect e-mail")
    return rdata['data']['pat']
def find_similar(em, embeddings, method='cosine'):
    """Score every reference embedding against a query embedding.

    Args:
        em: query embedding vector.
        embeddings: iterable of reference vectors.
        method: ``'cosine'`` for cosine distance (1 - cosine similarity) or
            ``'norm'`` for Euclidean distance.

    Returns:
        ``(scores, order)``: ``scores`` holds one distance per reference in
        input order (lower means more similar); ``order`` holds the indices
        that sort ``scores`` ascending.

    Raises:
        ValueError: ``method`` is not one of the supported names.
    """
    if method == 'cosine':
        # The query norm does not change per reference; compute it once.
        em_norm = np.linalg.norm(em)
        scores = [
            1 - np.dot(ref, em) / (np.linalg.norm(ref) * em_norm)
            for ref in embeddings
        ]
    elif method == 'norm':
        scores = [np.linalg.norm(ref - em) for ref in embeddings]
    else:
        # An unknown method used to yield empty results without any error.
        raise ValueError(f"unknown method {method!r}; expected 'cosine' or 'norm'")
    return np.array(scores), np.argsort(scores)
def get_tags_for_prompts(w2v_model, mubert_tags_embeddings, prompts, top_n=3, debug=False):
    """Pick the closest Mubert tags for each prompt.

    Args:
        w2v_model: model exposing an ``encode`` method, used on ``prompts``.
        mubert_tags_embeddings: embeddings of ``MUBERT_TAGS``, in the same order.
        prompts: sequence of prompt strings.
        top_n: number of tags to keep per prompt.
        debug: print the chosen tags and their similarity scores.

    Returns:
        A list of ``(prompt, tags)`` tuples, where ``tags`` is a list of the
        ``top_n`` best-matching tags.
    """
    matches = []
    for prompt, embedding in zip(prompts, w2v_model.encode(prompts)):
        distances, ranking = find_similar(embedding, mubert_tags_embeddings)
        best = ranking[:top_n]
        top_tags = MUBERT_TAGS[best]
        if debug:
            # Distances are 1 - similarity; flip them back for display.
            top_prob = 1 - distances[best]
            print(f"Prompt: {prompt}\nTags: {', '.join(top_tags)}\nScores: {top_prob}\n\n\n")
        matches.append((prompt, list(top_tags)))
    return matches