import time
import gradio as gr
from sentence_transformers import SentenceTransformer
import httpx
import json
from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
# import subprocess
import os
import uuid
from tempfile import gettempdir
from PIL import Image
import cv2
from pprint import pprint

# Sentence embedding model used to match free-text prompts against the
# Mubert tag vocabulary (see utils.get_tags_for_prompts).
minilm = SentenceTransformer('all-MiniLM-L6-v2')
mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)

# Remote Space that captions an image (CLIP-interrogator style).
# image_to_text = gr.Interface.load("spaces/doevent/image_to_text", api_key=os.environ['HF_TOKEN'])
image_to_text = gr.Blocks.load(name="spaces/banana-dev/demo-clip-interrogator")


def center_crop(img, dim: tuple = (512, 512)):
    """Returns center cropped image

    Args:
    img: image to be center cropped
    dim: dimensions (width, height) to be cropped
    """
    width, height = img.shape[1], img.shape[0]

    # process crop width and height for max available dimension
    crop_width = dim[0] if dim[0] < img.shape[1] else img.shape[1]
    # NOTE(review): the original source was corrupted from this point down to
    # the size-check branches inside generate_track_by_prompt; the code between
    # here and that point is a reconstruction — confirm against the original.
    crop_height = dim[1] if dim[1] < img.shape[0] else img.shape[0]
    mid_x, mid_y = int(width / 2), int(height / 2)
    cw2, ch2 = int(crop_width / 2), int(crop_height / 2)
    return img[mid_y - ch2:mid_y + ch2, mid_x - cw2:mid_x + cw2]


def scale_image(img, factor: float = 1.0):
    """Return *img* uniformly resized by *factor* (e.g. 0.5 halves each side)."""
    return cv2.resize(img, (int(img.shape[1] * factor), int(img.shape[0] * factor)))


def get_track_by_tags(tags, pat, duration, maxit=20, loop=False):
    """Request a generated track from the Mubert TTM API and poll until ready.

    Args:
        tags: list of Mubert tags selected for the prompt.
        pat: personal access token obtained via get_pat(email).
        duration: track length in seconds.
        maxit: maximum number of 1-second polling attempts.
        loop: request a seamless loop instead of a one-shot track.

    Returns:
        URL of the rendered audio file.

    NOTE(review): this function was lost in the corrupted source and is
    reconstructed from the Mubert B2B API contract and its visible callsite.
    """
    mode = "loop" if loop else "track"
    r = httpx.post(
        "https://api-b2b.mubert.com/v2/RecordTrackTTM",
        json={
            "method": "RecordTrackTTM",
            "params": {
                "pat": pat,
                "duration": duration,
                "tags": tags,
                "mode": mode,
            },
        },
    )
    rdata = json.loads(r.text)
    assert rdata["status"] == 1, rdata["error"]["text"]
    trackurl = rdata["data"]["tasks"][0]["download_link"]
    # The render is asynchronous: poll the download link until it resolves.
    for _ in range(maxit):
        r = httpx.get(trackurl)
        if r.status_code == 200:
            return trackurl
        time.sleep(1)
    raise gr.Error("Track generation timed out, please try again.")


def generate_track_by_prompt(image, email, duration, loop=False):
    """Full pipeline: image -> caption -> Mubert tags -> track -> waveform video.

    Args:
        image: filepath of the uploaded image (gr.Image type="filepath").
        email: user e-mail, exchanged for a Mubert personal access token.
        duration: requested track length in seconds.
        loop: request a seamless loop instead of a one-shot track.

    Returns:
        (video filepath, audio URL, generated prompt, matched tags).
    """
    try:
        # NOTE(review): reconstructed head — the original head of this function
        # was lost. It must copy the upload to a temp PNG and read its pixel
        # dimensions before the size checks below; confirm against the original.
        filename_png = f"{uuid.uuid4().hex}.png"
        filepath_png = f"{gettempdir()}/{filename_png}"
        image_g = cv2.imread(image)
        ratio_height, ratio_width = image_g.shape[0], image_g.shape[1]
        cv2.imwrite(filepath_png, image_g)

        # Downscale large uploads so ffmpeg/showwaves stays cheap; reject
        # anything larger than 3501 px on a side.
        if ratio_width > 3501 or ratio_height > 3501:
            raise gr.Error("Image aspect ratio must not exceed width: 1024 px or height: 1024 px.")
        elif ratio_width > 3500 or ratio_height > 3500:
            image_g = cv2.imread(image)
            scale_img = scale_image(image_g, factor=0.2)
            cv2.imwrite(filepath_png, scale_img)
        elif ratio_width > 1800 or ratio_height > 1800:
            image_g = cv2.imread(image)
            scale_img = scale_image(image_g, factor=0.3)
            cv2.imwrite(filepath_png, scale_img)
        elif ratio_width > 900 or ratio_height > 900:
            image_g = cv2.imread(image)
            scale_img = scale_image(image_g, factor=0.5)
            cv2.imwrite(filepath_png, scale_img)

        # Caption the image via the remote CLIP-interrogator Space.
        # prompt = image_to_text(filepath_png, "Image Captioning", "", "Nucleus sampling")
        prompt = image_to_text(filepath_png, "ViT-L (best for Stable Diffusion 1.*)", "Fast", fn_index=1)[0]
        print(f"PROMPT: {prompt}")

        # Exchange e-mail for a Mubert token, map the caption to tags, render.
        pat = get_pat(email)
        _, tags = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt, ])[0]
        filepath = get_track_by_tags(tags, pat, int(duration), loop=loop)

        filename_mp3 = filepath.split("/")[-1]
        filepath_mp3 = f"{gettempdir()}/{filename_mp3}"
        filename_mp4 = f"{uuid.uuid4().hex}.mp4"
        filepath_mp4 = f"{gettempdir()}/{filename_mp4}"
        os.system(f"wget {filepath} -P {gettempdir()}")

        # waveform
        with Image.open(filepath_png) as im:
            width = im.size[0]
            height = im.size[1]
            print(f"{width}x{height}")

        # Overlay an animated waveform (showwaves) on the image and mux with
        # the downloaded mp3 into an mp4 of the same length (-shortest).
        command = f'ffmpeg -hide_banner -loglevel warning -y -i {filepath_mp3} -loop 1 -i {filepath_png} -filter_complex "[0:a]showwaves=s={width}x{height}:colors=0xffffff:mode=cline,format=rgba[v];[1:v][v]overlay[outv]" -map "[outv]" -map 0:a -c:v libx264 -r 15 -c:a copy -pix_fmt yuv420p -shortest {filepath_mp4}'
        os.system(command)

        os.remove(filepath_png)
        os.remove(filepath_mp3)
        return filepath_mp4, filepath, prompt, tags
    except Exception as e:
        # Surface any failure to the Gradio UI as an error toast.
        raise gr.Error(str(e))


iface = gr.Interface(
    fn=generate_track_by_prompt,
    inputs=[
        gr.Image(type="filepath"),
        "text",
        gr.Slider(label="duration (seconds)", value=30, minimum=10, maximum=60),
    ],
    outputs=[
        gr.Video(label="Video"),
        gr.Audio(label="Audio"),
        gr.Text(label="Prompt"),
        gr.Text(label="Tags"),
    ],
)
iface.queue().launch()