import time import json import os import openl3 import librosa import pickle import gradio as gr import numpy as np import faiss import yt_dlp embed_html1 = '' # NO GPU os.environ['CUDA_VISIBLE_DEVICES'] = '-1' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Cacher le nom du repo python_path = hf_hub_download(repo_id='PierreHanna/TextRetrieval', repo_type="space", filename='models.py', use_auth_token='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') #python_path = hf_hub_download(repo_id=os.environ['REPO_ID'], repo_type="space", filename=os.environ['MODEL_FILE'], # use_auth_token=os.environ['TOKEN']) print(python_path) sys.path.append(os.environ['PRIVATE_DIR']) from models import * def download_audio_(link): with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': '%(title)s.mp3'}) as video: info_dict = video.extract_info(link, download = True) video_title = info_dict['title'] video.download(link) return video_title def download_audio(id_video): id = id_video.split("?v=")[-1][:11] audio_file = download_audio_(id_video) audio_file = audio_file+'.mp3' embed_html_all = embed_html1 + id +embed_html2 return audio_file, audio_file, embed_html_all def process_url(input_path): # setup the client audio_file, audio_file, embed_html_all = download_audio(input_path) return process(audio_file, embed_html_all) def process_file(input_path): return process(input_path, '') def process(audio_file, embed_html_all): model = openl3.models.load_audio_embedding_model(input_repr="linear", content_type="music",embedding_size=512) audio, sr = librosa.load(audio_file, mono=True) emb, ts = openl3.get_audio_embedding(audio, sr, model=model) emb = np.mean(emb, axis=0) print("SHAPE ", emb.shape) ind = faiss.read_index("index.index") ind_filenames = pickle.load(open('index.filenames','rb')) dict_bmg = pickle.load(open('dict_bmg.pickle','rb')) # filename to url D, I = ind.search(emb.reshape((1,512)), 5) top1 = dict_bmg[ind_filenames[I[0][0]]] top2 = dict_bmg[ind_filenames[I[0][1]]] top3 = dict_bmg[ind_filenames[I[0][2]]] top4 = dict_bmg[ind_filenames[I[0][3]]] top5 = dict_bmg[ind_filenames[I[0][4]]] return top1, top2, top3, top4, top5 with gr.Blocks() as demo: with gr.Row(): with gr.Column(): with gr.Row(): #gr.HTML(embed_html) html = gr.HTML() with gr.Row(): with gr.Column(): audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL') analyze_url_btn = gr.Button('Search from URL') with gr.Row(): with gr.Column(): audio_input_file = gr.Audio(type="filepath", label='Audio Input') analyze_file_btn = gr.Button('Search from file') with gr.Row(): with gr.Column(): ''' gr.HTML("