Spaces:
Runtime error
Runtime error
File size: 6,234 Bytes
83418c6 aa6ba21 83418c6 aa6ba21 83418c6 aa6ba21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import numpy as np
import tensorflow as tf
from scipy.io.wavfile import write
import keras.backend as K
import librosa.display
import cv2
import librosa
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
from keras.applications import VGG16
import os
import scipy
import gradio as gr
# Recording settings. DURATION is presumably a clip length in seconds
# (TODO confirm); WAVE_OUTPUT_FILE is the file name used when incoming
# audio is written to disk.
DURATION = 10
WAVE_OUTPUT_FILE = "my_audio.wav"

# Load the tune recognition model from disk and keep a handle on its third
# layer, which is the sub-model used to turn spectrogram images into embeddings.
model = tf.keras.models.load_model('embdmodel_1.hdf5')
embedding_model = model.layers[2]
# Define the function that preprocesses the input audio.
# Convert the song to a mel spectrogram, as the siamese network doesn't work on sound directly.
def create_spectrogram(clip, sample_rate, save_path):
    """Render *clip* as a mel-spectrogram image and save it to *save_path*.

    Args:
        clip: Audio samples passed to ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate of *clip*, passed as ``sr``.
        save_path: Destination of the rendered image file.

    Returns:
        None. The image is written to *save_path* as a side effect.
    """
    plt.interactive(False)
    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        mel_power = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # specshow draws onto the current figure, which is the one just created.
        librosa.display.specshow(librosa.power_to_db(mel_power, ref=np.max))
        fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails, so
        # repeated requests do not accumulate open matplotlib figures. Only
        # this function's own figure is closed, not every figure in the process.
        plt.close(fig)
def load_img(path):
    """Load the image at *path* as a 150x150 RGB array.

    Args:
        path: Location of the image file to read.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB and
        resized to 150x150 pixels.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an obscure error in cvtColor.
        raise FileNotFoundError(f"Could not read image file: {path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, (150, 150))
import pickle
# Load the song database: a mapping from song file name to the stored
# embeddings for that song (main() iterates it with .items()).
# NOTE: pickle executes arbitrary code on load, so 'dict.pickle' must only
# ever come from a trusted source.
with open('dict.pickle', 'rb') as database_file:
    songspecdict = pickle.load(database_file)
def list_file_sizes(path="."):
    """Print the name and size of every regular file directly inside *path*.

    Sub-directories are skipped and the listing is not recursive. Each file
    is printed as ``"<name> : <size>MB"`` with the size rounded to three
    decimal places.

    Args:
        path: Directory to inspect. Defaults to the current working
            directory, which is what callers that pass no argument get.

    Returns:
        None. The listing is written to standard output.
    """
    bytes_per_mb = 1024 * 1024
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if not os.path.isfile(full_path):
            continue
        size_mb = os.stat(full_path).st_size / bytes_per_mb
        print("{} : {}MB".format(name, round(size_mb, 3)))
def _find_closest_song(query_embedding):
    """Return the key of ``songspecdict`` whose stored embedding is nearest to *query_embedding*."""
    best_name = None
    best_distance = float("inf")
    for name, embeddings in songspecdict.items():
        for stored in embeddings:
            distance = np.linalg.norm(query_embedding - stored)
            # Strict comparison keeps the first song on ties, matching a
            # first-minimum lookup over the database in insertion order.
            if distance < best_distance:
                best_name, best_distance = name, distance
    if best_name is None:
        raise ValueError("The song database contains no embeddings to compare against.")
    return best_name


def _split_artist_title(filename):
    """Split an ``'<artist>-<title><ext>'`` file name into ``(artist, title)``."""
    artist, separator, title = filename.partition('-')
    if not separator:
        # No hyphen: treat the whole name as the title with an unknown artist.
        return "", os.path.splitext(filename)[0].strip()
    # Only the first hyphen separates artist from title, so hyphenated
    # titles no longer break the unpacking.
    return artist.strip(), os.path.splitext(title)[0].strip()


def _dig(payload, *path):
    """Follow *path* through nested dicts/lists, returning None if any step is missing."""
    node = payload
    for step in path:
        try:
            node = node[step]
        except (KeyError, IndexError, TypeError):
            return None
    return node


def _print_lyrics(artist, title):
    """Look up the lyrics of *title* by *artist* on Musixmatch and print them."""
    from musixmatch import Musixmatch

    # NOTE(security): the literal key below is committed to the repository and
    # should be rotated; set MUSIXMATCH_API_KEY to override it without a code
    # change. The literal is kept only as a backward-compatible fallback.
    api_key = os.environ.get('MUSIXMATCH_API_KEY', '2b0d0615efa782e95598a0e99bda4a60')
    musixmatch = Musixmatch(apikey=api_key)

    lyrics = None
    track_search_results = musixmatch.track_search(q_track=title, q_artist=artist, page_size=1, page=1, s_track_rating='desc')
    if _dig(track_search_results, 'message', 'header', 'status_code') == 200:
        # The track list may be empty even on a 200 response.
        track_id = _dig(track_search_results, 'message', 'body', 'track_list', 0, 'track', 'track_id')
        if track_id is not None:
            lyrics_result = musixmatch.track_lyrics_get(track_id=track_id)
            if _dig(lyrics_result, 'message', 'header', 'status_code') == 200:
                lyrics = _dig(lyrics_result, 'message', 'body', 'lyrics', 'lyrics_body')

    if lyrics is None:
        print("Couldn't find lyrics for the recognized song.")
        return
    # Remove the annotation tags from the lyrics
    lyrics = lyrics.replace('******* This Lyrics is NOT for Commercial use *******', '').strip()
    print("Lyrics:\n", lyrics)


def main(audio):
    """Recognize the song in *audio* and return the matching track's file contents.

    The clip is converted to a mel-spectrogram image, embedded with
    ``embedding_model`` and compared against every embedding in
    ``songspecdict``; the closest entry wins. Its artist, title and (when
    available) lyrics are printed.

    Args:
        audio: Path of the recorded audio file (the interface declares its
            input as ``type="filepath"``), or the raw audio bytes.

    Returns:
        The bytes of the recognized song's audio file.
        NOTE(review): confirm that the installed Gradio version accepts raw
        bytes for an "audio" output; some versions expect a file path.

    Raises:
        ValueError: If no audio was supplied or the song database is empty.
    """
    if audio is None:
        raise ValueError("No audio was provided.")
    if isinstance(audio, (bytes, bytearray)):
        with open(WAVE_OUTPUT_FILE, "wb") as file:
            file.write(audio)
        audio_path = WAVE_OUTPUT_FILE
    else:
        # A file path cannot be written to a binary file; load it directly.
        audio_path = audio
    list_file_sizes()

    # Load the song to match
    song, sr = librosa.load(audio_path)
    # Keep the first DURATION seconds; at librosa's default 22050 Hz this is
    # the 220500 samples that used to be hard-coded here.
    to_match = np.copy(song[0:DURATION * sr])
    print("Loaded data into librosa...")

    # Create spectrogram image of the song to match
    create_spectrogram(to_match, sr, 'test.png')
    print("Created spectogram...")

    # Load the spectrogram image of the song to match
    to_match_img = load_img('test.png')
    to_match_img = np.expand_dims(to_match_img, axis=0)
    print("Loaded spectrum image...")

    # Get the embedding of the song to match
    to_match_emb = embedding_model.predict(to_match_img)
    print("Get song embedding...")

    # Find the database song whose embedding is closest to the query
    recognized_song_name = _find_closest_song(to_match_emb)

    # Get the title and artist of the recognized song
    recognized_song_artist, recognized_song_title = _split_artist_title(recognized_song_name)
    print(f'Artist: {recognized_song_artist}')
    print(f'Title: {recognized_song_title}')

    _print_lyrics(recognized_song_artist, recognized_song_title)

    # Play the recognized song. open() cannot read an https:// URL, so the
    # file is read from the local 'seismese_net_songs' folder instead.
    # NOTE(review): assumes that folder is checked out next to this script,
    # as the previous repository URL suggested - confirm.
    recognized_song_file = os.path.join('seismese_net_songs', recognized_song_name)
    with open(recognized_song_file, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    return audio_bytes
# Custom stylesheet handed to the Gradio interface via its `css` argument.
# It hides the page footer and markdown output, restyles primary buttons
# (normal and hover states) and recolours gallery items on hover.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
button.primary {
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
button.primary:hover{
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(37, 56, 133) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
button.gallery-item:hover {
border-color: rgb(37 56 133) !important;
background-color: rgb(229,225,255) !important;
}
"""
# Not referenced again in the visible code; kept so the module still exposes `demo`.
demo = gr.Blocks()

# Wire main() to a microphone recording (delivered as a file path) and play
# back whatever audio it returns.
mf_transcribe = gr.Interface(
    fn=main,
    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
    outputs="audio",
    css=css,
    theme="huggingface",
    layout="horizontal",
    allow_flagging="never",
)

# Start serving the interface as soon as the module is executed.
mf_transcribe.launch()
|