"""Gradio app that matches entered lyrics against a pickled lyrics corpus.

The corpus is loaded once at startup (when run as a script) and every request
scores the entered text against each corpus entry, returning the best match.
"""

import datetime
import pickle
import random
import time

import gradio as gr
from pytz import timezone

# Timezone used for the timestamps printed to the log. Defined at module level
# so classify_lyrics also works when this module is imported, not only when it
# is executed as a script.
PDT = timezone('US/Pacific')

# Corpus searched by classify_lyrics; filled in by the __main__ block below.
# As used in this file, each entry is indexed as
# (title/artist, genre, sequence of words).
lyrics_set_final = []


def _tokenize_lyrics(lyric):
    """Return the lower-cased, letters-only words of *lyric*, de-duplicated.

    The text is split on newlines and then on single spaces. Every non-letter
    character is stripped from each token. First-seen order is preserved.
    """
    words = []
    for line in lyric.lower().split('\n'):
        for token in line.split(' '):
            words.append(''.join(filter(str.isalpha, token)))
    # dict.fromkeys removes duplicates while keeping insertion order.
    return list(dict.fromkeys(words))


def _match_ratio(query_words, query_word_set, entry_words):
    """Score one corpus entry against the query; higher is a better match.

    The score is the mean of two ratios:
      * shared words / size of the smaller word set, and
      * same-position equal words / length of the shorter word sequence.

    Returns 0.0 when either side has no words, instead of dividing by zero.
    """
    entry_word_set = set(entry_words)

    # Normalise by the *size* of the smaller collection. Calling min() on the
    # collections themselves compares sets by subset relation and lists
    # lexicographically, which does not select the shorter one.
    shared_denominator = min(len(query_word_set), len(entry_word_set))
    positional_denominator = min(len(query_words), len(entry_words))
    if not shared_denominator or not positional_denominator:
        return 0.0

    shared_ratio = len(query_word_set & entry_word_set) / shared_denominator
    positional_hits = sum(a == b for a, b in zip(query_words, entry_words))
    positional_ratio = positional_hits / positional_denominator

    return (shared_ratio + positional_ratio) / 2


def classify_lyrics(lyric: str):
    """Find the corpus entry that best matches the given lyrics.

    Args:
        lyric: Free-form lyrics text; lines separated by newlines.

    Returns:
        A tuple ``(match_ratio, title_artist, genre)`` for the best-scoring
        entry. When several entries share the best score, one of them is
        picked at random.

    Raises:
        RuntimeError: If the lyrics corpus is empty (not loaded).
    """
    print('=' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = time.perf_counter()

    print('=' * 70)
    print('Req string:', lyric)
    print('=' * 70)

    print('Preparing to search...')

    query_words = _tokenize_lyrics(lyric)
    query_word_set = set(query_words)

    # Bind the corpus once; it is only read here, never reordered, so
    # concurrent requests cannot invalidate the indices collected below.
    corpus = lyrics_set_final
    if not corpus:
        raise RuntimeError('Lyrics data is not loaded; nothing to search.')

    print('Searching titles...Please wait...')

    # Single pass: remember the best score and every index that attains it.
    best_ratio = -1.0
    best_indices = []
    for index, entry in enumerate(corpus):
        ratio = _match_ratio(query_words, query_word_set, entry[2])
        if ratio > best_ratio:
            best_ratio = ratio
            best_indices = [index]
        elif ratio == best_ratio:
            best_indices.append(index)

    # Random tie-break among equally good entries, without shuffling the
    # shared corpus on every request.
    result = corpus[random.choice(best_indices)][:2]

    print('Done!')
    print('=' * 70)

    print('Search match ratio:', best_ratio)
    print('Found title/artist and genre:', result[0], "---", result[1])

    print('=' * 70)
    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('-' * 70)
    print('Req execution time:', (time.perf_counter() - start_time), 'sec')
    print('=' * 70)

    return best_ratio, result[0], result[1]


demo = gr.Interface(
    fn=classify_lyrics,
    inputs=[
        gr.Textbox(
            label="Enter any lyrics here",
            value="So close, no matter how far\nCouldn't be much more from the heart\nForever trusting who we are\nAnd nothing else matters",
        ),
    ],
    outputs=[
        gr.Label(label="Match score"),
        gr.Textbox(label="Title/Artist"),
        gr.Textbox(label="Genre (if identified)"),
    ],
    title="Algorithmic Lyrics Classifier",
    description="Algorithmic match lyrics classification by artist and genre",
)

if __name__ == "__main__":

    print('=' * 70)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('=' * 70)
    print('Loading data...')

    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # load this file from a trusted source.
    with open('English_Lyrics_Ordered_Sets_Small_1358353.pickle', 'rb') as f:
        lyrics_set_final = pickle.load(f)

    print('=' * 70)
    print('Done!')
    print('=' * 70)

    demo.launch()