semantic-song-search

Running

Shea committed on Apr 18, 2023

Commit

a9871b8

•

1 Parent(s): e097941

update

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,41 +1,8 @@
 import gradio as gr
-import pandas as pd
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
-from sentence_transformers import SentenceTransformer
 from datasets import load_dataset
-dataset = load_dataset(
-  "sheacon/song_lyrics",
-  revision="main"  # tag name, or branch name, or commit hash
-)
-df = dataset.to_pandas()
-minilm = SentenceTransformer('all-MiniLM-L12-v2')
-#roberta = SentenceTransformer('all-distilroberta-v1')
-#glove = SentenceTransformer('average_word_embeddings_glove.840B.300d')
-# Tokenize and encode the song lyrics using the embedding model
-song_embeddings = df["embedding"].tolist()
-def search_songs(text, top_n=5):
-    # Tokenize and encode the text entry using the same embedding model
-    text_embedding = minilm([text])[0]
-    # Calculate the cosine similarity between the text entry embedding and each song embedding
-    similarities = cosine_similarity([text_embedding], song_embeddings)[0]
-    # Sort the songs by similarity score and return the top N songs with their titles and lyrics
-    top_indices = similarities.argsort()[::-1][:top_n]
-    results = [{"title": df.iloc[i]["title"], "lyrics": df.iloc[i]["lyrics"]} for i in top_indices]
-    return results
-# Define the Gradio interface
-iface = gr.Interface(search_songs, "textbox", "text", examples=[["I'm feeling lonely tonight"]])
-# Launch the interface
-iface.launch()

 import gradio as gr
 import numpy as np
+import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 from datasets import load_dataset
+dataset = load_dataset("sheacon/song_lyrics")

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 gradio
-sentence_transformers
 scikit-learn
 datasets

 gradio
+pandas
 scikit-learn
 datasets