semantic-song-search

Running

App Files Community

Shea committed on Apr 17, 2023

Commit

58a182b

•

1 Parent(s): 542351c

try new interface

Browse files

Files changed (1) hide show

app.py +31 -31

app.py CHANGED Viewed

@@ -1,41 +1,41 @@
-import numpy as np
 import gradio as gr
 from sentence_transformers import SentenceTransformer
 minilm = SentenceTransformer('all-MiniLM-L12-v2')
 #roberta = SentenceTransformer('all-distilroberta-v1')
 #glove = SentenceTransformer('average_word_embeddings_glove.840B.300d')
-labels = ["contradiction", "entailment", "neutral"]
-def predict(sentence1, sentence2):
-    sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
-    print(sentence1)
-    print(sentence2)
-#    test_data = BertSemanticDataGenerator(
-#        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
-#    )
-#    probs = model.predict(test_data[0])[0]
-#    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
- #   return labels_probs
-examples = [["Two women are observing something together.", "Two women are standing with their eyes closed."],
-            ["A smiling costumed woman is holding an umbrella", "A happy woman in a fairy costume holds an umbrella"],
-            ["A soccer game with multiple males playing", "Some men are playing a sport"],
-            ]
-gr.Interface(
-    fn=predict,
-    title="Semantic Song Search",
-    description = "Search for songs based on the meaning in the song's lyrics using a variety of embeddings",
-    inputs=["text", "text"],
-    examples=examples,
-    #outputs=gr.Textbox(label='Prediction'),
-    outputs=gr.outputs.Label(num_top_classes=3, label='Semantic similarity'),
-    cache_examples=True,
-    article = "Author: @sheacon",
-).launch(debug=True, enable_queue=True)

 import gradio as gr
+import pandas as pd
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
 from sentence_transformers import SentenceTransformer
+from datasets import load_dataset
+dataset = load_dataset(
+  "sheacon/song_lyrics",
+  revision="main"  # tag name, or branch name, or commit hash
+)
+df = dataset.to_pandas()
 minilm = SentenceTransformer('all-MiniLM-L12-v2')
 #roberta = SentenceTransformer('all-distilroberta-v1')
 #glove = SentenceTransformer('average_word_embeddings_glove.840B.300d')
+# Collect the per-song vectors from the dataset's "embedding" column; no
+# encoding happens here (presumably they were precomputed with the same
+# MiniLM model — confirm against the dataset).
+song_embeddings = df["embedding"].tolist()
+def search_songs(text, top_n=5):
+    # Tokenize and encode the text entry using the same embedding model
+    text_embedding = minilm([text])[0]
+    # Calculate the cosine similarity between the text entry embedding and each song embedding
+    similarities = cosine_similarity([text_embedding], song_embeddings)[0]
+    # Sort the songs by similarity score and return the top N songs with their titles and lyrics
+    top_indices = similarities.argsort()[::-1][:top_n]
+    results = [{"title": df.iloc[i]["title"], "lyrics": df.iloc[i]["lyrics"]} for i in top_indices]
+    return results
+# Define the Gradio interface: one textbox input routed to search_songs, a
+# plain-text output, and a single example query.
+iface = gr.Interface(search_songs, "textbox", "text", examples=[["I'm feeling lonely tonight"]])
+# Launch the interface (runs at import time; there is no __main__ guard).
+iface.launch()