Shea committed on
Commit
a9871b8
1 Parent(s): e097941
Files changed (2) hide show
  1. app.py +2 -35
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,41 +1,8 @@
1
-
2
-
3
  import gradio as gr
4
- import pandas as pd
5
  import numpy as np
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
- from sentence_transformers import SentenceTransformer
8
  from datasets import load_dataset
9
 
10
- dataset = load_dataset(
11
- "sheacon/song_lyrics",
12
- revision="main" # tag name, or branch name, or commit hash
13
- )
14
-
15
- df = dataset.to_pandas()
16
-
17
- minilm = SentenceTransformer('all-MiniLM-L12-v2')
18
- #roberta = SentenceTransformer('all-distilroberta-v1')
19
- #glove = SentenceTransformer('average_word_embeddings_glove.840B.300d')
20
-
21
- # Tokenize and encode the song lyrics using the embedding model
22
- song_embeddings = df["embedding"].tolist()
23
-
24
- def search_songs(text, top_n=5):
25
- # Tokenize and encode the text entry using the same embedding model
26
- text_embedding = minilm([text])[0]
27
-
28
- # Calculate the cosine similarity between the text entry embedding and each song embedding
29
- similarities = cosine_similarity([text_embedding], song_embeddings)[0]
30
-
31
- # Sort the songs by similarity score and return the top N songs with their titles and lyrics
32
- top_indices = similarities.argsort()[::-1][:top_n]
33
- results = [{"title": df.iloc[i]["title"], "lyrics": df.iloc[i]["lyrics"]} for i in top_indices]
34
-
35
- return results
36
-
37
- # Define the Gradio interface
38
- iface = gr.Interface(search_songs, "textbox", "text", examples=[["I'm feeling lonely tonight"]])
39
 
40
- # Launch the interface
41
- iface.launch()
 
 
 
1
  import gradio as gr
 
2
  import numpy as np
3
+ import pandas as pd
4
  from sklearn.metrics.pairwise import cosine_similarity
 
5
  from datasets import load_dataset
6
 
7
+ dataset = load_dataset("sheacon/song_lyrics")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  gradio
2
- sentence_transformers
3
  scikit-learn
4
  datasets
 
1
  gradio
2
+ pandas
3
  scikit-learn
4
  datasets