Spaces:
Sleeping
Sleeping
MarieGotthardt
committed on
Commit
•
a4dcd8d
1
Parent(s):
b4666e4
Upload song_guesser.py
Browse files- song_guesser.py +36 -0
song_guesser.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from datasketch import MinHash
|
3 |
+
|
4 |
+
class SongGuesser:
    """Guess which stored song a piece of text most closely resembles."""

    @staticmethod
    def guess_song(query):
        """Return the name of the song whose lyrics best match ``query``.

        The song list is read from ``./swedish_christmas_songs.json`` (a path
        relative to the current working directory) on every call. Each entry
        is accessed through its ``'lyrics'`` and ``'name'`` keys. Query and
        lyrics are both lower-cased, split into 5-character shingles, and
        compared through the Jaccard similarity estimated by their MinHash
        signatures.

        Args:
            query: The text to look up.

        Returns:
            The ``'name'`` of the song with the highest estimated similarity,
            or ``""`` when no song scores above zero. On ties the song that
            appears first in the file wins.
        """
        with open("./swedish_christmas_songs.json", "r", encoding='utf-8') as f:
            songs = json.load(f)
        shingle_size = 5

        # The lyrics are lower-cased before shingling, so the query has to be
        # normalised the same way; otherwise any capital letter in the query
        # produces shingles that can never match the lyrics.
        query_shingles = get_shingles(query.lower(), shingle_size)
        query_minhash = create_minhash(query_shingles)

        max_sim = 0
        max_name = ""

        for song in songs:
            song_lyrics = song['lyrics'].lower()
            song_shingles = get_shingles(song_lyrics, shingle_size)
            song_minhash = create_minhash(song_shingles)

            estimated_jaccard = query_minhash.jaccard(song_minhash)
            # Strict comparison: an equal score never displaces an earlier song.
            if estimated_jaccard > max_sim:
                max_sim = estimated_jaccard
                max_name = song['name']

        return max_name
|
28 |
+
|
29 |
+
def get_shingles(text, shingle_size):
    """Collect the distinct substrings of ``text`` of length ``shingle_size``.

    A window of ``shingle_size`` characters is slid over ``text`` one
    character at a time, and every substring it covers is recorded once.

    Args:
        text: The string to split into overlapping shingles.
        shingle_size: Number of characters per shingle.

    Returns:
        A set of the substrings found. The set is empty when ``text`` is
        shorter than ``shingle_size``.
    """
    window_count = len(text) - shingle_size + 1
    shingles = set()
    for start in range(window_count):
        shingles.add(text[start:start + shingle_size])
    return shingles
|
31 |
+
|
32 |
+
def create_minhash(shingles, num_perm=128):
    """Build a MinHash signature from an iterable of string shingles.

    Args:
        shingles: Iterable of strings; each one is UTF-8 encoded and fed to
            the signature.
        num_perm: Passed to ``MinHash`` as its ``num_perm`` argument.

    Returns:
        The ``MinHash`` object after it has been updated with every shingle.
    """
    signature = MinHash(num_perm=num_perm)
    encoded_shingles = (piece.encode('utf8') for piece in shingles)
    for encoded in encoded_shingles:
        signature.update(encoded)
    return signature
|