File size: 2,144 Bytes
af5cf7c
 
 
 
8921edd
 
b571090
af5cf7c
b571090
af5cf7c
b571090
af5cf7c
b571090
8921edd
 
 
af5cf7c
 
 
 
 
 
 
b571090
8921edd
af5cf7c
 
 
 
8921edd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd

# Load the track catalogue and replace missing values with empty strings so
# the string concatenation below never yields NaN.
tracks_df = pd.read_csv('data/music_info.csv').fillna('')
# Display key for each track: "<name>, <artist>, <year>".
tracks_df["entry"] = (
    tracks_df["name"]
    + ", " + tracks_df["artist"]
    + ", " + tracks_df["year"].astype(str)
)
# Load the per-user listening history.
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv')

# Left-join the listening rows onto the catalogue by 'track_id', then turn
# every column into strings.
# NOTE(review): catalogue tracks without any listening row get a missing
# user_id, which becomes the string 'nan' here and would be grouped under a
# 'nan' key below -- confirm this is intended.
dataframe = tracks_df.merge(track_interactions_df, on='track_id', how='left').astype(str)
# Lookup table: user_id -> list of that user's merged rows (as dicts, with the
# 'user_id' column removed).
user_to_track_history_dict = {
    uid: rows.drop(columns='user_id').to_dict(orient='records')
    for uid, rows in dataframe.groupby('user_id')
}

def get_users_with_track_interactions(ascending=False, limit=10):
    """Count the listening-history rows of each user.

    Args:
        ascending: Sort direction for the 'track_interactions' count. The
            default (False) lists the users with the most rows first.
        limit: Maximum number of users to return; None returns every user.

    Returns:
        A list of dicts with the keys 'user_id' and 'track_interactions',
        sorted by 'track_interactions'.
    """
    rows_per_user = track_interactions_df.groupby('user_id').size()
    summary = rows_per_user.reset_index(name='track_interactions').sort_values(
        by='track_interactions', ascending=ascending
    )
    if limit is None:
        return summary.to_dict(orient='records')
    return summary.head(limit).to_dict(orient='records')

def _playcount_as_int(track):
    """Return the track's 'playcount' as an int, or 0 if missing or non-numeric."""
    try:
        # Parse through float() so float-formatted strings such as '3.0' are
        # accepted; str.isdigit() rejects them and would rank them as 0.
        return int(float(track.get('playcount')))
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, 'nan' and 'inf' all count as zero plays.
        return 0


def get_top_tracks_for_user(user_id: str, limit=10):
    """Return a user's tracks ordered by play count, highest first.

    The history records hold stringified values. When the merged 'playcount'
    column is float-typed (which happens when the left merge introduces
    missing values), the strings look like '3.0', so the count is parsed
    numerically instead of being checked with str.isdigit().

    Args:
        user_id: Key into user_to_track_history_dict.
        limit: Maximum number of tracks to return; None returns all of them.

    Returns:
        A list of track dicts sorted by descending play count. Tracks whose
        play count is missing or not numeric sort as 0. Unknown users yield
        an empty list.
    """
    track_list = user_to_track_history_dict.get(user_id, [])
    sorted_tracks = sorted(track_list, key=_playcount_as_int, reverse=True)
    if limit is not None:
        sorted_tracks = sorted_tracks[:limit]
    return sorted_tracks

def get_unlistened_tracks_for_user(user_id:str):
    """Return the catalogue entries that are absent from a user's history.

    Args:
        user_id: Key into user_to_track_history_dict; an unknown user has an
            empty history, so every catalogue entry is returned.

    Returns:
        A list of unique 'entry' strings from tracks_df that do not occur in
        the user's history. The list comes from a set, so its order is
        arbitrary.
    """
    catalogue_entries = set(tracks_df['entry'].tolist())
    history = user_to_track_history_dict.get(user_id, [])
    heard_entries = {record['entry'] for record in history}
    return list(catalogue_entries - heard_entries)

def predictions_to_tracks(entries_and_predictions):
    """Attach prediction scores to the catalogue rows of the predicted entries.

    Args:
        entries_and_predictions: Iterable of (entry, score) pairs. Each entry
            is matched against tracks_df['entry']; the score may be a NumPy
            scalar or a plain Python number.

    Returns:
        A list of dicts in input order, one per pair whose entry exists in
        tracks_df. Each dict holds the columns of the first matching
        catalogue row plus 'score' as a string. Pairs with an unknown entry
        are skipped.
    """
    # Materialise once: the pairs are walked twice and may come from a
    # one-shot iterator such as zip().
    pairs = list(entries_and_predictions)

    # Filter the catalogue a single time instead of scanning the whole
    # DataFrame for every prediction.
    wanted_entries = [entry for entry, _ in pairs]
    matches = tracks_df[tracks_df['entry'].isin(wanted_entries)]
    first_record_by_entry = {}
    for record in matches.to_dict('records'):
        # Keep only the first row per entry, as the per-entry lookup did.
        first_record_by_entry.setdefault(record['entry'], record)

    tracks = []
    for entry, score in pairs:
        record = first_record_by_entry.get(entry)
        if record is None:
            continue
        # Copy so repeated entries do not share (and overwrite) one dict.
        track_dict = dict(record)
        # str() accepts plain Python numbers too; score.astype(str) only
        # exists on NumPy values.
        track_dict['score'] = str(score)
        tracks.append(track_dict)
    return tracks