# Spaces: Runtime error  (appears to be page-status text captured along with this file; not part of the script)
import numpy as np
import pandas as pd

# Underscore-separated fields of a base filename, in order of appearance.
FIELDS = ("genre", "situation", "camera", "dancer", "musicId", "choreo")

ALL_FILENAMES_PATH = "analysis/base_filenames.txt"
TEST_FILENAMES_PATH = "analysis/aistpp_base_filenames_test.txt"
TRAIN_FILENAMES_PATH = "analysis/aistpp_base_filenames_train.txt"


def _read_sequences(path: str) -> pd.DataFrame:
    """Parse a text file of base filenames (one per line) into a DataFrame.

    Each non-empty line is split on "_" and its first six parts are stored
    under the column names in ``FIELDS``; any further parts are ignored.

    Raises:
        IndexError: if a non-empty line has fewer than six "_"-separated parts.
    """
    records = []
    # The context manager closes the handle; a bare open(...).readlines()
    # would leave it open.
    with open(path, "r") as f:
        for line in f:
            # Strip only the newline. Slicing off the last character would
            # truncate a final line that has no trailing newline.
            name = line.rstrip("\n")
            if not name:
                continue  # tolerate blank lines
            parts = name.split("_")
            records.append({field: parts[i] for i, field in enumerate(FIELDS)})
    return pd.DataFrame(records, columns=list(FIELDS))


def _write_sequences(frame: pd.DataFrame, path: str) -> None:
    """Write each row of *frame* to *path* as one "_"-joined base filename per line."""
    rows = frame[list(FIELDS)].itertuples(index=False, name=None)
    lines = ["_".join(row) + "\n" for row in rows]
    with open(path, "w") as f:
        f.writelines(lines)


df = _read_sequences(ALL_FILENAMES_PATH)

### TEST DATA
# Pick one song (musicId) per genre; every sequence using a picked song goes
# to the test split.
# NOTE: the draw is made over the genre's rows, so a song that appears in more
# rows is proportionally more likely to be picked. No random seed is set in
# this script, so the split can differ from run to run.
# NOTE: earlier drafts of this script also sub-sampled dancers and choreos per
# song, and tried excluding test choreos from the train split; only the
# per-song split below is in use.
songs = [
    np.random.choice(df[df["genre"] == g]["musicId"], size=1).item()
    for g in df["genre"].unique()
]
test_data = pd.concat([df[df["musicId"] == s] for s in songs])
_write_sequences(test_data, TEST_FILENAMES_PATH)

### TRAIN DATA
# Read the test list back from disk so the train split is derived from exactly
# what was written, then keep every sequence whose song is not in the test split.
test_data = _read_sequences(TEST_FILENAMES_PATH)
train_data = df[~df["musicId"].isin(test_data["musicId"])]
_write_sequences(train_data, TRAIN_FILENAMES_PATH)