Spaces:
Runtime error
Runtime error
File size: 2,988 Bytes
2d5fdd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import pandas as pd
import numpy as np
# --- TEST DATA -------------------------------------------------------------
# Build a table of every sequence. Each line of the listing is a base filename
# whose underscore-separated parts are, in order, the fields named below.
_FIELDS = ("genre", "situation", "camera", "dancer", "musicId", "choreo")

# Read inside a context manager so the handle is closed, and strip only the
# newline: slicing off the last character would truncate the final name when
# the file does not end with a newline. Blank lines are skipped.
with open("analysis/base_filenames.txt", "r") as f:
    seqs = [name.split("_") for name in (line.rstrip("\n") for line in f) if name]
# Index explicitly so a malformed (too short) name still fails loudly.
seqs = [{field: parts[i] for i, field in enumerate(_FIELDS)} for parts in seqs]
df = pd.DataFrame(seqs)

# Hold out one song per genre. The draw is over rows, so within a genre a song
# with more sequences is proportionally more likely to be picked.
# NOTE: no random seed is set, so the chosen songs differ from run to run.
songs = [
    np.random.choice(df[df["genre"] == genre]["musicId"], size=1).item()
    for genre in df["genre"].unique()
]

# The test split is every sequence danced to one of the held-out songs.
# (A finer-grained variant that sampled two dancers per song and two
# choreographies per (song, dancer) pair was tried and abandoned.)
test_data = pd.concat([df[df["musicId"] == song] for song in songs])

# Re-assemble the base filenames and write them one per line.
test_data_seqs = [
    "_".join(row)
    for row in test_data[list(_FIELDS)].itertuples(index=False, name=None)
]
with open("analysis/aistpp_base_filenames_test.txt", "w") as f:
    f.writelines([name + "\n" for name in test_data_seqs])
### TRAIN DATA
# Field order of an underscore-separated base filename.
_SPLIT_FIELDS = ("genre", "situation", "camera", "dancer", "musicId", "choreo")

# Re-read the test split from disk so the train split is derived from exactly
# what was written. The context manager closes the handle; only the newline is
# stripped (slicing off the last character would truncate the final name if the
# file lacks a trailing newline) and blank lines are skipped.
with open("analysis/aistpp_base_filenames_test.txt", "r") as f:
    seqs = [name.split("_") for name in (line.rstrip("\n") for line in f) if name]
# Index explicitly so a malformed (too short) name still fails loudly.
seqs = [{field: parts[i] for i, field in enumerate(_SPLIT_FIELDS)} for parts in seqs]
test_data = pd.DataFrame(seqs)

# Train on every sequence whose song does not appear in the test split.
# (Stricter variants that additionally excluded choreographies seen in the
# test split were tried and abandoned.)
train_data = df[~df["musicId"].isin(test_data["musicId"])]

# Re-assemble the base filenames and write them one per line.
train_data_seqs = [
    "_".join(row)
    for row in train_data[list(_SPLIT_FIELDS)].itertuples(index=False, name=None)
]
with open("analysis/aistpp_base_filenames_train.txt", "w") as f:
    f.writelines([name + "\n" for name in train_data_seqs])
|