Christina Theodoris committed on
Commit
45b9d69
1 Parent(s): b36d210

Change list of individual IDs to a set to ensure uniqueness before subsetting into train/valid/test sets

Browse files
examples/hyperparam_optimiz_for_disease_classifier.py CHANGED
@@ -67,10 +67,10 @@ def classes_to_ids(example):
67
  trainset_v4 = trainset_v3.map(classes_to_ids, num_proc=num_proc)
68
 
69
  # separate into train, validation, test sets
70
- indiv_list = trainset_v4["individual"]
71
  random.seed(42)
72
- train_indiv = random.sample(indiv_list,round(0.7*len(indiv_list)))
73
- eval_indiv = [indiv for indiv in indiv_list if indiv not in train_indiv]
74
  valid_indiv = random.sample(eval_indiv,round(0.5*len(eval_indiv)))
75
  test_indiv = [indiv for indiv in eval_indiv if indiv not in valid_indiv]
76
 
 
67
  trainset_v4 = trainset_v3.map(classes_to_ids, num_proc=num_proc)
68
 
69
  # separate into train, validation, test sets
70
+ indiv_set = set(trainset_v4["individual"])
71
  random.seed(42)
72
+ train_indiv = random.sample(indiv_set,round(0.7*len(indiv_set)))
73
+ eval_indiv = [indiv for indiv in indiv_set if indiv not in train_indiv]
74
  valid_indiv = random.sample(eval_indiv,round(0.5*len(eval_indiv)))
75
  test_indiv = [indiv for indiv in eval_indiv if indiv not in valid_indiv]
76