AmelieSchreiber committed on
Commit
2331ba4
1 Parent(s): 7a4f9a7

Update data_preprocessing_notebook_v1.ipynb

Browse files
data_preprocessing_notebook_v1.ipynb CHANGED
@@ -340,12 +340,12 @@
340
  "num_test_samples = len(test_sequences_by_family)\n",
341
  "num_train_samples = len(train_sequences_by_family)\n",
342
  "\n",
343
- "# Define the percentage of data you want to keep",
344
- "percentage_to_keep = 3.64 # for keeping 3.64% of the data",
345
  "\n",
346
- "# Generate random indices representing a percentage of each dataset",
347
- "random_test_indices = random.sample(range(num_test_samples), int(num_test_samples * (percentage_to_keep / 100)))",
348
- "random_train_indices = random.sample(range(num_train_samples), int(num_train_samples * (percentage_to_keep / 100)))",
349
  "\n",
350
  "# Create smaller datasets using the random indices\n",
351
  "test_sequences_small = [test_sequences_by_family[i] for i in random_test_indices]\n",
 
340
  "num_test_samples = len(test_sequences_by_family)\n",
341
  "num_train_samples = len(train_sequences_by_family)\n",
342
  "\n",
343
+ "# Define the percentage of data you want to keep\n",
344
+ "percentage_to_keep = 3.64 # for keeping 3.64% of the data\n",
345
  "\n",
346
+ "# Generate random indices representing a percentage of each dataset\n",
347
+ "random_test_indices = random.sample(range(num_test_samples), int(num_test_samples * (percentage_to_keep / 100)))\n",
348
+ "random_train_indices = random.sample(range(num_train_samples), int(num_train_samples * (percentage_to_keep / 100)))\n",
349
  "\n",
350
  "# Create smaller datasets using the random indices\n",
351
  "test_sequences_small = [test_sequences_by_family[i] for i in random_test_indices]\n",