AmelieSchreiber
committed on
Commit
•
7a4f9a7
1
Parent(s):
90c60d5
Update data_preprocessing_notebook_v1.ipynb
Browse files
data_preprocessing_notebook_v1.ipynb
CHANGED
@@ -340,9 +340,12 @@
|
|
340 |
"num_test_samples = len(test_sequences_by_family)\n",
|
341 |
"num_train_samples = len(train_sequences_by_family)\n",
|
342 |
"\n",
|
343 |
-
"#
|
344 |
-
"
|
345 |
-
"
|
|
|
|
|
|
|
346 |
"\n",
|
347 |
"# Create smaller datasets using the random indices\n",
|
348 |
"test_sequences_small = [test_sequences_by_family[i] for i in random_test_indices]\n",
|
|
|
340 |
"num_test_samples = len(test_sequences_by_family)\n",
|
341 |
"num_train_samples = len(train_sequences_by_family)\n",
|
342 |
"\n",
|
343 |
+
"# Define the percentage of data you want to keep\n",
|
344 |
+
"percentage_to_keep = 3.64 # for keeping 3.64% of the data\n",
|
345 |
+
"\n",
|
346 |
+
"# Generate random indices representing a percentage of each dataset\n",
|
347 |
+
"random_test_indices = random.sample(range(num_test_samples), int(num_test_samples * (percentage_to_keep / 100)))\n",
|
348 |
+
"random_train_indices = random.sample(range(num_train_samples), int(num_train_samples * (percentage_to_keep / 100)))\n",
|
349 |
"\n",
|
350 |
"# Create smaller datasets using the random indices\n",
|
351 |
"test_sequences_small = [test_sequences_by_family[i] for i in random_test_indices]\n",
|