smhavens committed on
Commit
20042af
·
1 Parent(s): 6ffe1b1

Return to no separate datasets

Browse files
Files changed (1) hide show
  1. app.py +42 -42
app.py CHANGED
@@ -85,57 +85,57 @@ def training():
85
  n_examples = dataset["train"].num_rows // 2
86
  # n_remaining = dataset["train"].num_rows - n_examples
87
  dataset_clean = {}
88
- dataset_0 = []
89
- dataset_1 = []
90
- dataset_2 = []
91
- dataset_3 = []
92
  for i in range(n_examples):
93
  dataset_clean[i] = {}
94
  dataset_clean[i]["text"] = normalize(train_data[i]["text"], lowercase=True, remove_stopwords=True)
95
  dataset_clean[i]["label"] = train_data[i]["label"]
96
- if train_data[i]["label"] == 0:
97
- dataset_0.append(dataset_clean[i])
98
- elif train_data[i]["label"] == 1:
99
- dataset_1.append(dataset_clean[i])
100
- elif train_data[i]["label"] == 2:
101
- dataset_2.append(dataset_clean[i])
102
- elif train_data[i]["label"] == 3:
103
- dataset_3.append(dataset_clean[i])
104
- n_0 = len(dataset_0) // 2
105
- n_1 = len(dataset_1) // 2
106
- n_2 = len(dataset_2) // 2
107
- n_3 = len(dataset_3) // 2
108
- print("Label lengths:", len(dataset_0), len(dataset_1), len(dataset_2), len(dataset_3))
109
 
110
- # for i in range(n_examples):
111
- # example = dataset_clean[i]
112
- # example_opposite = dataset_clean[-(i)]
113
- # # print(example["text"])
114
- # train_examples.append(InputExample(texts=[example['text'], example_opposite["text"]]))
115
-
116
- for i in range(n_0):
117
- example = dataset_0[i]
118
- # example_opposite = dataset_0[-(i)]
119
  # print(example["text"])
120
- train_examples.append(InputExample(texts=[example['text']], label=0))
121
 
122
- for i in range(n_1):
123
- example = dataset_1[i]
124
- # example_opposite = dataset_1[-(i)]
125
- # print(example["text"])
126
- train_examples.append(InputExample(texts=[example['text']], label=1))
127
 
128
- for i in range(n_2):
129
- example = dataset_2[i]
130
- # example_opposite = dataset_2[-(i)]
131
- # print(example["text"])
132
- train_examples.append(InputExample(texts=[example['text']], label=2))
133
 
134
- for i in range(n_3):
135
- example = dataset_3[i]
136
- # example_opposite = dataset_3[-(i)]
137
- # print(example["text"])
138
- train_examples.append(InputExample(texts=[example['text']], label=3))
 
 
 
 
 
 
139
 
140
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
141
 
 
85
  n_examples = dataset["train"].num_rows // 2
86
  # n_remaining = dataset["train"].num_rows - n_examples
87
  dataset_clean = {}
88
+ # dataset_0 = []
89
+ # dataset_1 = []
90
+ # dataset_2 = []
91
+ # dataset_3 = []
92
  for i in range(n_examples):
93
  dataset_clean[i] = {}
94
  dataset_clean[i]["text"] = normalize(train_data[i]["text"], lowercase=True, remove_stopwords=True)
95
  dataset_clean[i]["label"] = train_data[i]["label"]
96
+ # if train_data[i]["label"] == 0:
97
+ # dataset_0.append(dataset_clean[i])
98
+ # elif train_data[i]["label"] == 1:
99
+ # dataset_1.append(dataset_clean[i])
100
+ # elif train_data[i]["label"] == 2:
101
+ # dataset_2.append(dataset_clean[i])
102
+ # elif train_data[i]["label"] == 3:
103
+ # dataset_3.append(dataset_clean[i])
104
+ # n_0 = len(dataset_0) // 2
105
+ # n_1 = len(dataset_1) // 2
106
+ # n_2 = len(dataset_2) // 2
107
+ # n_3 = len(dataset_3) // 2
108
+ # print("Label lengths:", len(dataset_0), len(dataset_1), len(dataset_2), len(dataset_3))
109
 
110
+ for i in range(n_examples):
111
+ example = dataset_clean[i]
112
+ # example_opposite = dataset_clean[-(i)]
 
 
 
 
 
 
113
  # print(example["text"])
114
+ train_examples.append(InputExample(texts=[example['text']], label=example['label']))
115
 
116
+ # for i in range(n_0):
117
+ # example = dataset_0[i]
118
+ # # example_opposite = dataset_0[-(i)]
119
+ # # print(example["text"])
120
+ # train_examples.append(InputExample(texts=[example['text']], label=0))
121
 
122
+ # for i in range(n_1):
123
+ # example = dataset_1[i]
124
+ # # example_opposite = dataset_1[-(i)]
125
+ # # print(example["text"])
126
+ # train_examples.append(InputExample(texts=[example['text']], label=1))
127
 
128
+ # for i in range(n_2):
129
+ # example = dataset_2[i]
130
+ # # example_opposite = dataset_2[-(i)]
131
+ # # print(example["text"])
132
+ # train_examples.append(InputExample(texts=[example['text']], label=2))
133
+
134
+ # for i in range(n_3):
135
+ # example = dataset_3[i]
136
+ # # example_opposite = dataset_3[-(i)]
137
+ # # print(example["text"])
138
+ # train_examples.append(InputExample(texts=[example['text']], label=3))
139
 
140
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
141