Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
@@ -204,20 +204,6 @@ shared_state = State()
|
|
204 |
def daten_laden(name):
|
205 |
return load_dataset('alexkueck/tis', 'alexkueck/tis')
|
206 |
|
207 |
-
|
208 |
-
def group_texts(examples):
|
209 |
-
# Concatenate all texts.
|
210 |
-
concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
|
211 |
-
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
212 |
-
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
213 |
-
# customize this part to your needs.
|
214 |
-
total_length = (total_length // block_size) * block_size
|
215 |
-
# Split by chunks of max_len.
|
216 |
-
result = {
|
217 |
-
k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
218 |
-
for k, t in concatenated_examples.items()
|
219 |
-
}
|
220 |
-
result["labels"] = result["input_ids"].copy()
|
221 |
-
return result
|
222 |
|
223 |
|
|
|
204 |
def daten_laden(name):
|
205 |
return load_dataset('alexkueck/tis', 'alexkueck/tis')
|
206 |
|
207 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
|
209 |
|