alexkueck committed on
Commit
4dc9c10
·
1 Parent(s): 55e55f8

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +1 -15
utils.py CHANGED
@@ -204,20 +204,6 @@ shared_state = State()
204
  def daten_laden(name):
205
  return load_dataset('alexkueck/tis', 'alexkueck/tis')
206
 
207
- #Funktion, die den gegebenen Text aus dem Datenset gruppiert
208
- def group_texts(examples):
209
- # Concatenate all texts.
210
- concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
211
- total_length = len(concatenated_examples[list(examples.keys())[0]])
212
- # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
213
- # customize this part to your needs.
214
- total_length = (total_length // block_size) * block_size
215
- # Split by chunks of max_len.
216
- result = {
217
- k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
218
- for k, t in concatenated_examples.items()
219
- }
220
- result["labels"] = result["input_ids"].copy()
221
- return result
222
 
223
 
 
204
  def daten_laden(name):
205
  return load_dataset('alexkueck/tis', 'alexkueck/tis')
206
 
207
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209