Spaces:
Running
Running
Current output if translation predicts new line
Browse files
- dataset.py +3 -3
- translate.py +4 -1
dataset.py
CHANGED
|
@@ -17,7 +17,7 @@ class DatasetReader(IterableDataset):
|
|
| 17 |
|
| 18 |
def preprocess(self, text: str):
|
| 19 |
self.current_line += 1
|
| 20 |
-
text = text.
|
| 21 |
if len(text) == 0:
|
| 22 |
print(f"Warning: empty sentence at line {self.current_line}")
|
| 23 |
return self.tokenizer(
|
|
@@ -52,8 +52,8 @@ class ParallelTextReader(IterableDataset):
|
|
| 52 |
|
| 53 |
def preprocess(self, pred: str, gold: str):
|
| 54 |
self.current_line += 1
|
| 55 |
-
pred = pred.
|
| 56 |
-
gold = gold.
|
| 57 |
if len(pred) == 0:
|
| 58 |
print(f"Warning: Pred empty sentence at line {self.current_line}")
|
| 59 |
if len(gold) == 0:
|
|
|
|
| 17 |
|
| 18 |
def preprocess(self, text: str):
|
| 19 |
self.current_line += 1
|
| 20 |
+
text = text.strip()
|
| 21 |
if len(text) == 0:
|
| 22 |
print(f"Warning: empty sentence at line {self.current_line}")
|
| 23 |
return self.tokenizer(
|
|
|
|
| 52 |
|
| 53 |
def preprocess(self, pred: str, gold: str):
|
| 54 |
self.current_line += 1
|
| 55 |
+
pred = pred.strip()
|
| 56 |
+
gold = gold.strip()
|
| 57 |
if len(pred) == 0:
|
| 58 |
print(f"Warning: Pred empty sentence at line {self.current_line}")
|
| 59 |
if len(gold) == 0:
|
translate.py
CHANGED
|
@@ -209,7 +209,10 @@ def main(
|
|
| 209 |
else:
|
| 210 |
samples_seen += len(tgt_text)
|
| 211 |
|
| 212 |
-
print(
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
|
| 215 |
|
|
|
|
| 209 |
else:
|
| 210 |
samples_seen += len(tgt_text)
|
| 211 |
|
| 212 |
+
print(
|
| 213 |
+
"\n".join([repr(sentence) for sentence in tgt_text]),
|
| 214 |
+
file=output_file,
|
| 215 |
+
)
|
| 216 |
|
| 217 |
pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
|
| 218 |
|