lengocduc195's picture
pushNe
2359bda
raw
history blame
1.06 kB
from . import InputExample
import csv
import gzip
import os
import gzip
class PairedFilesReader(object):
"""
Reads in the a Pair Dataset, split in two files
"""
def __init__(self, filepaths):
self.filepaths = filepaths
def get_examples(self, max_examples=0):
"""
"""
fIns = []
for filepath in self.filepaths:
fIn = gzip.open(filepath, 'rt', encoding='utf-8') if filepath.endswith('.gz') else open(filepath, encoding='utf-8')
fIns.append(fIn)
examples = []
eof = False
while not eof:
texts = []
for fIn in fIns:
text = fIn.readline()
if text == '':
eof = True
break
texts.append(text)
if eof:
break;
examples.append(InputExample(guid=str(len(examples)), texts=texts, label=1))
if max_examples > 0 and len(examples) >= max_examples:
break
return examples