File size: 792 Bytes
25e310b 642d911 25e310b e3c7b5a 25e310b e3c7b5a 25e310b e3c7b5a 25e310b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from transformers import BartTokenizer
from idiomify.datamodules import IdiomifyDataModule
# Settings handed to IdiomifyDataModule in main().
CONFIG = {
    # Version tag, presumably of the literal2idiomatic dataset -- TODO confirm.
    "literal2idiomatic_ver": "d-1-2",
    # The next three look like DataLoader options; verify how the
    # datamodule actually consumes them.
    "batch_size": 20,
    "num_workers": 4,
    "shuffle": True,
}
def main():
    """Smoke-check the datamodule by printing first-batch shapes.

    Loads the "facebook/bart-base" tokenizer, builds an IdiomifyDataModule
    from CONFIG, runs prepare_data() and setup(), then prints the ``.shape``
    of each of the three items in the first batch of the train dataloader
    followed by the first batch of the test dataloader.
    """
    bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    dm = IdiomifyDataModule(CONFIG, bart_tokenizer)
    dm.prepare_data()
    dm.setup()

    # Bound methods are called lazily inside the loop, so the test dataloader
    # is only created after the train batch has been inspected.
    for make_loader in (dm.train_dataloader, dm.test_dataloader):
        for srcs, tgts_r, tgts in make_loader():
            for item in (srcs, tgts_r, tgts):
                print(item.shape)
            # Only the first batch is of interest.
            break
# Run the check only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|