# Build the flattened BigNews dataset under /dev/shm.
#
# Active pipeline:
#   1. Run process_bignews.py on the train and val JSON dumps, passing
#      /dev/shm/news_train and /dev/shm/news_val as the second argument.
#   2. Run create_dataset.py with /dev/shm/news and /dev/shm/bignews_flattened.
#
# NOTE(review): the second argument of process_bignews.py looks like an output
# prefix (the pv lines below copy files named <prefix>_text.txt), and
# create_dataset.py presumably takes <input prefix> <output path> -- confirm
# against those scripts.
#
# The steps are chained with && so that a later step never runs on missing or
# stale inputs after an earlier step has failed.

# Alternative to step 1: copy already-processed text files into /dev/shm
# instead of re-running process_bignews.py.
# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt

python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train \
  && python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val \
  && python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened

# "Lite" variant: process with --ratio 0.05, stage the resulting text files in
# /dev/shm, then build /dev/shm/bignews_lite_flattened.
# SPLIT is not set anywhere in this script; define it (the file names used here
# are "train" and "val") before enabling the first line.
# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened