|
#!/bin/bash |
|
|
|
""" |
|
This file is adapted from |
|
https://github.com/salesforce/awd-lstm-lm/blob/master/getdata.sh |
|
Copyright by the AWD LSTM authors. |
|
""" |
|
DATA_DIR=$1 |
|
echo "- Downloading WikiText-2" |
|
|
|
wget --continue -P $DATA_DIR https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip |
|
unzip -q $DATA_DIR/wikitext-2-v1.zip -d $DATA_DIR |
|
cd $DATA_DIR/wikitext-2 |
|
mv wiki.train.tokens train.txt |
|
sed -i -e "s/<unk>/[UNK]/g" train.txt |
|
mv wiki.valid.tokens valid.txt |
|
sed -i -e "s/<unk>/[UNK]/g" valid.txt |
|
mv wiki.test.tokens test.txt |
|
sed -i -e "s/<unk>/[UNK]/g" test.txt |
|
cd .. |
|
rm wikitext-2-v1.zip |
|
|
|
echo "- WikiText-2 saved at $DATA_DIR/wikitext-2" |
|
|