hf-deepmoji / examples /create_twitter_vocab.py
thomwolf's picture
thomwolf HF staff
Initial commit
cc0b62b
raw
history blame
No virus
377 Bytes
""" Creates a vocabulary from a tsv file.
"""
import codecs
import example_helper
from torchmoji.create_vocab import VocabBuilder
from torchmoji.word_generator import TweetWordGenerator
# Build a vocabulary from the raw tweets dump and save it.
#
# The mode is plain 'r' rather than 'rU': the 'U' (universal newlines) flag
# was removed in Python 3.11, and codecs.open() forwards the mode to the
# builtin open(), so 'rU' raises "ValueError: invalid mode" there. Because
# codecs.open() always opens the underlying file in binary mode when an
# encoding is given, 'U' never affected newline handling, so dropping it
# changes nothing on older interpreters.
with codecs.open('../../twitterdata/tweets.2016-09-01', 'r', 'utf-8') as stream:
    # Wrap the decoded stream in the tweet word generator.
    wg = TweetWordGenerator(stream)
    vb = VocabBuilder(wg)
    # Counting runs inside the `with` block so the stream is still open
    # while the generator is consumed.
    vb.count_all_words()
    # save_vocab() is called without arguments, so the output location is
    # whatever VocabBuilder uses by default.
    vb.save_vocab()