File size: 700 Bytes
e088236 efde5e9 e088236 efde5e9 e088236 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# Install and import the third-party dependencies (nltk, numpy).
import os
import subprocess
import sys

# Invoke pip through the interpreter that is running this script so the
# packages land in the environment we are about to import from; a bare `pip`
# on PATH may belong to a different Python installation. The install stays
# best-effort (check=False): a failure surfaces as ImportError just below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "nltk", "numpy"],
    check=False,
)

import nltk
import numpy as np

# nltk.word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK
# releases load the 'punkt' resource, newer ones load 'punkt_tab', so fetch
# both; an identifier unknown to the installed version is reported by
# nltk.download and skipped.
nltk.download('punkt')
nltk.download('punkt_tab')
def train_model(corpus, tokenizer=None):
    """Build a successor table from the tokens of *corpus*.

    The text is tokenized and every token is mapped to the list of tokens
    that immediately follow it, in order of appearance. Duplicates are kept,
    so a successor that occurs more often appears more often in the list.
    The final token is only added as a key if it also occurs earlier in the
    text.

    Args:
        corpus: The training text.
        tokenizer: Optional callable that takes the text and returns an
            iterable of tokens. Defaults to ``nltk.word_tokenize``.

    Returns:
        A dict mapping each token to the list of tokens observed directly
        after it. Empty when the corpus yields fewer than two tokens.
    """
    print("Training the model...")
    if tokenizer is None:
        tokenizer = nltk.word_tokenize
    # Materialize the tokens so that any iterable (including a generator)
    # can be paired with itself shifted by one position.
    tokens = list(tokenizer(corpus))
    model = {}
    for current, following in zip(tokens, tokens[1:]):
        model.setdefault(current, []).append(following)
    print("Model has been trained. Saving the model...")
    return model
import pickle

# Train the model on a given corpus.
# Read the corpus inside a context manager so the file handle is closed as
# soon as the text has been loaded instead of lingering until garbage
# collection.
with open('corpus.txt') as corpus_file:
    corpus = corpus_file.read()
model = train_model(corpus)

# Save the model to a file.
# NOTE: unpickling can execute arbitrary code, so model.pkl should only ever
# be loaded from a trusted location.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)
|