File size: 700 Bytes
e088236
 
 
 
 
 
 
 
efde5e9
e088236
 
 
 
 
 
 
efde5e9
e088236
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Install the third-party dependencies (nltk, numpy) before importing them.
import os
import subprocess
import sys

# Invoke pip as a module of the interpreter that is running this script so the
# packages are installed into the same environment; a bare `pip` found on PATH
# may belong to a different Python installation.
# check=False keeps the install best-effort (as the original os.system call
# was): if pip fails but the packages are already present, the imports below
# still succeed.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "nltk", "numpy"],
    check=False,
)
import nltk
import numpy as np

# nltk.word_tokenize needs the Punkt tokenizer data. Older NLTK releases read
# the 'punkt' resource, while newer releases read 'punkt_tab' instead; fetch
# both so tokenization works with whichever version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')
def train_model(corpus):
    """Build a bigram successor table from raw text.

    The text is split into tokens with ``nltk.word_tokenize``; every token is
    then mapped to the list of tokens that immediately follow it, in order of
    appearance. Repeated successors are kept as duplicates in the list.

    Args:
        corpus: The text to tokenize.

    Returns:
        A plain ``dict`` mapping each token that has at least one successor
        to the list of its successors. The final token only appears as a key
        if it also occurs earlier in the text. The dict is empty when the
        text yields fewer than two tokens.
    """
    print("Training the model...")
    tokens = nltk.word_tokenize(corpus)
    model = {}
    # Pair every token with the one right after it. zip() stops at the end of
    # the shorter (shifted) sequence, so the last token is never used as a
    # predecessor.
    for current, following in zip(tokens, tokens[1:]):
        model.setdefault(current, []).append(following)
    # NOTE: this function only returns the model; writing it to disk is left
    # to the caller, despite the wording of the message below.
    print("Model has been trained. Saving the model...")
    return model

import pickle

# Train the model on a given corpus.
# Read the corpus inside a context manager so the file handle is closed
# deterministically instead of being left for the garbage collector.
with open('corpus.txt') as corpus_file:
    corpus = corpus_file.read()
model = train_model(corpus)

# Save the model to a file.
# NOTE: model.pkl is a pickle; only load it back from a trusted location,
# because unpickling can execute arbitrary code.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)