"""Tokenize an Italian UTF-8 text file into word and sentence lists.

Reads the configured input file, tokenizes it with NLTK's Italian
models, and exposes both original-case and lowercased token lists at
module level.
"""
import nltk
import simplemma
from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize
from simplemma import text_lemmatizer

# Path of the text file to process.
file = "text.txt"

# Fetch the Punkt tokenizer models needed by word_tokenize/sent_tokenize.
nltk.download('punkt')


def get_lists(file):
    """Tokenize the text in *file* into word and sentence lists.

    Parameters
    ----------
    file : str
        Path to a UTF-8 encoded text file.

    Returns
    -------
    tuple[list[str], list[str], list[str], list[str]]
        (words, lowercased words, sentences, lowercased sentences),
        tokenized with NLTK's Italian models.
    """
    with open(file, 'r', encoding='utf-8') as f:
        text = f.read()
    word_tokenized_text = word_tokenize(text, language='italian')
    sent_tokenized_text = sent_tokenize(text, language='italian')
    word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]
    sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
    return (
        word_tokenized_text,
        word_tokenized_text_lower,
        sent_tokenized_text,
        sent_tokenized_text_lower,
    )


words, words_lower, sentences, sentences_lower = get_lists(file)