Spaces:
Runtime error
Runtime error
import nltk | |
nltk.download('stopwords') | |
nltk.download('wordnet') | |
nltk.download('punkt') | |
from nltk.corpus import stopwords,wordnet | |
from nltk.tokenize import sent_tokenize | |
from flashtext import KeywordProcessor | |
import regex as re | |
import string | |
import subprocess | |
import logging | |
# Make `pke` importable, installing it from GitHub on first use.
# NOTE(review): both status messages are emitted at ERROR level although they
# are informational -- presumably so they show up without any logging
# configuration; confirm before lowering the level.
try:
    import pke
except ImportError:
    # Only a failed import should trigger the installation; anything else
    # (e.g. KeyboardInterrupt) must propagate rather than start a pip run.
    logging.error("installing pke info")
    subprocess.run(['pip3', 'install', 'git+https://github.com/boudinfl/pke.git'])
    # Also download the spaCy English model (presumably required by pke when
    # documents are loaded with language='en' -- TODO confirm).
    subprocess.run(['python3', '-m', 'spacy', 'download', 'en'])
    import pke
else:
    logging.error("importing pke info")
def tokenize_sentence(text):
    """Split *text* into sentences and keep only the longer ones.

    The text is segmented with ``sent_tokenize``. A sentence is kept when its
    length, as returned by the tokenizer, exceeds 20 characters; kept
    sentences are returned with leading and trailing whitespace removed.
    """
    kept = []
    for sentence in sent_tokenize(text):
        # The length filter is applied before trimming whitespace.
        if len(sentence) > 20:
            kept.append(sentence.strip())
    return kept
def get_noun_adj_verb(text):
    """Extract up to five key phrases built from nouns, verbs and adjectives.

    Runs pke's unsupervised ``MultipartiteRank`` extractor over *text* and
    returns the first element of each of the five best entries it reports.

    The result is de-duplicated while keeping the order in which the
    extractor returned the phrases, so the output is deterministic from run
    to run.

    Extraction is best-effort: any exception raised along the way is printed
    and whatever was collected so far (normally an empty list) is returned.
    """
    ranked = []
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en', normalization=None)
        # Candidate selection. Available POS tags: 'ADJ' 'ADP' 'ADV' 'AUX'
        # 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
        extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
        # Candidate weighting.
        extractor.candidate_weighting(threshold=0.9, method='average', alpha=1.1)
        # Keep only the phrase (index 0) of each of the top five entries.
        ranked = [entry[0] for entry in extractor.get_n_best(n=5)]
    except Exception as e:
        print("found exception", e)
    # dict.fromkeys drops duplicates but, unlike set(), preserves order.
    return list(dict.fromkeys(ranked))