oroszgy's picture
feat: initial commit
f214d73 unverified
raw
history blame
513 Bytes
from typing import List, Tuple
import pandas as pd
from examples.common import NLP
from textacy.extract.keyterms.sgrank import sgrank as keywords
def process(text: str) -> pd.DataFrame:
    """Extract key phrases from ``text`` and return them as a table.

    The text is run through the shared ``NLP`` pipeline and the ten
    top-ranked key terms are requested from the SGRank extractor
    (``topn=10``). A term is dropped when it occurs inside a different
    extracted term, so only the most specific phrases remain.

    Args:
        text: Raw input text to analyse.

    Returns:
        A DataFrame with one row per retained term, in the order the
        extractor returned them. Column ``"Keyphrase"`` holds the term and
        ``"Probability"`` holds the score the extractor paired with it.
        Both columns are present even when no term is retained.
    """
    doc = NLP(text)
    terms: List[Tuple[str, float]] = keywords(doc, topn=10)
    candidates = [term for term, _ in terms]

    rows = [
        {"Keyphrase": term, "Probability": score}
        for term, score in terms
        # Plain (character-level) substring test: keep a term only if no
        # *other* candidate contains it.
        if not any(other != term and term in other for other in candidates)
    ]

    # Name the columns explicitly: built from an empty list, a DataFrame
    # would otherwise have no columns at all, which breaks callers that
    # index or display "Keyphrase" / "Probability".
    return pd.DataFrame(rows, columns=["Keyphrase", "Probability"])