Spaces:

huspacy
/

example-applications

Runtime error

Improved keyphrase extraction by PoS filtering and IDF scores

e2e35eb unverified about 2 years ago

No virus

883 Bytes

	from typing import List, Tuple

	import gradio as gr
	import pandas as pd
	from textacy.extract.keyterms.sgrank import sgrank as keywords

	from examples.common import NLP, IDF, NEWS_EXAMPLES


	def process(text: str) -> pd.DataFrame:
	    """Extract the top keyphrases of ``text`` as a two-column table.

	    The text is passed through the shared ``NLP`` object and ranked with
	    textacy's SGRank, restricted to nouns and proper nouns, using 1- to
	    3-grams and ``IDF`` as the ``idf`` argument. A keyphrase that occurs as
	    a substring of a different returned keyphrase is dropped, so nested
	    variants of the same phrase are not listed twice.

	    Args:
	        text: Raw input text to analyse.

	    Returns:
	        A DataFrame with the columns ``Keyphrase`` and ``Score``, one row per
	        surviving keyphrase, in the order SGRank returned them. Both columns
	        are present even when no keyphrase is found.
	    """
	    doc = NLP(text)
	    ranked: List[Tuple[str, float]] = keywords(
	        doc,
	        topn=10,
	        include_pos=("NOUN", "PROPN"),
	        idf=IDF,
	        ngrams=(1, 2, 3),
	    )
	    phrases = [phrase for phrase, _ in ranked]
	    rows = [
	        {"Keyphrase": phrase, "Score": score}
	        for phrase, score in ranked
	        # Keep a phrase only if no *other* returned phrase contains it
	        # (plain substring containment, not token-boundary matching).
	        if all(other == phrase or phrase not in other for other in phrases)
	    ]
	    # Naming the columns explicitly keeps the table header intact when
	    # `rows` is empty; a bare DataFrame([]) would have no columns at all.
	    return pd.DataFrame(rows, columns=["Keyphrase", "Score"])


	# Gradio interface for keyphrase extraction: the text box content is passed
	# to `process`, and the DataFrame it returns is rendered as the output table.
	demo = gr.Interface(
	    fn=process,
	    inputs=gr.Textbox(
	        label="Input text",
	        show_label=True,
	        lines=10,
	        value=NEWS_EXAMPLES[0],  # pre-filled with the first example text
	    ),
	    outputs=gr.DataFrame(
	        label="Keywords",
	        show_label=False,
	        max_rows=10,
	        max_cols=2,
	    ),
	    examples=NEWS_EXAMPLES,
	    # NOTE: example caching is currently switched off:
	    # cache_examples=True,
	)