Spaces:

darylfunggg
/

text-analysis

Sleeping

text-analysis / app.py

Daryl Fung

initial commit

9b9ea2f almost 2 years ago

2.47 kB

	import gradio as gr
	from keybert import KeyBERT
	import spacy
	import string
	from spacy import displacy
	from pathlib import Path
	from PIL import Image

	from keyword_extraction import keyword_extract
	from keyphrase_extraction import get_top_key_phrases, display_key_phrases
	from word import show_gram_plot


	# Load spaCy's "en_core_web_sm" pipeline once at import time; greet() reuses
	# this shared object for the named-entity step instead of reloading it per call.
	nlp = spacy.load("en_core_web_sm")

	def greet(name, descriptions):
	    """Run the text-analysis pipeline and return its artefacts for the UI.

	    Punctuation is stripped from ``descriptions``; the cleaned text is then
	    passed to the n-gram plots, the spaCy named-entity rendering and the
	    key-phrase helpers. Every artefact is written under ``results/<name>/``.

	    Args:
	        name: Data asset name, used as the sub-directory of ``results/``.
	        descriptions: Free text to analyse.

	    Returns:
	        A list of nine items, in this order: the 1-, 2- and 3-gram images,
	        the 1-, 2- and 3-keyword images, the top key-phrase image (all
	        opened with PIL), the named-entity HTML string, and whatever
	        ``display_key_phrases`` returns.
	    """
	    descriptions = descriptions.translate(str.maketrans('', '', string.punctuation))

	    # Keep the directory as a plain string so the paths handed to the helper
	    # functions stay exactly as they were; create it up front so the direct
	    # write of ner.html below cannot fail on a missing directory.
	    result_dir = f'results/{name}'
	    Path(result_dir).mkdir(parents=True, exist_ok=True)

	    outputs = []

	    # run word count
	    for n in (1, 2, 3):
	        gram_path = f'{result_dir}/{n}_gram.png'
	        show_gram_plot(descriptions, n, 10, save_output=gram_path)
	        outputs.append(Image.open(gram_path))

	    # run named entity recognition
	    spacy_descriptions = nlp(descriptions)
	    # Create a visualization of named entities
	    ner_svg = displacy.render(spacy_descriptions, style="ent", jupyter=False, page=True)
	    # write_text opens, writes and closes the file in one step (the previous
	    # open(...).write(...) left the handle unclosed).
	    Path(f'{result_dir}/ner.html').write_text(ner_svg, encoding='utf-8')

	    # run keyword extraction
	    # NOTE(review): a new KeyBERT model is constructed on every call; consider
	    # reusing a single instance if start-up cost matters.
	    kw_model = KeyBERT()
	    # NOTE(review): keyword_extract is not given `descriptions` here; confirm
	    # it obtains its input text elsewhere.
	    for n in (1, 2, 3):
	        keyword_path = f'{result_dir}/{n}_keyword.png'
	        keyword_extract(kw_model, n, save_output=keyword_path)
	        outputs.append(Image.open(keyword_path))

	    # run key phrase extraction
	    top_keyphrase_path = f'{result_dir}/top_keyphrase.png'
	    get_top_key_phrases(descriptions, 10, save_output=top_keyphrase_path)
	    keyphrase_svg = display_key_phrases(descriptions, save_output=f'{result_dir}/key_phrase.html')
	    outputs.append(Image.open(top_keyphrase_path))

	    # The two HTML fragments go last, after the seven images.
	    outputs += [ner_svg, keyphrase_svg]

	    return outputs

	# Expose greet() through a Gradio interface: a one-line box for the asset
	# name and a ten-line box for the descriptions.
	demo = gr.Interface(
	    fn=greet,
	    inputs=[
	        gr.Textbox(lines=1, placeholder="Data Asset Name"),
	        gr.Textbox(lines=10, placeholder="All the descriptions"),
	    ],
	    # Seven image slots followed by two HTML slots, matching the order of
	    # the list greet() returns.
	    outputs=['image'] * 7 + ['html'] * 2,
	)
	demo.launch()