# text-analysis / app.py
# Daryl Fung
# initial commit
# 9b9ea2f
# raw
# history blame
# 2.47 kB
import gradio as gr
from keybert import KeyBERT
import spacy
import string
from spacy import displacy
from pathlib import Path
from PIL import Image
from keyword_extraction import keyword_extract
from keyphrase_extraction import get_top_key_phrases, display_key_phrases
from word import show_gram_plot
# Load the spaCy "en_core_web_sm" pipeline once at import time; greet() reuses
# this shared instance for its named-entity-recognition step.
nlp = spacy.load("en_core_web_sm")
def greet(name: str, descriptions: str) -> list:
    """Run every analysis step on ``descriptions`` and collect the results.

    Punctuation is stripped from ``descriptions`` first. Each step then writes
    its artefact under ``results/<name>/`` and the artefacts are gathered in
    the order the Gradio interface declares its outputs.

    Args:
        name: Data asset name; used as the sub-directory of ``results/``.
        descriptions: Free text to analyse.

    Returns:
        A list of nine items: the 1/2/3-gram plots, the 1/2/3 keyword plots
        and the top key-phrase plot (all opened as PIL images), followed by
        the named-entity HTML string and the value returned by
        ``display_key_phrases``.
    """
    descriptions = descriptions.translate(str.maketrans('', '', string.punctuation))

    # NOTE(review): ``name`` comes straight from a UI text box and is
    # interpolated into file paths; a value containing path separators or
    # ".." would write outside ``results/`` -- consider validating it.
    out_dir = Path(f'results/{name}')
    # ner.html is written directly by this function, so the directory must
    # exist regardless of what the plotting helpers do internally.
    out_dir.mkdir(parents=True, exist_ok=True)

    gram_sizes = (1, 2, 3)
    outputs = []

    # run word count
    for n in gram_sizes:
        show_gram_plot(descriptions, n, 10, save_output=f'results/{name}/{n}_gram.png')
    outputs.extend(Image.open(f'results/{name}/{n}_gram.png') for n in gram_sizes)

    # run named entity recognition
    spacy_descriptions = nlp(descriptions)
    # Create a visualization of named entities
    ner_svg = displacy.render(spacy_descriptions, style="ent", jupyter=False, page=True)
    # write_text opens, writes and closes the file; the previous
    # ``open(...).write(...)`` left the handle to be closed by the GC.
    Path(f'results/{name}/ner.html').write_text(ner_svg, encoding='utf-8')

    # run keyword extraction
    kw_model = KeyBERT()
    # NOTE(review): unlike the other helpers, keyword_extract is not given
    # ``descriptions`` here -- confirm against keyword_extraction that this
    # is intended.
    for n in gram_sizes:
        keyword_extract(kw_model, n, save_output=f'results/{name}/{n}_keyword.png')
    outputs.extend(Image.open(f'results/{name}/{n}_keyword.png') for n in gram_sizes)

    # run key phrase extraction
    get_top_key_phrases(descriptions, 10, save_output=f'results/{name}/top_keyphrase.png')
    keyphrase_svg = display_key_phrases(descriptions, save_output=f'results/{name}/key_phrase.html')
    outputs.append(Image.open(f'results/{name}/top_keyphrase.png'))

    outputs += [ner_svg, keyphrase_svg]
    return outputs
# Input widgets, in the positional order of greet(name, descriptions).
_name_box = gr.Textbox(lines=1, placeholder="Data Asset Name")
_descriptions_box = gr.Textbox(lines=10, placeholder="All the descriptions")

# Output components: seven image slots followed by two HTML slots, matching
# the order of the list greet() returns.
_output_kinds = ['image'] * 7 + ['html'] * 2

demo = gr.Interface(
    fn=greet,
    inputs=[_name_box, _descriptions_box],
    outputs=_output_kinds,
)
demo.launch()