"""Gradio app that runs several text analyses over a block of descriptions.

For each submission the app writes its artifacts under ``results/<name>/``
and returns seven images plus two HTML fragments to the UI.
"""

import os
import string
from pathlib import Path

import gradio as gr
import spacy
from keybert import KeyBERT
from PIL import Image
from spacy import displacy

from keyphrase_extraction import display_key_phrases, get_top_key_phrases
from keyword_extraction import keyword_extract
from word import show_gram_plot

nlp = spacy.load("en_core_web_sm")

# n-gram sizes used for both the word-count plots and the keyword plots.
_NGRAM_SIZES = (1, 2, 3)


def greet(name: str, descriptions: str) -> list:
    """Analyse *descriptions* and return the artifacts shown by the UI.

    All files are written below ``results/<name>/`` (created if missing).

    Args:
        name: Label of the data asset; used verbatim as the output
            sub-directory name.
        descriptions: Free text to analyse. ASCII punctuation is stripped
            before any analysis runs.

    Returns:
        A list of nine items, in this order: the 1/2/3-gram plot images, the
        1/2/3-gram keyword images, the top-key-phrase image, the displaCy
        named-entity HTML page, and the value returned by
        ``display_key_phrases`` (rendered by the UI as HTML).
    """
    # NOTE(review): `name` comes straight from a UI textbox and is used as a
    # path component without sanitising — confirm this is acceptable.
    out_dir = f'results/{name}'
    os.makedirs(out_dir, exist_ok=True)

    descriptions = descriptions.translate(
        str.maketrans('', '', string.punctuation)
    )

    outputs = []

    # Word counts: render every plot first, then load the saved images
    # (same order of side effects as the hand-unrolled original).
    # The literal 10 is presumably a top-k limit — TODO confirm in `word`.
    for n in _NGRAM_SIZES:
        show_gram_plot(
            descriptions, n, 10, save_output=f'{out_dir}/{n}_gram.png'
        )
    outputs.extend(
        Image.open(f'{out_dir}/{n}_gram.png') for n in _NGRAM_SIZES
    )

    # Named entity recognition, rendered by displaCy as a full HTML page.
    doc = nlp(descriptions)
    ner_html = displacy.render(doc, style="ent", jupyter=False, page=True)
    # write_text closes the file; the original left the handle open.
    Path(f'{out_dir}/ner.html').write_text(ner_html, encoding='utf-8')

    # Keyword extraction; a fresh KeyBERT model is built on every call.
    kw_model = KeyBERT()
    for n in _NGRAM_SIZES:
        keyword_extract(
            descriptions, kw_model, n,
            save_output=f'{out_dir}/{n}_keyword.png',
        )
    outputs.extend(
        Image.open(f'{out_dir}/{n}_keyword.png') for n in _NGRAM_SIZES
    )

    # Key phrase extraction.
    get_top_key_phrases(
        descriptions, 10, save_output=f'{out_dir}/top_keyphrase.png'
    )
    keyphrase_html = display_key_phrases(
        descriptions, save_output=f'{out_dir}/key_phrase.html'
    )
    outputs.append(Image.open(f'{out_dir}/top_keyphrase.png'))

    # The two HTML fragments go last, matching the Interface's output spec.
    outputs += [ner_html, keyphrase_html]
    return outputs


demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Textbox(lines=1, placeholder="Data Asset Name"),
        gr.Textbox(lines=10, placeholder="All the descriptions"),
    ],
    # Seven images followed by two HTML panes, in the order greet() returns.
    outputs=[
        'image', 'image', 'image', 'image', 'image', 'image', 'image',
        'html', 'html',
    ],
)
demo.launch()