Spaces:
Sleeping
Sleeping
File size: 2,611 Bytes
9b9ea2f 03c0d73 9b9ea2f 0b8e9c2 9b9ea2f 03c0d73 9b9ea2f 721c493 e69b90c 9b9ea2f e69b90c 9b9ea2f 5051113 c082b57 9b9ea2f 8451095 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
from keybert import KeyBERT
import os
import spacy
import string
from spacy import displacy
from pathlib import Path
from PIL import Image
from keyword_extraction import keyword_extract
from keyphrase_extraction import get_top_key_phrases, display_key_phrases
from word import show_gram_plot
# Load the spaCy pipeline "en_core_web_sm" once at import time; greet() reuses
# this module-level object for named-entity recognition on every call.
nlp = spacy.load("en_core_web_sm")
_kw_model = None  # lazily created KeyBERT instance shared by all greet() calls


def _safe_dir_name(name):
    """Return *name* reduced to a single, safe directory component."""
    # Anything other than letters, digits, space, '.', '_' or '-' (notably the
    # path separators '/' and '\\') becomes '_', so the result can never walk
    # out of the results directory. Leading/trailing dots and spaces are
    # stripped so '.' and '..' collapse to the fallback name.
    cleaned = ''.join(
        ch if ch.isalnum() or ch in ' ._-' else '_' for ch in str(name or '')
    ).strip(' .')
    return cleaned or 'unnamed'


def _get_kw_model():
    """Return the shared KeyBERT model, constructing it on first use."""
    global _kw_model
    if _kw_model is None:
        _kw_model = KeyBERT()
    return _kw_model


def greet(name, descriptions):
    """Run every text analysis on *descriptions* and collect the results.

    Artifacts are written under ``results/<name>/`` (``name`` is sanitised to
    a single directory component first, because it comes straight from the
    web form).

    Args:
        name: Label of the data asset; used as the output sub-directory name.
        descriptions: Free text to analyse. ASCII punctuation is stripped
            before any analysis runs.

    Returns:
        A list of seven items, in the order the Gradio interface expects:
        the 1-, 2- and 3-gram plots, the 1-gram keyword plot and the top
        key-phrase plot (all as PIL images), followed by the named-entity
        markup rendered by displaCy and the value returned by
        ``display_key_phrases`` (presumably HTML markup -- confirm against
        that helper).
    """
    out_dir = Path('results') / _safe_dir_name(name)
    out_dir.mkdir(parents=True, exist_ok=True)

    outputs = []
    descriptions = descriptions.translate(
        str.maketrans('', '', string.punctuation)
    )

    # Word counts: one plot each for unigrams, bigrams and trigrams (top 10).
    # The helpers are given plain-string paths, as in the original code, and
    # are expected to write the image that is opened afterwards.
    gram_paths = [out_dir / f'{n}_gram.png' for n in (1, 2, 3)]
    for n, gram_path in enumerate(gram_paths, start=1):
        show_gram_plot(descriptions, n, 10, save_output=str(gram_path))
    outputs.extend(Image.open(gram_path) for gram_path in gram_paths)

    # Named entity recognition, rendered by displaCy as a standalone page.
    spacy_descriptions = nlp(descriptions)
    ner_html = displacy.render(
        spacy_descriptions, style="ent", jupyter=False, page=True
    )
    # write_text opens, writes and closes the file; the previous
    # ``open(...).write(...)`` left the handle to the garbage collector.
    (out_dir / 'ner.html').write_text(ner_html, encoding='utf-8')

    # Keyword extraction (1-gram keywords only), reusing one KeyBERT model
    # across calls instead of rebuilding it for every request.
    keyword_path = out_dir / '1_keyword.png'
    keyword_extract(
        descriptions, _get_kw_model(), 1, save_output=str(keyword_path)
    )
    outputs.append(Image.open(keyword_path))

    # Key phrase extraction: top-10 plot plus the highlighted-text view.
    top_phrase_path = out_dir / 'top_keyphrase.png'
    get_top_key_phrases(descriptions, 10, save_output=str(top_phrase_path))
    keyphrase_html = display_key_phrases(
        descriptions, save_output=str(out_dir / 'key_phrase.html')
    )
    outputs.append(Image.open(top_phrase_path))

    outputs += [ner_html, keyphrase_html]
    return outputs
# Gradio UI: two text inputs (asset name, descriptions) feed greet(); the
# output components are listed in the same order as the list greet() returns
# (five images followed by two HTML panes).
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Textbox(lines=1, placeholder="Data Asset Name"),
        gr.Textbox(lines=10, placeholder="All the descriptions for analysis"),
    ],
    outputs=['image', 'image', 'image', 'image', 'image', 'html', 'html'],
)
# Listen on all network interfaces, port 7860.
demo.launch(server_name='0.0.0.0', server_port=7860)