|
import gradio as gr |
|
from gensim.models import TfidfModel |
|
from gensim.corpora import Dictionary |
|
from gensim.utils import simple_preprocess |
|
from gensim.parsing.preprocessing import remove_stopwords |
|
import numpy as np |
|
import warnings |
|
# Silence every warning category process-wide (gensim and gradio are noisy
# at import and run time). Equivalent to ``filterwarnings('ignore')`` with
# no message/module pattern.
warnings.simplefilter("ignore")
|
|
|
|
|
# Sample inputs offered in the "Load Example Text" dropdown. Each text keeps
# its leading and trailing newline, exactly as it is loaded into the textbox.
_SCIENTIFIC_ABSTRACT = """
Compatibility of systems of linear constraints over the set of natural numbers.
Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions
and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
"""

_NEWS_ARTICLE = """
Machine learning is revolutionizing the way we interact with technology.
Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve,
making breakthroughs in natural language processing and computer vision.
"""

_TECHNICAL_DOCUMENTATION = """
The user interface provides intuitive navigation through contextual menus and adaptive layouts.
System responses are optimized for performance while maintaining high reliability standards.
Database connections are pooled to minimize resource overhead and maximize throughput.
"""

# Maps the dropdown label to the example text it loads.
EXAMPLES = {
    "Scientific Abstract": _SCIENTIFIC_ABSTRACT,
    "News Article": _NEWS_ARTICLE,
    "Technical Documentation": _TECHNICAL_DOCUMENTATION,
}
|
|
|
def preprocess_text(text):
    """Normalise *text* into a space-separated string of content tokens.

    The text is tokenised first (lowercased, accents stripped, punctuation
    dropped) and stop words are removed afterwards. ``remove_stopwords``
    compares whitespace-separated words verbatim against a lowercase list,
    so running it on the raw text would let capitalised stop words ("The")
    and stop words glued to punctuation ("however,") slip through.

    Args:
        text: Raw input text.

    Returns:
        The remaining tokens joined by single spaces ("" if none remain).
    """
    tokens = simple_preprocess(text, deacc=True)
    return remove_stopwords(" ".join(tokens))
|
|
|
|
|
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Rank the content words of *text* and return them as a bulleted list.

    The text is tokenised, stop words are removed, and every remaining word
    is scored by its L2-normalised frequency within the text.

    Args:
        text: Raw input text. ``None`` or empty text yields the fallback
            message.
        num_keywords: Maximum number of keywords to return; values below 1
            yield the fallback message.
        scores: When true, append each keyword's score to its line.
        min_length: Minimum number of words a keyword must contain. Keywords
            are single tokens, so any value above 1 filters everything out.

    Returns:
        One keyword per line, highest score first, or
        ``"No keywords found."`` when nothing qualifies.
    """
    fallback = "No keywords found."
    if not text:
        return fallback

    # Tokenise before filtering: remove_stopwords matches whitespace-separated
    # words verbatim, so stop words attached to punctuation would survive if
    # it ran on the raw text.
    tokens = remove_stopwords(
        " ".join(simple_preprocess(text, deacc=True))
    ).split()
    if not tokens:
        return fallback

    dictionary = Dictionary([tokens])
    bow = dictionary.doc2bow(tokens)

    # With a single document the default IDF is log2(1 / 1) == 0 for every
    # term, and TfidfModel drops zero-weight terms, leaving nothing to rank.
    # A constant global weight reduces the model to normalised term frequency.
    tfidf = TfidfModel([bow], wglobal=lambda doc_freq, total_docs: 1.0)
    ranked = sorted(tfidf[bow], key=lambda pair: pair[1], reverse=True)

    results = []
    for word_id, score in ranked:
        # Check the cap before appending so a limit of 0 returns nothing.
        if len(results) >= num_keywords:
            break
        word = dictionary[word_id]
        if len(word.split()) < min_length:
            continue
        # "\u2022" is the bullet character; the escape keeps the source
        # file safe from encoding mix-ups.
        if scores:
            results.append(f"\u2022 {word:<30} (score: {score:.4f})")
        else:
            results.append(f"\u2022 {word}")

    return "\n".join(results) if results else fallback
|
|
|
|
|
# NOTE: no event wiring at module level. ``extract_btn`` and the other
# components only exist once the ``gr.Blocks`` context below has created
# them, so calling ``extract_btn.click(...)`` here raises NameError at
# import time. The click handler is registered inside that context instead.
|
|
|
def load_example(example_name):
    """Return the example text registered under *example_name*.

    Unknown names (including ``None``) produce an empty string.
    """
    if example_name in EXAMPLES:
        return EXAMPLES[example_name]
    return ""
|
|
|
|
|
# Build the UI. Components and their event listeners are created inside the
# ``gr.Blocks`` context so that Gradio attaches them to ``demo``.
with gr.Blocks(title="Gensim Keyword Extraction") as demo:
    # "\U0001F50D" is the magnifying-glass emoji, written as an escape so the
    # title survives encoding mix-ups.
    # NOTE(review): the original glyph was mis-decoded; confirm the emoji.
    gr.Markdown("# \U0001F50D Gensim Keyword Extraction")
    gr.Markdown("Extract keywords using Gensim's text processing capabilities")

    with gr.Row():
        # Left column: text entry and example picker.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text here...",
                lines=8,
            )
            example_dropdown = gr.Dropdown(
                choices=list(EXAMPLES.keys()),
                label="Load Example Text",
            )

        # Right column: extraction settings.
        with gr.Column(scale=1):
            # This value is passed to extract_keywords as ``num_keywords``,
            # i.e. an absolute cap on the number of results, so it is
            # labelled as a count rather than a percentage.
            num_keywords = gr.Slider(
                minimum=1,
                maximum=100,
                value=20,
                step=1,
                label="Number of Keywords",
            )

            # NOTE(review): extract_keywords only yields single-word
            # keywords, so any value above 1 filters out every result.
            min_length = gr.Slider(
                minimum=1,
                maximum=5,
                value=1,
                step=1,
                label="Minimum Words per Keyword",
            )

            show_scores = gr.Checkbox(
                label="Show Relevance Scores",
                value=True,
            )

            extract_btn = gr.Button(
                "Extract Keywords",
                variant="primary",
            )

    # NOTE(review): placed below the row; confirm the intended layout.
    output_text = gr.Textbox(
        label="Extracted Keywords",
        lines=10,
        interactive=False,
    )

    # Choosing an example copies its text into the input box.
    example_dropdown.change(
        load_example,
        inputs=[example_dropdown],
        outputs=[input_text],
    )

    # Inputs are positional: (text, num_keywords, scores, min_length).
    extract_btn.click(
        extract_keywords,
        inputs=[
            input_text,
            num_keywords,
            show_scores,
            min_length,
        ],
        outputs=[output_text],
    )

demo.launch()