File size: 4,921 Bytes
a0d2064
 
 
 
dc1f625
a0d2064
0e99301
 
 
 
 
a0d2064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05dc10b
 
a0d2064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import importlib
import os
import subprocess
import sys
from collections import Counter
from string import punctuation

import gradio as gr
import spacy

# Install the English pipeline on first run if it is not yet available as a
# package. The install goes through the running interpreter's own pip
# (``sys.executable -m pip``) so the wheel lands in the environment this
# script imports from, and the command is passed as an argument list rather
# than a shell string.
if not spacy.util.is_package("en_core_web_sm"):
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl",
        ],
        check=True,  # raise CalledProcessError here instead of failing later at load
    )
    # Let the import system notice the package that was installed after startup.
    importlib.invalidate_caches()


# Load the English language model
nlp = spacy.load("en_core_web_sm")

# Sample inputs offered in the "Load Example Text" dropdown, keyed by the
# label shown to the user. Each value is a triple-quoted literal, so it keeps
# its leading/trailing newline and the four-space indentation of every line.
EXAMPLES = {
    "Scientific Abstract": """
    Compatibility of systems of linear constraints over the set of natural numbers. 
    Criteria of compatibility of a system of linear Diophantine equations, strict inequations, 
    and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions 
    and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
    """,
    "News Article": """
    Machine learning is revolutionizing the way we interact with technology. 
    Artificial intelligence systems are becoming more sophisticated, enabling automated decision making 
    and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve, 
    making breakthroughs in natural language processing and computer vision.
    """,
    "Technical Documentation": """
    The user interface provides intuitive navigation through contextual menus and adaptive layouts. 
    System responses are optimized for performance while maintaining high reliability standards. 
    Database connections are pooled to minimize resource overhead and maximize throughput.
    """
}

def _normalize_term(text):
    """Lower-case *text* and collapse every whitespace run to a single space."""
    return " ".join(text.lower().split())


def extract_keywords(text, num_keywords, extraction_type, include_phrases):
    """Return the most frequent keywords of *text* as a numbered list.

    Args:
        text: Raw input text. ``None``, empty, or whitespace-only input yields
            the "No keywords found." message without running the pipeline.
        num_keywords: Maximum number of entries to return. Coerced to ``int``
            so a float coming from a UI slider cannot break the selection;
            negative values are treated as zero and ``None`` means "all".
        extraction_type: ``"Nouns"``, ``"Named Entities"`` or ``"All Words"``.
            Any other value contributes no single-term keywords.
        include_phrases: When truthy, multi-word noun chunks are counted
            together with the keywords.

    Returns:
        A newline-separated string of
        ``"<rank>. <keyword> (frequency: <count>)"`` lines ordered by
        descending frequency (ties keep first-seen order), or
        ``"No keywords found."`` when nothing was extracted.
    """
    if not text or not text.strip():
        return "No keywords found."

    doc = nlp(text)

    # Pick the raw candidate strings for the selected method; only the
    # requested list is built.
    if extraction_type == "Nouns":
        candidates = (token.text for token in doc
                      if token.pos_ == "NOUN" and not token.is_stop)
    elif extraction_type == "Named Entities":
        candidates = (ent.text for ent in doc.ents)
    elif extraction_type == "All Words":
        candidates = (token.text for token in doc
                      if not token.is_stop and not token.is_punct)
    else:
        candidates = ()

    # Normalising whitespace keeps a term that spans a line break from being
    # counted separately from its single-line form, and keeps embedded
    # newlines out of the numbered output.
    terms = [_normalize_term(raw) for raw in candidates]

    if include_phrases:
        phrases = (_normalize_term(chunk.text) for chunk in doc.noun_chunks)
        # Only multi-word chunks count as phrases.
        terms.extend(phrase for phrase in phrases if " " in phrase)

    # Drop anything that normalised to an empty string (pure whitespace).
    counts = Counter(term for term in terms if term)

    limit = None if num_keywords is None else max(int(num_keywords), 0)
    ranked = counts.most_common(limit)

    lines = [
        f"{rank}. {keyword} (frequency: {freq})"
        for rank, (keyword, freq) in enumerate(ranked, 1)
    ]
    return "\n".join(lines) if lines else "No keywords found."

def load_example(example_name):
    """Look up the sample text stored under *example_name*.

    Returns an empty string when the name is not a key of ``EXAMPLES``.
    """
    try:
        return EXAMPLES[example_name]
    except KeyError:
        return ""

# Assemble the Gradio UI: a header, a two-column row, then the output box
with gr.Blocks(title="Keyword Extraction Tool") as demo:
    # Page header
    gr.Markdown("# 🔍 Keyword extraction using Spacy")
    gr.Markdown("**Developed by : Venugopal Adep**")
    gr.Markdown("Extract keywords using spaCy's natural language processing")

    with gr.Row():
        # First column (scale 2): the text to analyse and the sample picker
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                lines=8,
                label="Input Text",
                placeholder="Enter your text here...",
            )
            example_dropdown = gr.Dropdown(
                label="Load Example Text",
                choices=list(EXAMPLES),
            )

        # Second column (scale 1): extraction options and the trigger button
        with gr.Column(scale=1):
            extraction_type = gr.Radio(
                label="Extraction Method",
                value="Nouns",
                choices=["Nouns", "Named Entities", "All Words"],
            )
            include_phrases = gr.Checkbox(value=True, label="Include Noun Phrases")
            num_keywords = gr.Slider(
                label="Number of Keywords",
                minimum=1,
                maximum=20,
                step=1,
                value=10,
            )
            extract_btn = gr.Button("Extract Keywords", variant="primary")

    # Read-only box that receives the formatted keyword list
    output_text = gr.Textbox(interactive=False, lines=10, label="Extracted Keywords")

    # Choosing a sample fills the input box with its text
    example_dropdown.change(
        fn=load_example,
        inputs=[example_dropdown],
        outputs=[input_text],
    )

    # The button runs the extraction with the current option values
    extract_btn.click(
        fn=extract_keywords,
        inputs=[input_text, num_keywords, extraction_type, include_phrases],
        outputs=[output_text],
    )

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()