File size: 4,144 Bytes
a9ffc21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5439971
 
a9ffc21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
from keybert import KeyBERT

# Build the shared KeyBERT extractor once, when the module is loaded, so that
# every extraction request reuses the same instance.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
kw_model = KeyBERT(model=_EMBEDDING_MODEL_NAME)

# Example texts, keyed by the label under which they are offered to the user.
# Each value is one clean paragraph assembled from adjacent string literals, so
# no source-code indentation, stray trailing spaces, or surrounding blank lines
# leak into the input textbox when an example is loaded.
EXAMPLES = {
    "Scientific Abstract": (
        "Compatibility of systems of linear constraints over the set of natural numbers. "
        "Criteria of compatibility of a system of linear Diophantine equations, strict inequations, "
        "and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions "
        "and algorithms of construction of minimal generating sets of solutions for all types of systems are given."
    ),
    "News Article": (
        "Machine learning is revolutionizing the way we interact with technology. "
        "Artificial intelligence systems are becoming more sophisticated, enabling automated decision making "
        "and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve, "
        "making breakthroughs in natural language processing and computer vision."
    ),
    "Technical Documentation": (
        "The user interface provides intuitive navigation through contextual menus and adaptive layouts. "
        "System responses are optimized for performance while maintaining high reliability standards. "
        "Database connections are pooled to minimize resource overhead and maximize throughput."
    ),
}

def extract_keywords(text, num_words, ngram_range, diversity, use_mmr):
    """Extract keywords from *text* with KeyBERT and format them for display.

    Args:
        text: Document to analyse.
        num_words: Maximum number of keywords to request (forwarded as ``top_n``).
        ngram_range: Range string of the form ``"min-max"``, e.g. ``"1-2"``.
        diversity: Diversity value for MMR; only forwarded when *use_mmr* is true.
        use_mmr: Whether to enable Maximal Marginal Relevance.

    Returns:
        One bullet line per keyword with its score, joined by newlines, or
        ``"No keywords found."`` when the input is blank or nothing is extracted.
    """
    # Nothing to analyse: answer immediately instead of invoking the model.
    if not text or not text.strip():
        return "No keywords found."

    # Convert the "min-max" string into its two integer bounds.
    min_n, max_n = map(int, ngram_range.split("-"))

    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(min_n, max_n),
        stop_words="english",
        # The value is used as a count; coerce in case the UI delivers a float.
        top_n=int(num_words),
        use_mmr=use_mmr,
        diversity=diversity if use_mmr else None,
    )

    # Left-align each keyword in a 30-character column so the scores line up.
    lines = [
        f"• {keyword:<30} (score: {score:.4f})"
        for keyword, score in keywords
    ]
    return "\n".join(lines) if lines else "No keywords found."

def load_example(example_name):
    """Return the example text stored under *example_name*, or "" if there is none."""
    try:
        return EXAMPLES[example_name]
    except KeyError:
        # Unknown (or cleared) selection: fall back to an empty string.
        return ""

# Create Gradio interface: text input and example picker on the left,
# extraction settings on the right, results underneath.
with gr.Blocks(title="KeyBERT Keyword Extraction") as demo:
    gr.Markdown("# 🔑 Keyword extraction using KeyBERT")
    gr.Markdown("**Developed by : Venugopal Adep**")
    gr.Markdown("Extract keywords using BERT embeddings and semantic similarity")

    with gr.Row():
        # Left column: the document to analyse and the example selector.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text here...",
                lines=8
            )
            example_dropdown = gr.Dropdown(
                choices=list(EXAMPLES.keys()),
                label="Load Example Text"
            )

        # Right column: extraction parameters and the trigger button.
        with gr.Column(scale=1):
            # Values use the "min-max" format that extract_keywords parses.
            ngram_range = gr.Dropdown(
                choices=["1-1", "1-2", "1-3", "2-2", "2-3", "3-3"],
                value="1-2",
                label="Keyword Length (N-gram Range)"
            )

            num_words = gr.Slider(
                minimum=1,
                maximum=20,
                value=10,
                step=1,
                label="Number of Keywords"
            )

            use_mmr = gr.Checkbox(
                label="Use Maximal Marginal Relevance",
                value=True
            )

            # Only forwarded to the model when the MMR checkbox is ticked.
            diversity = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.5,
                step=0.1,
                label="Diversity (MMR)",
                interactive=True
            )

            extract_btn = gr.Button("Extract Keywords", variant="primary")

    output_text = gr.Textbox(
        label="Extracted Keywords",
        lines=12,
        interactive=False
    )

    # Set up event handlers
    # Choosing an example copies its text into the input box.
    example_dropdown.change(
        load_example,
        inputs=[example_dropdown],
        outputs=[input_text]
    )

    # The input order below must match extract_keywords' parameter order.
    extract_btn.click(
        extract_keywords,
        inputs=[
            input_text,
            num_words,
            ngram_range,
            diversity,
            use_mmr
        ],
        outputs=[output_text]
    )

demo.launch()