File size: 1,354 Bytes
4dad73d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Import the necessary libraries.

import logging

import gradio as gr
import numpy as np
import pandas as pd
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from sentence_transformers import SentenceTransformer

# Defining a function to read in the text file

def read_in_text(url):
    """Return the entire contents of a text file as one string.

    Args:
        url: Filesystem path of the file to read. Despite the parameter
            name, the value is passed straight to ``open``.

    Returns:
        The decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(url, 'r', encoding='utf-8') as file:
        return file.read()
    
# Load the sentence-transformer model once at import time and wrap it in a
# KeyBERT extractor; the module-level `kw_extractor` is what the extraction
# function below calls.
tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction')
kw_extractor = KeyBERT(tmp_model)

def get_keybert_results_with_vectorizer(file, number_of_results=20):
    """Extract keyphrases from an uploaded text file.

    Args:
        file: Object whose ``name`` attribute is the path of the text file
            to analyse.
        number_of_results: Maximum number of keyphrases to request from the
            extractor (passed as ``top_n``) and to return.

    Returns:
        A ``pandas.DataFrame`` with a single ``keyword`` column holding the
        phrases whose score is at least 0.25, or the string ``"Error"`` if
        anything goes wrong while reading the file or extracting keywords.
    """
    try:
        text = read_in_text(file.name)
        scored_phrases = kw_extractor.extract_keywords(
            text,
            vectorizer=KeyphraseCountVectorizer(),
            stop_words=None,
            top_n=number_of_results,
        )
        # Each result is a (phrase, score) pair; keep only confident matches.
        phrases = [phrase for phrase, score in scored_phrases if score >= 0.25]

        # Cap by the requested count rather than a hard-coded 20, so asking
        # for more than 20 results is honoured.
        return pd.DataFrame({'keyword': phrases}).head(number_of_results)
    except Exception:
        # Keep the best-effort "Error" contract for the UI, but record the
        # traceback instead of discarding it.
        logging.getLogger(__name__).exception(
            "Keyword extraction failed for %r", getattr(file, "name", file)
        )
        return "Error"
        
# Build the Gradio UI: a file-upload input is passed to
# get_keybert_results_with_vectorizer and the result is shown in a dataframe
# output, under the title "Keyword Extraction".
# NOTE(review): `gr.inputs` / `gr.outputs` appear to be the legacy Gradio
# component namespaces — confirm they exist in the pinned gradio version.
demo = gr.Interface(get_keybert_results_with_vectorizer, inputs=gr.inputs.File(),
                    outputs=gr.outputs.Dataframe(),
                    title = "Keyword Extraction")
                    
# Launch the interface only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)