# Import the necessary libraries
import logging

import gradio as gr  # previously missing: `gr` is used below to build the demo
import numpy as np
import pandas as pd
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

# Extracted keyphrases scoring below this value are discarded.
MIN_KEYWORD_SCORE = 0.25


def read_in_text(url):
    """Return the entire contents of the text file located at ``url``.

    Args:
        url: Path of the file to read (despite the name, it is passed
            straight to ``open``, so it must be a local filesystem path).

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(url, "r", encoding="utf-8") as file:
        return file.read()


# Loaded once at import time so every request reuses the same model.
tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction')
kw_extractor = KeyBERT(tmp_model)


def get_keybert_results_with_vectorizer(file, number_of_results=20):
    """Extract keyphrases from an uploaded text file.

    The file is read, passed to ``kw_extractor.extract_keywords`` with a
    ``KeyphraseCountVectorizer``, and the resulting ``(phrase, score)`` pairs
    are filtered to those scoring at least ``MIN_KEYWORD_SCORE``.

    Args:
        file: Object exposing a ``name`` attribute holding the path of the
            file to analyse (this is what the File input of the demo below
            supplies).
        number_of_results: Maximum number of keyphrases requested from the
            extractor and returned to the caller.

    Returns:
        A ``pandas.DataFrame`` with a single ``keyword`` column, or the string
        ``"Error"`` if anything went wrong while reading or extracting.
    """
    try:
        text = read_in_text(file.name)
        keywords = kw_extractor.extract_keywords(
            text,
            vectorizer=KeyphraseCountVectorizer(),
            stop_words=None,
            top_n=number_of_results,
        )
        # Keep only the phrase of each sufficiently relevant (phrase, score) pair.
        phrases = [
            phrase for phrase, score in keywords if score >= MIN_KEYWORD_SCORE
        ]
        output_df = pd.DataFrame({'keyword': phrases})
        # Cap at the requested size rather than a hard-coded 20 so the limit
        # always agrees with ``number_of_results``.
        return output_df.head(number_of_results)
    except Exception:
        # Top-level UI boundary: record the cause instead of hiding it, then
        # return the same sentinel value callers already receive.
        # NOTE(review): the demo's output component is a Dataframe; confirm
        # that a bare string is rendered acceptably there.
        logger.exception("Keyword extraction failed")
        return "Error"


# NOTE(review): `gr.inputs` / `gr.outputs` are the legacy Gradio namespaces;
# confirm they exist in the Gradio version this app is deployed with.
demo = gr.Interface(
    get_keybert_results_with_vectorizer,
    inputs=gr.inputs.File(),
    outputs=gr.outputs.Dataframe(),
    title="Keyword Extraction",
)

if __name__ == "__main__":
    demo.launch(debug=True)