# keybert / app.py
# Hosting-page metadata: last commit "Update app.py" (2020d9c) by varun500; file size 1.82 kB.
import streamlit as st
from keybert import KeyBERT
# Create a KeyBERT instance.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the model is built through a cached loader: the KeyBERT
# object is constructed once and reused by later reruns instead of being
# re-created each time.
@st.cache_resource
def _load_keybert_model():
    """Return the KeyBERT instance used by the app (cached across reruns)."""
    return KeyBERT()


kw_model = _load_keybert_model()
# Define the Streamlit app
def main():
    """Render the keyword-extraction page.

    Shows a text area for the document plus two options (stop-word removal
    and Maximal Marginal Relevance). When the "Extract Keywords" button is
    pressed, the document is passed to ``kw_model.extract_keywords`` and each
    returned ``(keyword, score)`` pair is written to the page.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Get user input
    doc = st.text_area("Document")

    # Every option is created *before* the button. A Streamlit button is only
    # True during the single rerun triggered by the click, so a widget created
    # inside the button branch could never be seen as ticked together with
    # the extraction results.
    remove_stopwords = st.checkbox("Remove Stopwords")
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)")

    if not st.button("Extract Keywords"):
        return

    # Guard against an empty / whitespace-only document instead of failing
    # later on an empty result list.
    if not doc or not doc.strip():
        st.warning("Please enter a document before extracting keywords.")
        return

    # MMR hyperparameters
    lambda_param = 0.7  # Weight for the trade-off between relevance and diversity
    num_keywords = 5  # Number of keywords to select

    keywords = kw_model.extract_keywords(
        doc,
        # Ticking the box must *enable* the English stop-word list.
        stop_words="english" if remove_stopwords else None,
        top_n=num_keywords,
        # KeyBERT performs MMR itself; `diversity` is the complement of the
        # relevance weight, so lambda = 0.7 corresponds to diversity = 0.3.
        use_mmr=apply_mmr,
        diversity=1 - lambda_param,
    )

    if not keywords:
        st.info("No keywords could be extracted from this document.")
        return

    st.write("Keywords:")
    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")
# Run the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()