|
import streamlit as st |
|
from keybert import KeyBERT |
|
|
|
|
|
@st.cache_resource
def _load_keyword_model():
    """Build the KeyBERT model once and reuse it across Streamlit reruns."""
    return KeyBERT()


# Streamlit re-executes this script on every widget interaction; going
# through the cached loader avoids re-creating the model each time.
kw_model = _load_keyword_model()
|
|
|
|
|
def main():
    """Render the keyword-extraction page.

    Shows a text area for the document, a "Remove Stopwords" checkbox and
    an MMR checkbox. When the "Extract Keywords" button is pressed, the
    document is passed to ``kw_model.extract_keywords`` and each returned
    ``(keyword, score)`` pair is written to the page.
    """
    # Number of keywords to show (the value the original MMR branch used).
    num_keywords = 5
    # The original weighted relevance with lambda = 0.7. KeyBERT expresses
    # the same trade-off through ``diversity``; 0.3 is used here as the
    # complement of that weight -- NOTE(review): confirm against the
    # installed KeyBERT version.
    mmr_diversity = 0.3

    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    doc = st.text_area("Document")
    remove_stopwords = st.checkbox("Remove Stopwords")
    # Created before the button on purpose: a widget declared inside the
    # button branch triggers a rerun when toggled, and on that rerun the
    # button reads False, so its value would never be acted on.
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)")

    if not st.button("Extract Keywords"):
        return

    if not doc.strip():
        # Nothing to analyse; avoid calling the model on blank input.
        st.warning("Please enter a document first.")
        return

    keywords = kw_model.extract_keywords(
        doc,
        # Checked box -> filter English stop words; unchecked -> keep them.
        stop_words="english" if remove_stopwords else None,
        top_n=num_keywords,
        use_mmr=apply_mmr,
        diversity=mmr_diversity,
    )

    st.write("Keywords:")
    if not keywords:
        st.write("No keywords found.")
        return

    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")
|
|
|
|
|
# Run the page only when this file is executed as a script (for example via
# ``streamlit run``), not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|