"""Streamlit app that shows the LDA topic distribution of user-entered text.

A previously trained dictionary and LDA model are loaded from disk with
joblib; the user's text is converted to a bag-of-words vector and scored
against the model, and each resulting topic is shown with its top keywords.
"""

import streamlit as st
import pandas as pd
import joblib
from gensim import corpora, models
from PIL import Image

# Load the saved models and data.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted artifacts.
dictionary = joblib.load('joblibDeploy/new_deploy/doc2bow.sav')
lda_model = joblib.load('joblibDeploy/new_deploy/ldamodel.sav')


def get_topics(text):
    """Return the topic distribution of *text* according to ``lda_model``.

    The text is split on whitespace and converted to a bag-of-words vector
    before being passed to the model.

    Args:
        text: Raw input text.

    Returns:
        Whatever ``lda_model[bow_vector]`` yields; ``main`` consumes it as an
        iterable of ``(topic_id, score)`` pairs.
    """
    tokens = text.split()
    # A gensim Dictionary is not callable; its bag-of-words conversion is the
    # ``doc2bow`` method. Fall back to calling the loaded object directly in
    # case the saved artifact is itself the conversion callable.
    to_bow = getattr(dictionary, "doc2bow", dictionary)
    bow_vector = to_bow(tokens)
    return lda_model[bow_vector]


def get_top_keywords(topic, num_keywords=10):
    """Return the top keywords of a topic formatted as ``"word (weight)"``.

    Args:
        topic: Topic id understood by ``lda_model.show_topic``.
        num_keywords: How many keywords to request from the model.

    Returns:
        A list of strings, one per keyword, with the weight shown to three
        decimal places.
    """
    word_weights = lda_model.show_topic(topic, topn=num_keywords)
    return [f"{word} ({weight:.3f})" for word, weight in word_weights]


def main():
    """Render the Streamlit page and handle the submit action."""
    st.title("Web Berita Topic Clustering 📰")

    # Sidebar with title and description
    st.sidebar.title("Topic Clustering")
    st.sidebar.write("Discover topics in news articles.")

    # Input text area for user to enter their text
    user_input = st.text_area("Enter your text here:", "")

    # Submit button
    if st.button("Submit"):
        if user_input.strip():
            # Process the user's input and get topic distribution
            topics = get_topics(user_input)

            # Show the highest-scoring topics first so the "Top Topics"
            # heading matches the order on screen.
            ranked_topics = sorted(
                topics, key=lambda pair: pair[1], reverse=True
            )

            # Display the top topics
            st.subheader("🔥Top Topics🔥")
            for topic_id, score in ranked_topics:
                st.write(f"**📍Topic {topic_id + 1}** (Score: {score:.4f})")
                top_keywords = get_top_keywords(topic_id)
                st.markdown(", ".join(top_keywords))
                st.write("---")
        else:
            # Empty or whitespace-only input has nothing to score.
            st.warning("Please enter some text before submitting.")

    # Add a footer
    st.sidebar.markdown("---")
    st.sidebar.write("© 2023 Web Berita Topic Clustering")


if __name__ == "__main__":
    main()