import streamlit as st
import pandas as pd
import joblib
from gensim import corpora, models
from PIL import Image

# Load the saved models and data.
# Both paths are relative, so they resolve against the current working
# directory of the process running this script.
# NOTE(review): joblib.load unpickles the file contents; only load artifacts
# that come from a trusted source.
# Bag-of-words artifact used by get_topics to convert tokens before scoring.
dictionary = joblib.load('joblibDeploy/new_deploy/doc2bow.sav')
# Topic model: indexed with a bag-of-words vector in get_topics and queried
# through show_topic in get_top_keywords (presumably a gensim LDA model).
lda_model = joblib.load('joblibDeploy/new_deploy/ldamodel.sav')

# Function to preprocess input text and get topic distribution
def get_topics(text):
    """Return the topic distribution of *text* under the loaded topic model.

    The text is tokenized on whitespace only (no lowercasing, stemming, or
    stop-word removal), converted to a bag-of-words vector, and passed to
    ``lda_model`` by indexing.

    Args:
        text: Raw input text.

    Returns:
        Whatever ``lda_model[bow_vector]`` yields; the caller in this file
        treats each item as a ``(topic_id, score)`` pair.
    """
    tokens = text.split()

    # A gensim Dictionary is not callable: its bag-of-words conversion lives
    # on the ``doc2bow`` method. The artifact is stored as ``doc2bow.sav``
    # though, so it may instead be the pickled bound ``doc2bow`` method
    # itself. Resolve the converter so both kinds of artifact work.
    to_bow = getattr(dictionary, "doc2bow", dictionary)

    bow_vector = to_bow(tokens)
    return lda_model[bow_vector]

# Function to get top keywords for a topic
def get_top_keywords(topic, num_keywords=10):
    """Return display strings for the highest-weighted words of a topic.

    Args:
        topic: Topic identifier passed straight to ``lda_model.show_topic``.
        num_keywords: How many words to request (forwarded as ``topn``).

    Returns:
        A list of strings shaped like ``"word (0.123)"``, in the order
        ``show_topic`` returns them.
    """
    formatted = []
    for word, weight in lda_model.show_topic(topic, topn=num_keywords):
        # Weight is rendered with three decimal places.
        formatted.append(f"{word} ({weight:.3f})")
    return formatted

# Streamlit app
def main():
    """Render the topic-clustering page and handle text submissions.

    Draws the page title, the sidebar, and a text area. When "Submit" is
    pressed with non-blank text, the text is scored with ``get_topics`` and
    every returned topic is listed with its score and top keywords, highest
    score first. A blank submission shows a warning instead of results.
    """
    st.title("Web Berita Topic Clustering 📰")

    # Sidebar with title and description
    st.sidebar.title("Topic Clustering")
    st.sidebar.write("Discover topics in news articles.")

    # Input text area for user to enter their text
    user_input = st.text_area("Enter your text here:", "")

    # Submit button
    if st.button("Submit"):
        # Whitespace-only text has no tokens to score, so treat it as empty.
        if user_input.strip():
            # Process the user's input and get topic distribution
            topics = get_topics(user_input)

            # The model does not promise score order; rank explicitly so the
            # "Top Topics" list really starts with the strongest topic.
            ranked = sorted(topics, key=lambda pair: pair[1], reverse=True)

            # Display the top topics
            st.subheader("🔥Top Topics🔥")
            for topic_id, score in ranked:
                # Topic ids are shown 1-based for readers.
                st.write(f"**📍Topic {topic_id + 1}** (Score: {score:.4f})")
                st.markdown(", ".join(get_top_keywords(topic_id)))
                st.write("---")
        else:
            # Give feedback instead of silently ignoring the click.
            st.warning("Please enter some text before submitting.")

    # Add a footer
    st.sidebar.markdown("---")
    st.sidebar.write("© 2023 Web Berita Topic Clustering")

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()