LDA-Modelling / app.py
salsabilapl's picture
Update app.py
bfa6667
raw
history blame
No virus
2.32 kB
import streamlit as st
import pandas as pd
import joblib
from gensim import corpora, models
from PIL import Image
# Load the saved models and data. This runs at module level, so both files are
# read every time the script is executed, and the relative paths are resolved
# against the current working directory.
# NOTE(review): joblib.load unpickles its input; only load files from a trusted source.
dictionary = joblib.load('doc2bow.sav') # Object whose doc2bow() is called below; presumably a gensim Dictionary despite the file name — confirm
lda_model = joblib.load('ldamodel.sav') # Load the LDA model
# Function to tokenize and normalize input text before it is fed to the model
def preprocess(text):
    """Return the normalized tokens of *text* that are kept for topic inference.

    The text is tokenized with gensim's ``simple_preprocess``. A token is kept
    only if it is not in gensim's ``STOPWORDS``, not in ``newStopWords``, and
    is longer than three characters. Each kept token is passed through
    ``lemmatize_stemming``.

    Args:
        text: Raw input text.

    Returns:
        A list with one ``lemmatize_stemming`` result per kept token, in the
        order the tokens appear in the text.
    """
    # Imported locally because the file only does
    # `from gensim import corpora, models`, which does not bind the name
    # `gensim` that this function previously relied on (NameError at call time).
    from gensim.parsing.preprocessing import STOPWORDS
    from gensim.utils import simple_preprocess

    # NOTE(review): `newStopWords` and `lemmatize_stemming` are not defined in
    # the visible part of this file. They must be provided at module level
    # (presumably the same ones used when the model was trained) or this
    # function will raise NameError — confirm.
    return [
        lemmatize_stemming(token)
        for token in simple_preprocess(text)
        if token not in STOPWORDS and token not in newStopWords and len(token) > 3
    ]
def get_topics(text):
    """Return the result of indexing ``lda_model`` with the bag-of-words of *text*.

    The text is first run through ``preprocess``; the resulting tokens are
    converted with ``dictionary.doc2bow`` and that vector is used to index the
    loaded model.
    """
    tokens = preprocess(text)
    return lda_model[dictionary.doc2bow(tokens)]
# Function to get top keywords for a topic
def get_top_keywords(topic, num_keywords=10):
    """Return formatted ``"word (weight)"`` strings for one topic.

    Args:
        topic: Topic identifier passed straight to ``lda_model.show_topic``.
        num_keywords: Number of entries requested via ``topn``.

    Returns:
        A list of strings, one per (word, weight) pair returned by the model,
        with the weight rendered to three decimal places.
    """
    top_terms = lda_model.show_topic(topic, topn=num_keywords)
    return [f"{term} ({prob:.3f})" for term, prob in top_terms]
# Streamlit app
def main():
    """Render the page and, on submit, list the model's topics for the input.

    The page has a title, a sidebar with a short description and footer, a
    text area, and a "Submit" button. When the button is pressed with
    non-blank text, the topics returned by ``get_topics`` are written out in
    descending score order, each with its ten-term ``print_topic`` summary.
    Blank submissions get a warning instead of being silently ignored.
    """
    st.title("Web Berita Topic Clustering Untuk Program Kedaireka UMKM📰")

    # Sidebar with title and description
    st.sidebar.title("Topic Clustering")
    st.sidebar.write("Discover topics in news articles.")

    # Input text area for user to enter their text
    user_input = st.text_area("Enter your text here:", "")

    # Submit button
    if st.button("Submit"):
        # Whitespace-only text counts as empty: it has no tokens to score.
        if user_input.strip():
            # Topic analysis. get_topics() already does the
            # preprocess -> doc2bow -> model lookup, so reuse it rather than
            # repeating those steps here.
            ranked_topics = sorted(
                get_topics(user_input),
                key=lambda pair: pair[1],
                reverse=True,
            )
            st.subheader("🔥Top Topics🔥")
            for index, score in ranked_topics:
                # Topic indices are zero-based; show them one-based to the user.
                st.write(f"Score: {score}\t Topic: {index + 1} | {lda_model.print_topic(index, 10)}")
        else:
            st.warning("Please enter some text before submitting.")

    # Add a footer
    st.sidebar.markdown("---")
    st.sidebar.write("© 2023 Web Berita Topic Clustering")
# Build the page only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()