Spaces:
Runtime error
Runtime error
Commit
•
27230db
1
Parent(s):
bfa6667
Update app.py
Browse files
app.py
CHANGED
@@ -5,20 +5,12 @@ from gensim import corpora, models
|
|
5 |
from PIL import Image
|
6 |
|
7 |
# Load the saved models and data
|
8 |
-
dictionary = joblib.load('doc2bow.sav')
|
9 |
-
lda_model = joblib.load('ldamodel.sav')
|
10 |
|
11 |
# Function to preprocess input text and get topic distribution
|
12 |
-
def preprocess(text):
    """Tokenize *text* and return the normalized tokens that pass filtering.

    Tokens come from ``gensim.utils.simple_preprocess``. A token is kept only
    when it is absent from gensim's ``STOPWORDS``, absent from the
    module-level ``newStopWords`` collection, and longer than three
    characters. Each kept token is passed through ``lemmatize_stemming``.

    Args:
        text: Raw input string.

    Returns:
        A list with the ``lemmatize_stemming`` result for every kept token,
        in the order the tokens were produced.
    """
    return [
        lemmatize_stemming(word)
        for word in gensim.utils.simple_preprocess(text)
        # Same checks, same order as the chained `and` form: reject on the
        # first matching condition.
        if not (
            word in gensim.parsing.preprocessing.STOPWORDS
            or word in newStopWords
            or len(word) <= 3
        )
    ]
|
19 |
-
|
20 |
def get_topics(text):
|
21 |
-
bow_vector = dictionary.
|
22 |
topics = lda_model[bow_vector]
|
23 |
return topics
|
24 |
|
@@ -42,25 +34,20 @@ def main():
|
|
42 |
# Submit button
|
43 |
if st.button("Submit"):
|
44 |
if user_input:
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
# Gabungkan zkata menjadi satu string
|
50 |
-
string = ' '.join([str(item) for item in zkata])
|
51 |
-
tampung_kata = string
|
52 |
-
|
53 |
-
# Lakukan pemrosesan pada `tampung_kata` dan dictionary
|
54 |
-
bow_vector = dictionary.doc2bow(preprocess(tampung_kata))
|
55 |
-
|
56 |
-
# Analisis topik
|
57 |
st.subheader("🔥Top Topics🔥")
|
58 |
-
for
|
59 |
-
st.write(f"
|
|
|
|
|
|
|
60 |
|
61 |
# Add a footer
|
62 |
st.sidebar.markdown("---")
|
63 |
st.sidebar.write("© 2023 Web Berita Topic Clustering")
|
64 |
|
65 |
if __name__ == "__main__":
|
66 |
-
main()
|
|
|
5 |
from PIL import Image
|
6 |
|
7 |
# Load the saved models and data
|
8 |
+
dictionary = joblib.load('doc2bow.sav')
|
9 |
+
lda_model = joblib.load('ldamodel.sav')
|
10 |
|
11 |
# Function to preprocess input text and get topic distribution
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def get_topics(text):
    """Return the LDA topic distribution for *text*.

    The input is split on whitespace, converted to a bag-of-words vector
    using the module-level ``dictionary``, and then looked up in the
    module-level ``lda_model``.

    Args:
        text: Raw input string. No stop-word removal or stemming is applied
            here; tokens are the plain whitespace-separated pieces.

    Returns:
        The result of ``lda_model[bow_vector]``. The caller iterates it and
        reads index 0 as the topic id and index 1 as the score.
    """
    # The dictionary object is not callable; bag-of-words conversion is done
    # through its ``doc2bow`` method, which takes a list of string tokens.
    # NOTE(review): assumes 'doc2bow.sav' stores a gensim Dictionary (not a
    # bound ``doc2bow`` method) — confirm against how the file was saved.
    bow_vector = dictionary.doc2bow(text.split())
    topics = lda_model[bow_vector]
    return topics
|
16 |
|
|
|
34 |
# Submit button
|
35 |
if st.button("Submit"):
|
36 |
if user_input:
|
37 |
+
# Process the user's input and get topic distribution
|
38 |
+
topics = get_topics(user_input)
|
39 |
+
|
40 |
+
# Display the top topics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
st.subheader("🔥Top Topics🔥")
|
42 |
+
for topic in topics:
|
43 |
+
st.write(f"**📍Topic {topic[0] + 1}** (Score: {topic[1]:.4f})")
|
44 |
+
top_keywords = get_top_keywords(topic[0])
|
45 |
+
st.markdown(", ".join(top_keywords))
|
46 |
+
st.write("---")
|
47 |
|
48 |
# Add a footer
|
49 |
st.sidebar.markdown("---")
|
50 |
st.sidebar.write("© 2023 Web Berita Topic Clustering")
|
51 |
|
52 |
if __name__ == "__main__":
|
53 |
+
main()
|