salsabilapl committed on
Commit
27230db
1 Parent(s): bfa6667

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -26
app.py CHANGED
@@ -5,20 +5,12 @@ from gensim import corpora, models
5
  from PIL import Image
6
 
7
  # Load the saved models and data
8
- dictionary = joblib.load('doc2bow.sav') # Load the dictionary
9
- lda_model = joblib.load('ldamodel.sav') # Load the LDA model
10
 
11
  # Function to preprocess input text and get topic distribution
12
- def preprocess(text):
13
- # Define your preprocessing logic here, as it was in your original code
14
- result = []
15
- for token in gensim.utils.simple_preprocess(text):
16
- if token not in gensim.parsing.preprocessing.STOPWORDS and token not in newStopWords and len(token) > 3:
17
- result.append(lemmatize_stemming(token))
18
- return result
19
-
20
  def get_topics(text):
21
- bow_vector = dictionary.doc2bow(preprocess(text))
22
  topics = lda_model[bow_vector]
23
  return topics
24
 
@@ -42,25 +34,20 @@ def main():
42
  # Submit button
43
  if st.button("Submit"):
44
  if user_input:
45
- zkata =[]
46
- # Masukkan user input ke dalam zkata
47
- zkata.append(user_input)
48
-
49
- # Gabungkan zkata menjadi satu string
50
- string = ' '.join([str(item) for item in zkata])
51
- tampung_kata = string
52
-
53
- # Lakukan pemrosesan pada `tampung_kata` dan dictionary
54
- bow_vector = dictionary.doc2bow(preprocess(tampung_kata))
55
-
56
- # Analisis topik
57
  st.subheader("🔥Top Topics🔥")
58
- for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1 * tup[1]):
59
- st.write(f"Score: {score}\t Topic: {index + 1} | {lda_model.print_topic(index, 10)}")
 
 
 
60
 
61
  # Add a footer
62
  st.sidebar.markdown("---")
63
  st.sidebar.write("© 2023 Web Berita Topic Clustering")
64
 
65
  if __name__ == "__main__":
66
- main()
 
5
  from PIL import Image
6
 
7
  # Load the saved models and data
8
+ dictionary = joblib.load('doc2bow.sav')
9
+ lda_model = joblib.load('ldamodel.sav')
10
 
11
  # Function to preprocess input text and get topic distribution
 
 
 
 
 
 
 
 
12
  def get_topics(text):
13
+ bow_vector = dictionary(text.split())
14
  topics = lda_model[bow_vector]
15
  return topics
16
 
 
34
  # Submit button
35
  if st.button("Submit"):
36
  if user_input:
37
+ # Process the user's input and get topic distribution
38
+ topics = get_topics(user_input)
39
+
40
+ # Display the top topics
 
 
 
 
 
 
 
 
41
  st.subheader("🔥Top Topics🔥")
42
+ for topic in topics:
43
+ st.write(f"**📍Topic {topic[0] + 1}** (Score: {topic[1]:.4f})")
44
+ top_keywords = get_top_keywords(topic[0])
45
+ st.markdown(", ".join(top_keywords))
46
+ st.write("---")
47
 
48
  # Add a footer
49
  st.sidebar.markdown("---")
50
  st.sidebar.write("© 2023 Web Berita Topic Clustering")
51
 
52
  if __name__ == "__main__":
53
+ main()