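# app.py -- Hugging Face Space file (3.77 kB).
# Last commit: "Update app.py" (6b5839d, verified) by dhanikitkat.
# (File-viewer links on the original page: raw, history, blame.)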
import streamlit as st
import re
import pandas as pd
from transformers import pipeline
from gensim.models import LdaModel
from gensim.corpora import Dictionary
# Function to preprocess text
def text_preprocess(teks):
    """Normalise raw text before it is passed to the models.

    The text is lower-cased, then each of the following is replaced by a
    single space, in this order: ``@mentions``, ``#hashtags``, the literal
    two-character sequence backslash + ``n``, ``http...`` links, ``www.``
    links, and finally every character that is not an ASCII letter,
    whitespace or an apostrophe.  Leading/trailing whitespace is trimmed last.

    Args:
        teks: Raw input string.

    Returns:
        The cleaned string.  Runs of interior spaces left by the
        substitutions are not collapsed.
    """
    teks = teks.lower()
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
    # Targets an escaped newline (backslash followed by "n"), not a real
    # line break; real line breaks count as whitespace and are kept below.
    teks = re.sub(r"\\n", " ", teks)
    teks = re.sub(r"http\S+", " ", teks)
    # The dot is escaped so that only "www." prefixes match.  Unescaped, "."
    # also matched a space, so e.g. "awwww nice" lost the word "nice".
    teks = re.sub(r"www\.\S+", " ", teks)
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    # Trim at the very end: the substitutions above can leave spaces at
    # either edge, which a mid-pipeline strip() would not remove.
    return teks.strip()
# Function to perform inference and get the topic with the highest probability
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the label and probability of the most likely topic for a document.

    Args:
        lda_model: Object exposing ``get_document_topics(bow, minimum_probability=...)``
            that yields ``(topic_id, probability)`` pairs.
        dictionary: Object exposing ``doc2bow(tokens)``.
        new_document: Text to classify; it is tokenised by whitespace only.
        topic_names: Mapping from topic id to display label.

    Returns:
        ``(label, probability)`` for the highest-scoring pair.  Ids missing
        from ``topic_names`` are labelled ``"Topic <id>"``.  On ties the
        first pair in the returned distribution wins.
    """
    tokens = new_document.split()
    bag_of_words = dictionary.doc2bow(tokens)
    # minimum_probability=0 asks the model not to filter out low-probability topics.
    scored_topics = lda_model.get_document_topics(bag_of_words, minimum_probability=0)

    best_id, best_score = max(scored_topics, key=lambda pair: pair[1])
    label = topic_names.get(best_id, f"Topic {best_id}")
    return label, best_score
# Load sentiment analysis model
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


def _load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for ``pretrained_name``."""
    return pipeline("sentiment-analysis", model=pretrained_name, tokenizer=pretrained_name)


# Streamlit re-executes this script on every widget interaction, so an
# uncached load would rebuild the model on each rerun.  ``st.cache_resource``
# keeps one shared instance instead.  It is looked up with getattr because
# older Streamlit releases do not provide it; in that case the plain loader
# is used and behaviour is the same as before.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_sentiment_pipeline = _cache_resource(_load_sentiment_pipeline)

nlp = _load_sentiment_pipeline()
# Display labels for the LDA topic ids.  Ids missing from this mapping are
# shown as "Topic <id>" by get_highest_probability_topic.
_TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


# Streamlit app
def main():
    """Render the page: a text area plus two independent actions.

    "Analyze Sentiment" cleans the input with ``text_preprocess`` and shows
    the label and score of the first result returned by ``nlp``.

    "Infer Topic" loads ``lda.model`` and ``dictionary.dict`` (paths relative
    to the working directory), cleans the input the same way, and shows the
    most probable topic with its probability.

    Either action shows a warning instead of a result when the cleaned input
    is empty.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    if st.button("Analyze Sentiment"):
        processed_text = text_preprocess(input_text).strip()
        if not processed_text:
            # Nothing left after cleaning: a prediction would be meaningless.
            st.warning("Please enter some text to analyze.")
        else:
            # truncation=True keeps long inputs within the model's maximum
            # sequence length instead of failing inside the pipeline.
            result = nlp(processed_text, truncation=True)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)

    if st.button("Infer Topic"):
        # Use the same cleaning as the sentiment branch.  The raw text keeps
        # capitals and punctuation attached to words, which would not match
        # tokens in a dictionary built from normalised text.
        # NOTE(review): assumes the LDA dictionary was built from text cleaned
        # by text_preprocess (or equivalent) -- confirm against training code.
        processed_text = text_preprocess(input_text).strip()
        if not processed_text:
            st.warning("Please enter some text to analyze.")
        else:
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, _TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()