Spaces:
Sleeping
Sleeping
import streamlit as st | |
import re | |
import pandas as pd | |
from transformers import pipeline | |
from gensim.models import LdaModel | |
from gensim.corpora import Dictionary | |
# Cleanup patterns applied in order by text_preprocess(); each match is
# replaced with a single space.
_TEXT_CLEANUP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"@[A-Za-z0-9_]+",  # @mentions
        r"#[A-Za-z0-9_]+",  # #hashtags
        r"\\n",  # the two-character sequence backslash + "n" (escaped newline)
        r"http\S+",  # http:// and https:// URLs
        r"www\.\S+",  # scheme-less URLs; the dot is escaped so words like "wwwow" survive
        r"[^A-Za-z\s']",  # anything that is not a letter, whitespace or apostrophe
    )
)


# Function to preprocess text
def text_preprocess(teks):
    """Normalize raw user text before it is passed to the models.

    The text is lower-cased, then mentions, hashtags, literal ``\\n``
    sequences, URLs and every character other than ASCII letters, whitespace
    and apostrophes are each replaced by a space. Surrounding whitespace is
    stripped last so that removed tokens at either end leave no padding.

    Args:
        teks: The raw input string.

    Returns:
        The cleaned, lower-cased string (possibly empty). Runs of spaces
        inside the text are not collapsed.
    """
    teks = teks.lower()
    for pattern in _TEXT_CLEANUP_PATTERNS:
        teks = pattern.sub(" ", teks)
    # Strip after all substitutions: each one can introduce edge spaces.
    return teks.strip()
# Function to perform inference and get the topic with the highest probability
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the label and probability of the best-scoring topic for a document.

    Args:
        lda_model: Object exposing ``get_document_topics(bow, minimum_probability=...)``
            that yields ``(topic_id, probability)`` pairs.
        dictionary: Object exposing ``doc2bow(tokens)``.
        new_document: Text to classify; it is tokenized by whitespace splitting.
        topic_names: Mapping from topic id to a display label.

    Returns:
        A ``(label, probability)`` tuple. When the winning topic id has no
        entry in ``topic_names`` the label falls back to ``"Topic <id>"``.
    """
    tokens = new_document.split()
    scored_topics = lda_model.get_document_topics(
        dictionary.doc2bow(tokens),
        minimum_probability=0,
    )

    def _probability(entry):
        return entry[1]

    # max() keeps the first entry on ties, so the lowest-listed topic wins.
    best_id, best_probability = max(scored_topics, key=_probability)
    fallback_label = f"Topic {best_id}"
    return topic_names.get(best_id, fallback_label), best_probability
# Load sentiment analysis model
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"

# Streamlit re-executes the whole script on every widget interaction, so an
# uncached load would rebuild the pipeline on each button click. Use
# st.cache_resource when this Streamlit version provides it; otherwise fall
# back to a no-op decorator, which keeps the original (uncached) behavior.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_sentiment_pipeline(model_name):
    """Build the sentiment-analysis pipeline for the given model name.

    The same name is used for both the model and its tokenizer.
    """
    return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)


nlp = _load_sentiment_pipeline(pretrained_name)
# Display labels for the LDA topic ids; ids missing here are shown as
# "Topic <id>" by get_highest_probability_topic().
_TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


def _clean_or_warn(raw_text):
    """Return the preprocessed text, or None after warning the user if it is empty."""
    cleaned = text_preprocess(raw_text).strip()
    if not cleaned:
        st.warning("Please enter some text first.")
        return None
    return cleaned


# Streamlit app
def main():
    """Render the page: one text area and two independent action buttons.

    "Analyze Sentiment" runs the module-level ``nlp`` pipeline on the
    preprocessed input and shows the top label with its score.
    "Infer Topic" loads the LDA model and dictionary from ``lda.model`` and
    ``dictionary.dict`` and shows the most probable topic label.
    Input that is empty after preprocessing produces a warning instead of
    being sent to either model.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    if st.button("Analyze Sentiment"):
        processed_text = _clean_or_warn(input_text)
        if processed_text is not None:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)

    if st.button("Infer Topic"):
        # Feed the LDA model the same cleaned text as the sentiment model:
        # doc2bow matches exact tokens, so raw tokens such as "Lambat!" would
        # not hit lower-case vocabulary entries.
        # NOTE(review): assumes the LDA dictionary was built from text cleaned
        # like text_preprocess() does - confirm against the training pipeline.
        processed_text = _clean_or_warn(input_text)
        if processed_text is not None:
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, _TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)


if __name__ == "__main__":
    main()