Spaces:

dhanikitkat
/

demo-topic-detection

Sleeping

App Files Files Community

demo-topic-detection / app.py

dhanikitkat

Update app.py

6b5839d verified 9 months ago

raw

history blame

3.77 kB

	import streamlit as st
	import re
	import pandas as pd
	from transformers import pipeline
	from gensim.models import LdaModel
	from gensim.corpora import Dictionary

	# Function to preprocess text
	def text_preprocess(teks):
	    """Normalise raw text before it is handed to the models.

	    The text is lowercased, then @mentions, #hashtags, literal ``\\n``
	    sequences and ``http...`` / ``www.`` links are each replaced by a single
	    space. Finally every character that is not an ASCII letter, whitespace
	    or an apostrophe is replaced by a space and the result is stripped.
	    Runs of interior spaces are not collapsed.

	    Args:
	        teks: The raw input string.

	    Returns:
	        The cleaned, lowercased string without leading or trailing
	        whitespace.
	    """
	    teks = teks.lower()
	    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
	    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
	    # Matches the two-character sequence backslash + "n" (an escaped newline
	    # left in the text), not a real newline character.
	    teks = re.sub(r"\\n", " ", teks)
	    teks = re.sub(r"http\S+", " ", teks)
	    # The dot is escaped so that only real "www." prefixes are removed;
	    # an unescaped dot would also swallow ordinary words starting with "www".
	    teks = re.sub(r"www\.\S+", " ", teks)
	    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
	    # Strip last: the substitutions above can leave spaces at either end.
	    return teks.strip()

	# Function to perform inference and get the topic with the highest probability
	def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
	    """Return the display name and probability of the most likely topic.

	    Args:
	        lda_model: Object exposing
	            ``get_document_topics(bow, minimum_probability=...)`` that
	            returns ``(topic_id, probability)`` pairs.
	        dictionary: Object exposing ``doc2bow(tokens)``.
	        new_document: Text to classify; it is tokenised by splitting on
	            whitespace.
	        topic_names: Mapping from topic id to a human-readable name.

	    Returns:
	        A ``(topic_name, probability)`` tuple. When the winning topic id is
	        missing from ``topic_names`` the name falls back to ``"Topic <id>"``.

	    Raises:
	        ValueError: If the model returns no topics for the document.
	    """
	    new_bow = dictionary.doc2bow(new_document.split())
	    topic_distribution = list(
	        lda_model.get_document_topics(new_bow, minimum_probability=0)
	    )
	    if not topic_distribution:
	        # Fail with a clear message instead of max()'s generic
	        # "arg is an empty sequence" error.
	        raise ValueError("LDA model returned no topics for the given document")

	    topic_id, probability = max(topic_distribution, key=lambda pair: pair[1])
	    topic_name = topic_names.get(topic_id, f"Topic {topic_id}")
	    return topic_name, probability

	# Load sentiment analysis model
	# Model id passed to the transformers pipeline for both model and tokenizer.
	pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


	@st.cache_resource
	def _load_sentiment_pipeline(model_name):
	    """Build the sentiment-analysis pipeline for ``model_name``.

	    Wrapped in ``st.cache_resource`` because Streamlit re-executes this
	    script on every user interaction; without the cache the model would be
	    rebuilt on each rerun. Requires a Streamlit release that provides
	    ``st.cache_resource``.
	    """
	    return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)


	nlp = _load_sentiment_pipeline(pretrained_name)

	# Streamlit app
	# Display names for the topic ids produced by the LDA model.
	TOPIC_NAMES = {
	    0: 'Kurang Memuaskan',
	    1: 'Aplikasi Lambat',
	    2: 'Aplikasi Error',
	    3: 'Sulit Sinkronisasi',
	    4: 'Tidak Bisa Login',
	    5: 'Aplikasi Sulit Dibuka',
	    6: 'Aplikasi Keseringan Update',
	    7: 'Neutral',
	    8: 'Aplikasi Bug',
	    9: 'Pelayanan Buruk',
	    10: 'Aplikasi Tidak Bisa Digunakan',
	    11: 'Aplikasi Belum Update',
	    12: 'Aplikasi Bug/Lag',
	    13: 'Sulit Komplain',
	    14: 'Gangguan Server',
	    15: 'Tidak Bisa Update',
	    16: 'Tidak Bisa Download',
	    17: 'Jaringan Bermasalah',
	    18: 'Transaksi Lambat',
	    19: 'Tidak Bisa Buka Aplikasi',
	    20: 'Terlalu Banyak Iklan',
	    21: 'Verifikasi Wajah Gagal',
	    22: 'Pengajuan Pinjaman',
	    23: 'Sms Kode Otp Tidak Masuk',
	    24: 'Sulit Pengajuan Pinjaman',
	    25: 'Tidak Bisa Transaksi / Lambat',
	    26: 'Sulit Daftar',
	    27: 'Sulit Transfer',
	    28: 'Banyak Potongan',
	    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
	    30: 'Proses Kta Lama',
	    31: 'Aplikasi Tidak Real Time',
	    32: 'Kesulitan Pengajuan Kartu Kredit',
	    33: 'Mesin Atm Error',
	}


	@st.cache_resource
	def _load_topic_model():
	    """Load the LDA model and its dictionary from disk.

	    Cached with ``st.cache_resource`` so the files are not re-read on every
	    button click (Streamlit re-executes the script on each interaction).

	    Returns:
	        A ``(lda_model, dictionary)`` tuple.
	    """
	    return LdaModel.load("lda.model"), Dictionary.load("dictionary.dict")


	def main():
	    """Render the page: a text area plus sentiment and topic buttons.

	    "Analyze Sentiment" runs the preprocessed text through the sentiment
	    pipeline and shows the label and score. "Infer Topic" runs it through
	    the LDA model and shows the most likely topic and its probability.
	    Either button shows a warning when the input contains no usable text.
	    """
	    st.title("Sentiment Analysis and Topic Inference App")
	    st.write("Enter your text below:")
	    input_text = st.text_area("Input Text")

	    if st.button("Analyze Sentiment"):
	        processed_text = text_preprocess(input_text)
	        if not processed_text.strip():
	            st.warning("Please enter some text to analyze.")
	        else:
	            result = nlp(processed_text)
	            sentiment = result[0]['label']
	            probability = result[0]['score']
	            st.write("Sentiment:", sentiment)
	            st.write("Probability:", probability)

	    if st.button("Infer Topic"):
	        # Use the same cleaning as the sentiment branch so tokens are
	        # lowercased and free of punctuation before the dictionary lookup.
	        # NOTE(review): assumes the LDA dictionary was built from text
	        # cleaned this way - confirm against the training pipeline.
	        processed_text = text_preprocess(input_text)
	        if not processed_text.strip():
	            st.warning("Please enter some text to analyze.")
	        else:
	            lda_model, dictionary = _load_topic_model()
	            inferred_topic, inferred_probability = get_highest_probability_topic(
	                lda_model, dictionary, processed_text, TOPIC_NAMES
	            )
	            st.write("Inferred Topic:", inferred_topic)
	            st.write("Inference Probability:", inferred_probability)

	# Start the app only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()