# text_classification_1 / prediction.py
# (Hugging Face upload metadata: didev007, "Upload 8 files", commit 9942e88 verified)
import streamlit as st
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import re
import nltk
# NLTK resource downloads run at import time so that the tokenizer,
# stopword corpus, and WordNet lemmatizer used below are available on
# the first request (no-ops if already downloaded).
nltk.download('punkt')
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
# Load the trained RNN classifier from the 'model_rnn' path once at
# import time; every prediction below reuses this single instance.
loaded_model = load_model('model_rnn')
# Map model output indices (0-16) to human-readable transaction categories.
label_dict = dict(enumerate([
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
]))
def preprocessing(text):
    '''
    Clean raw note text for model input.

    Steps: lowercase, strip digits/punctuation/extra whitespace,
    tokenize, remove Indonesian (plus a few informal and English)
    stopwords, and lemmatize each token with WordNet.

    Parameters
    ----------
    text : str
        Raw note text entered by the user.

    Returns
    -------
    str
        Space-joined string of the cleaned tokens.
    '''
    # Lowercase and normalize the text.
    text = text.lower()
    text = re.sub(r'\d+', '', text)        # Remove numbers
    text = re.sub(r'[^\w\s]', '', text)    # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Collapse whitespace
    # Tokenize the text
    tokens = word_tokenize(text)
    # Indonesian stopword list, extended with informal spellings and a
    # few English words seen in the notes. NOTE(review): this set and
    # the lemmatizer are rebuilt on every call — hoist to module level
    # if this ever becomes a hot path.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])
    # Remove stopwords
    tokens = [word for word in tokens if word not in stop_words]
    # Lemmatize the tokens (WordNet is English-trained, so this is
    # mostly a no-op on Indonesian words but harmless).
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # Combine tokens back into a single string
    return ' '.join(tokens)
def run():
    """Render the Streamlit page and predict a note's spending category."""
    st.title('Notes Categorization')
    user_input = st.text_area(
        "Enter the notes text here:", "konser twice", height=50
    )
    if not st.button('Predict'):
        return
    # Clean the raw note text the same way the training data was cleaned.
    cleaned = preprocessing(user_input)
    # The model expects batched input, so wrap the single sample in an
    # extra leading dimension before predicting.
    batch = np.expand_dims(cleaned, axis=0)
    probabilities = loaded_model.predict(batch)
    # Pick the highest-probability class and decode it to its category name.
    predicted_category = label_dict[np.argmax(probabilities[0])]
    st.write(f'The predicted category is: {predicted_category}')
if __name__ == '__main__':
    # Bug fix: the entry-point function defined above is `run`, not
    # `main` — calling `main()` raised NameError when run as a script.
    run()