import re

import streamlit as st
import numpy as np
from tensorflow.keras.models import load_model

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources needed for tokenization, stopword removal,
# and lemmatization
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the trained model
loaded_model = load_model('model_rnn')

# Map the model's output labels to their category names
label_dict = {
    0: 'Uang Masuk', 1: 'Uang Keluar', 2: 'Pinjaman', 3: 'Tagihan',
    4: 'Top Up', 5: 'Biaya & Lainnya', 6: 'Transportasi', 7: 'Pendidikan',
    8: 'Hadiah & Amal', 9: 'Belanja', 10: 'Hiburan', 11: 'Makanan & Minuman',
    12: 'Kesehatan', 13: 'Perawatan Diri', 14: 'Hobi & Gaya Hidup',
    15: 'Pencairan Investasi', 16: 'Tabungan & Investasi'
}


def preprocessing(text):
    '''
    Preprocess text by applying lowercasing, normalization, tokenization,
    stopword removal, and lemmatization
    '''
    # Lowercase the text
    text = text.lower()

    # Normalize the text
    text = re.sub(r'\d+', '', text)           # Remove numbers
    text = re.sub(r'[^\w\s]', '', text)       # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Collapse whitespace

    # Tokenize the text
    tokens = word_tokenize(text)

    # Get the Indonesian stopwords, plus a few informal and English extras
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and'])

    # Remove stopwords
    tokens = [word for word in tokens if word not in stop_words]

    # Lemmatize the tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    # Combine tokens back into a single string
    return ' '.join(tokens)


def run():
    st.title('Notes Categorization')

    default = 'konser twice'
    user_input = st.text_area('Enter the notes text here:', default, height=50)

    if st.button('Predict'):
        # Preprocess the raw input text
        text_processed = preprocessing(user_input)

        # The model expects input in batches, even when predicting on a
        # single sample, so add an extra dimension with np.expand_dims
        preprocessed_notes = np.expand_dims(text_processed, axis=0)

        # Get the prediction
        predictions = loaded_model.predict(preprocessed_notes)

        # Get the class with the highest probability
        predicted_class = np.argmax(predictions[0])

        # Decode the predicted class into the original category
        predicted_category = label_dict[predicted_class]

        st.write(f'The predicted category is: {predicted_category}')


if __name__ == '__main__':
    run()
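
# Usage sketch (assumptions: this script is saved as app.py, and 'model_rnn'
# is a saved Keras model whose first layer vectorizes raw strings, since
# predict() is called directly on a batch of preprocessed text):
#
#   streamlit run app.py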