didev007 committed on
Commit
3ea0cf8
1 Parent(s): 24c0483

Upload prediction.py

Browse files
Files changed (1) hide show
  1. prediction.py +82 -0
prediction.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from tensorflow.keras.models import load_model
5
+ import re
6
+ import nltk
7
+ nltk.download('punkt')
8
+ from nltk.tokenize import word_tokenize
9
+ nltk.download('stopwords')
10
+ from nltk.corpus import stopwords
11
+ nltk.download('wordnet')
12
+ from nltk.stem import WordNetLemmatizer
13
+
14
+ # Load the model
15
+ loaded_model = load_model('model_rnn')
16
+
17
+ # Create a dictionary to map the labels to the categories
18
+ label_dict = {0: 'Uang Masuk', 1: 'Uang Keluar', 2: 'Pinjaman', 3: 'Tagihan', 4: 'Top Up',
19
+ 5: 'Biaya & Lainnya', 6: 'Transportasi', 7: 'Pendidikan', 8: 'Hadiah & Amal',
20
+ 9: 'Belanja', 10: 'Hiburan',11: 'Makanan & Minuman', 12: 'Kesehatan',
21
+ 13: 'Perawatan Diri', 14: 'Hobi & Gaya Hidup', 15: 'Pencairan Investasi',
22
+ 16: 'Tabungan & Investasi'}
23
+
24
+ def preprocessing(text):
25
+ '''
26
+ Preprocessing text by applying lowercasing, normalization, tokenization, stopword removal, and lemmatization
27
+ '''
28
+ # Lowercase the text
29
+ text = text.lower()
30
+
31
+ # Normalize the text
32
+ text = re.sub(r'\d+', '', text) # Remove numbers
33
+ text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
34
+ text = re.sub(r'\s+', ' ', text).strip() # Remove whitespaces
35
+
36
+ # Tokenize the text
37
+ tokens = word_tokenize(text)
38
+
39
+ # Get the English stopwords
40
+ stop_words = set(stopwords.words('indonesian'))
41
+ stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])
42
+
43
+ # Remove stopwords
44
+ tokens = [word for word in tokens if word not in stop_words]
45
+
46
+ # Lemmatize the text
47
+ lemmatizer = WordNetLemmatizer()
48
+ tokens = [lemmatizer.lemmatize(word) for word in tokens]
49
+
50
+ # Combine tokens back into a single string
51
+ text = ' '.join(tokens)
52
+
53
+ return text
54
+
55
+ def run():
56
+ st.title('Notes Categorization')
57
+
58
+ default = "konser twice"
59
+
60
+ user_input = st.text_area("Enter the notes text here:", default, height=50)
61
+
62
+ if st.button('Predict'):
63
+ # Apply the function to the 'Text' column in the data
64
+ text_processed = preprocessing(user_input)
65
+
66
+ # The model expects input data in batch, even if just predicting on one sample
67
+ # So, I'll add an extra dimension with np.expand_dims
68
+ preprocessed_notes = np.expand_dims(text_processed, axis=0)
69
+
70
+ # get the prediction
71
+ predictions = loaded_model.predict(preprocessed_notes)
72
+
73
+ # get the class with the highest probability
74
+ predicted_class = np.argmax(predictions[0])
75
+
76
+ # Decode the predicted class into the original category
77
+ predicted_category = label_dict[predicted_class]
78
+
79
+ st.write(f'The predicted category is: {predicted_category}')
80
+
81
+ if __name__ == '__main__':
82
+ main()