didev007 committed
Commit 381d880
1 Parent(s): d52d1f8

Upload 8 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_rnn/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,24 @@
+import pandas as pd
+import streamlit as st
+import prediction
+
+# Set the page title and layout
+st.set_page_config(page_title="Notes Text Classification",
+                   layout='wide',
+                   initial_sidebar_state='expanded'
+                   )
+
+# Create a sidebar with a title and a selection box
+st.sidebar.title("Choose a page:")
+page = st.sidebar.selectbox("", ('Landing Page', 'Data Prediction'))
+
+# Display different content depending on the selected page
+if page == 'Data Prediction':
+    prediction.run()
+else:
+    # Add a header and a subheader with some text
+    st.title("What category does this note belong to?")
+    st.subheader("Find out the category with this space that uses NLP to do predictions.")
+
+    # Add an image about the case
+    st.image("https://imageio.forbes.com/specials-images/imageserve/60808d87824ab7edc3770486/Note-Pad-and-Pen-on-Yellow-background/960x0.jpg?height=474&width=711&fit=bounds")
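For reference (not part of this commit): once the dependencies are installed, the app can typically be started locally with Streamlit's CLI via streamlit run app.py; on a Hugging Face Space using the Streamlit SDK, app.py is normally picked up as the entry point automatically.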
model_rnn/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37d606dfcfb95b10bb426d66179918955507942eef68355640a24ec95fd18535
+size 57
model_rnn/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c769156491524cd04ccbcc89614a1c17acee03d76d4e99e6e73b6655a13a022f
+size 35540
model_rnn/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92fcd21f1a911fc986008fe7fd965c96a46293d0dd31546d0feddfc6e249be0d
+size 5117841
model_rnn/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3077fbe89ebff4ce89b4501a9c4b34782544026b7c815a3fc6e772e1b324a023
+size 275723988
model_rnn/variables/variables.index ADDED
Binary file (4.25 kB)
prediction.py ADDED
@@ -0,0 +1,83 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import tensorflow_hub as tf_hub
+from tensorflow.keras.models import load_model
+import re
+import nltk
+nltk.download('punkt')
+from nltk.tokenize import word_tokenize
+nltk.download('stopwords')
+from nltk.corpus import stopwords
+nltk.download('wordnet')
+from nltk.stem import WordNetLemmatizer
+
+# Load the model
+loaded_model = load_model('model_rnn')
+
+# Create a dictionary to map the labels to the categories
+label_dict = {0: 'Uang Masuk', 1: 'Uang Keluar', 2: 'Pinjaman', 3: 'Tagihan', 4: 'Top Up',
+              5: 'Biaya & Lainnya', 6: 'Transportasi', 7: 'Pendidikan', 8: 'Hadiah & Amal',
+              9: 'Belanja', 10: 'Hiburan', 11: 'Makanan & Minuman', 12: 'Kesehatan',
+              13: 'Perawatan Diri', 14: 'Hobi & Gaya Hidup', 15: 'Pencairan Investasi',
+              16: 'Tabungan & Investasi'}
+
+def preprocessing(text):
+    '''
+    Preprocess text by applying lowercasing, normalization, tokenization, stopword removal, and lemmatization
+    '''
+    # Lowercase the text
+    text = text.lower()
+
+    # Normalize the text
+    text = re.sub(r'\d+', '', text)  # Remove numbers
+    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
+
+    # Tokenize the text
+    tokens = word_tokenize(text)
+
+    # Get the Indonesian stopwords and add a few informal/English words
+    stop_words = set(stopwords.words('indonesian'))
+    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and'])
+
+    # Remove stopwords
+    tokens = [word for word in tokens if word not in stop_words]
+
+    # Lemmatize the tokens
+    lemmatizer = WordNetLemmatizer()
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+
+    # Combine tokens back into a single string
+    text = ' '.join(tokens)
+
+    return text
+
+def run():
+    st.title('Notes Categorization')
+
+    default = "konser twice"
+
+    user_input = st.text_area("Enter the notes text here:", default, height=50)
+
+    if st.button('Predict'):
+        # Apply the preprocessing function to the user input
+        text_processed = preprocessing(user_input)
+
+        # The model expects a batch of inputs, even when predicting on a single sample,
+        # so add an extra dimension with np.expand_dims
+        preprocessed_notes = np.expand_dims(text_processed, axis=0)
+
+        # Get the prediction
+        predictions = loaded_model.predict(preprocessed_notes)
+
+        # Get the class with the highest probability
+        predicted_class = np.argmax(predictions[0])
+
+        # Decode the predicted class into the original category
+        predicted_category = label_dict[predicted_class]
+
+        st.write(f'The predicted category is: {predicted_category}')
+
+if __name__ == '__main__':
+    run()
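For context, a minimal sketch (not part of this commit) of exercising the same pipeline outside Streamlit; the note text is a made-up example and it assumes the model_rnn/ directory is available in the working directory:

import numpy as np
from prediction import preprocessing, loaded_model, label_dict  # importing also triggers the nltk downloads and model load

note = "beli kopi di kafe"                   # hypothetical example note
clean = preprocessing(note)                  # lowercase, strip numbers/punctuation, remove stopwords, lemmatize
batch = np.expand_dims(clean, axis=0)        # wrap the single string in a batch of shape (1,)
probs = loaded_model.predict(batch)          # probability distribution over the 17 categories
print(label_dict[int(np.argmax(probs[0]))])  # map the argmax index back to a category name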
requirements.txt ADDED
File without changes
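Note that requirements.txt is added empty in this commit; based on the imports in app.py and prediction.py, the app needs at least streamlit, pandas, numpy, tensorflow, tensorflow_hub, and nltk to run.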