didev007 committed
Commit 381d880
1 Parent(s): d52d1f8

Upload 8 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_rnn/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,24 @@
+import pandas as pd
+import streamlit as st
+import prediction
+
+# Set the page title and layout
+st.set_page_config(page_title="Notes Text Classification",
+                   layout='wide',
+                   initial_sidebar_state='expanded'
+                   )
+
+# Create a sidebar with a title and a selection box
+st.sidebar.title("Choose a page:")
+page = st.sidebar.selectbox("", ('Landing Page', 'Data Prediction'))
+
+# Display different content depending on the selected page
+if page == 'Data Prediction':
+    prediction.run()
+else:
+    # Add a header and a subheader with some text
+    st.title("What category does this note belong to?")
+    st.subheader("Find out the category with this space that uses NLP to do predictions.")
+
+    # Add an image about the case
+    st.image("https://imageio.forbes.com/specials-images/imageserve/60808d87824ab7edc3770486/Note-Pad-and-Pen-on-Yellow-background/960x0.jpg?height=474&width=711&fit=bounds")
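For reference (not part of this commit): once the dependencies are installed, the app can typically be started locally with Streamlit's CLI via streamlit run app.py; on a Hugging Face Space using the Streamlit SDK, app.py is normally picked up as the entry point automatically.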
model_rnn/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37d606dfcfb95b10bb426d66179918955507942eef68355640a24ec95fd18535
+size 57
model_rnn/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c769156491524cd04ccbcc89614a1c17acee03d76d4e99e6e73b6655a13a022f
+size 35540
model_rnn/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92fcd21f1a911fc986008fe7fd965c96a46293d0dd31546d0feddfc6e249be0d
+size 5117841
model_rnn/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3077fbe89ebff4ce89b4501a9c4b34782544026b7c815a3fc6e772e1b324a023
+size 275723988
model_rnn/variables/variables.index ADDED
Binary file (4.25 kB)
prediction.py ADDED
@@ -0,0 +1,83 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import tensorflow_hub as tf_hub
+from tensorflow.keras.models import load_model
+import re
+import nltk
+nltk.download('punkt')
+from nltk.tokenize import word_tokenize
+nltk.download('stopwords')
+from nltk.corpus import stopwords
+nltk.download('wordnet')
+from nltk.stem import WordNetLemmatizer
+
+# Load the model
+loaded_model = load_model('model_rnn')
+
+# Create a dictionary to map the labels to the categories
+label_dict = {0: 'Uang Masuk', 1: 'Uang Keluar', 2: 'Pinjaman', 3: 'Tagihan', 4: 'Top Up',
+              5: 'Biaya & Lainnya', 6: 'Transportasi', 7: 'Pendidikan', 8: 'Hadiah & Amal',
+              9: 'Belanja', 10: 'Hiburan', 11: 'Makanan & Minuman', 12: 'Kesehatan',
+              13: 'Perawatan Diri', 14: 'Hobi & Gaya Hidup', 15: 'Pencairan Investasi',
+              16: 'Tabungan & Investasi'}
+
+def preprocessing(text):
+    '''
+    Preprocess text by applying lowercasing, normalization, tokenization, stopword removal, and lemmatization
+    '''
+    # Lowercase the text
+    text = text.lower()
+
+    # Normalize the text
+    text = re.sub(r'\d+', '', text)  # Remove numbers
+    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
+
+    # Tokenize the text
+    tokens = word_tokenize(text)
+
+    # Get the Indonesian stopwords and add a few informal/English words
+    stop_words = set(stopwords.words('indonesian'))
+    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and'])
+
+    # Remove stopwords
+    tokens = [word for word in tokens if word not in stop_words]
+
+    # Lemmatize the tokens
+    lemmatizer = WordNetLemmatizer()
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+
+    # Combine tokens back into a single string
+    text = ' '.join(tokens)
+
+    return text
+
+def run():
+    st.title('Notes Categorization')
+
+    default = "konser twice"
+
+    user_input = st.text_area("Enter the notes text here:", default, height=50)
+
+    if st.button('Predict'):
+        # Apply the preprocessing function to the user input
+        text_processed = preprocessing(user_input)
+
+        # The model expects a batch of inputs, even when predicting on a single sample,
+        # so add an extra dimension with np.expand_dims
+        preprocessed_notes = np.expand_dims(text_processed, axis=0)
+
+        # Get the prediction
+        predictions = loaded_model.predict(preprocessed_notes)
+
+        # Get the class with the highest probability
+        predicted_class = np.argmax(predictions[0])
+
+        # Decode the predicted class into the original category
+        predicted_category = label_dict[predicted_class]
+
+        st.write(f'The predicted category is: {predicted_category}')
+
+if __name__ == '__main__':
+    run()
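For context, a minimal sketch (not part of this commit) of exercising the same pipeline outside Streamlit; the note text is a made-up example and it assumes the model_rnn/ directory is available in the working directory:

import numpy as np
from prediction import preprocessing, loaded_model, label_dict  # importing also triggers the nltk downloads and model load

note = "beli kopi di kafe"                   # hypothetical example note
clean = preprocessing(note)                  # lowercase, strip numbers/punctuation, remove stopwords, lemmatize
batch = np.expand_dims(clean, axis=0)        # wrap the single string in a batch of shape (1,)
probs = loaded_model.predict(batch)          # probability distribution over the 17 categories
print(label_dict[int(np.argmax(probs[0]))])  # map the argmax index back to a category name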
requirements.txt ADDED
File without changes
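Note that requirements.txt is added empty in this commit; based on the imports in app.py and prediction.py, the app needs at least streamlit, pandas, numpy, tensorflow, tensorflow_hub, and nltk to run.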