bainskarman committed on
Commit
04eb1fc
·
verified ·
1 Parent(s): 8483b27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -78
app.py CHANGED
@@ -1,79 +1,81 @@
1
- import streamlit as st
2
- import pickle
3
- import numpy as np
4
- import os
5
- from tensorflow.keras.models import load_model
6
- import numpy as np
7
- import pandas as pd
8
- import re
9
- import nltk
10
- from nltk.stem import WordNetLemmatizer
11
- from nltk.tokenize import word_tokenize
12
- import matplotlib.pyplot as plt
13
- import seaborn as sns
14
- model = load_model('best_model.keras')
15
- # Load the tokenizer
16
- with open('tokenizer.pkl' ,'rb') as f:
17
- tokenizer = pickle.load(f)
18
-
19
- # Load the label encoder
20
- with open('label_encoder.pkl', 'rb') as f:
21
- label_encoder = pickle.load(f)
22
-
23
- # Load max_length
24
- with open('max_length.pkl', 'rb') as f:
25
- max_length = pickle.load(f)
26
-
27
- # Load stop words
28
- with open('stop_words.pkl', 'rb') as f:
29
- stop_words = pickle.load(f)
30
-
31
- lemmatizer = WordNetLemmatizer()
32
- def preprocess_text(text):
33
- text = str(text)
34
- text = text.lower()
35
- text = re.sub(r'[^a-z\s]', '', text)
36
- words = text.split()
37
- st_words = stop_words
38
- words = [word for word in words if word not in stop_words]
39
- words = [lemmatizer.lemmatize(word) for word in words]
40
- text = ' '.join(words)
41
- return text
42
- def classify_text(text):
43
- text = preprocess_text(text)
44
- seq = tokenizer.texts_to_sequences([text])
45
- padded_seq = np.pad(seq, ((0, 0), (0, max_length - len(seq[0]))), mode='constant')
46
-
47
- prediction = model.predict(padded_seq)
48
- predicted_label_index = np.argmax(prediction, axis=1)[0]
49
- predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
50
- categories = predicted_label.split('|')
51
-
52
- if len(categories) == 3:
53
- main_category = categories[0]
54
- sub_category = categories[1]
55
- lowest_category = categories[2]
56
- else:
57
- main_category = "Unknown"
58
- sub_category = "Unknown"
59
- lowest_category = "Unknown"
60
- return main_category, sub_category, lowest_category
61
-
62
-
63
- # Streamlit UI
64
- def main():
65
- st.title("Text Classifier")
66
-
67
- # Text input
68
- user_input = st.text_input("Enter text to classify")
69
-
70
- if st.button("Classify"):
71
- if user_input:
72
- # Classify input text
73
- main_category, sub_category, lowest_category = classify_text(user_input)
74
- st.success(f"Main Category: {main_category}, Sub Category: {sub_category}, Lowest Category: {lowest_category}")
75
- else:
76
- st.warning("Please enter some text.")
77
-
78
- if __name__ == '__main__':
 
 
79
  main()
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+ import os
5
+ from tensorflow.keras.models import load_model
6
+ import numpy as np
7
+ import pandas as pd
8
+ import re
9
+ import nltk
10
+ from nltk.stem import WordNetLemmatizer
11
+ from nltk.tokenize import word_tokenize
12
+ import matplotlib.pyplot as plt
13
+ import seaborn as sns
14
+ import nltk
15
+ nltk.download('wordnet')
16
+ model = load_model('best_model.keras')
17
+ # Load the tokenizer
18
+ with open('tokenizer.pkl' ,'rb') as f:
19
+ tokenizer = pickle.load(f)
20
+
21
+ # Load the label encoder
22
+ with open('label_encoder.pkl', 'rb') as f:
23
+ label_encoder = pickle.load(f)
24
+
25
+ # Load max_length
26
+ with open('max_length.pkl', 'rb') as f:
27
+ max_length = pickle.load(f)
28
+
29
+ # Load stop words
30
+ with open('stop_words.pkl', 'rb') as f:
31
+ stop_words = pickle.load(f)
32
+
33
+ lemmatizer = WordNetLemmatizer()
34
+ def preprocess_text(text):
35
+ text = str(text)
36
+ text = text.lower()
37
+ text = re.sub(r'[^a-z\s]', '', text)
38
+ words = text.split()
39
+ st_words = stop_words
40
+ words = [word for word in words if word not in stop_words]
41
+ words = [lemmatizer.lemmatize(word) for word in words]
42
+ text = ' '.join(words)
43
+ return text
44
+ def classify_text(text):
45
+ text = preprocess_text(text)
46
+ seq = tokenizer.texts_to_sequences([text])
47
+ padded_seq = np.pad(seq, ((0, 0), (0, max_length - len(seq[0]))), mode='constant')
48
+
49
+ prediction = model.predict(padded_seq)
50
+ predicted_label_index = np.argmax(prediction, axis=1)[0]
51
+ predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
52
+ categories = predicted_label.split('|')
53
+
54
+ if len(categories) == 3:
55
+ main_category = categories[0]
56
+ sub_category = categories[1]
57
+ lowest_category = categories[2]
58
+ else:
59
+ main_category = "Unknown"
60
+ sub_category = "Unknown"
61
+ lowest_category = "Unknown"
62
+ return main_category, sub_category, lowest_category
63
+
64
+
65
+ # Streamlit UI
66
+ def main():
67
+ st.title("Text Classifier")
68
+
69
+ # Text input
70
+ user_input = st.text_input("Enter text to classify")
71
+
72
+ if st.button("Classify"):
73
+ if user_input:
74
+ # Classify input text
75
+ main_category, sub_category, lowest_category = classify_text(user_input)
76
+ st.success(f"Main Category: {main_category}, Sub Category: {sub_category}, Lowest Category: {lowest_category}")
77
+ else:
78
+ st.warning("Please enter some text.")
79
+
80
+ if __name__ == '__main__':
81
  main()