"""Streamlit app that classifies a message as spam or non-spam.

On every click of the "Check" button the script reads ``mail_data.csv``
(columns ``Message`` and ``Category`` with values ``spam``/``ham``), fits a
tokenizer and a small Keras network on it, and then classifies the text
entered by the user.
"""

import pandas as pd
import streamlit as st
from keras import Sequential
from keras.layers import Dense, Embedding, Flatten
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences

DATA_PATH = "mail_data.csv"
MAX_LEN = 61          # every message is padded/truncated to this many tokens
EMBEDDING_DIM = 2
EPOCHS = 21
LABELS = {"spam": 0, "ham": 1}
HAM_THRESHOLD = 0.5   # sigmoid output >= threshold means "ham" (label 1)


def _load_training_data(path=DATA_PATH):
    """Read the CSV at *path* and return ``(texts, labels)``.

    ``texts`` is the ``Message`` column as a pandas string Series and
    ``labels`` is an int array of shape ``(n, 1)`` with spam=0 and ham=1.

    Raises:
        ValueError: if ``Category`` holds a value other than spam/ham.
    """
    frame = pd.read_csv(path)
    texts = frame["Message"].astype("string")
    labels = frame["Category"].map(LABELS)
    if labels.isna().any():
        # Fail loudly instead of letting NaN be cast to an arbitrary int.
        raise ValueError("Category column must contain only 'spam' or 'ham'")
    return texts, labels.to_numpy().astype("int").reshape(-1, 1)


def _encode(tokenizer, texts):
    """Turn *texts* into post-padded integer sequences of length MAX_LEN."""
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, padding="post", maxlen=MAX_LEN)


def _build_model(vocab_size):
    """Return a compiled binary classifier for a vocabulary of *vocab_size*."""
    model = Sequential()
    # +1 because tokenizer word indices start at 1; 0 is the padding id.
    model.add(Embedding(vocab_size + 1, EMBEDDING_DIM, input_length=MAX_LEN))
    # Collapse the (MAX_LEN, EMBEDDING_DIM) embedding output into one vector
    # so the Dense stack yields a single score per message rather than one
    # score per token position.
    model.add(Flatten())
    model.add(Dense(5, activation="relu"))
    model.add(Dense(5, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["acc"])
    return model


def _train():
    """Fit the tokenizer and model on the CSV data; return ``(tokenizer, model)``."""
    texts, labels = _load_training_data()
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    model = _build_model(len(tokenizer.word_index))
    model.fit(_encode(tokenizer, texts), labels, epochs=EPOCHS)
    return tokenizer, model


def _is_ham(tokenizer, model, message):
    """Return True when *model* scores *message* as non-spam."""
    probabilities = model.predict(_encode(tokenizer, [message]))
    # The network ends in a single sigmoid unit, so the prediction is the
    # probability of label 1 (ham). It has to be thresholded; argmax over a
    # one-unit output is an index and would always be 0.
    return float(probabilities[0][0]) >= HAM_THRESHOLD


st.title("Spam-NonSpam Detector")
message = st.text_area("Input the message below")

if st.button("Check"):
    if not message.strip():
        # Nothing to classify: skip the expensive training run.
        st.warning("Please enter a message to check.")
    else:
        st.text("Process may take upto a minute. Please be patient. Thank you!")
        # NOTE: the model is retrained on every click; caching the trained
        # tokenizer/model would avoid this if the installed Streamlit
        # version supports resource caching.
        tokenizer, model = _train()
        ham = _is_ham(tokenizer, model, message)

        st.text("Input:")
        st.markdown(message)
        st.text("Output:")
        if ham:
            st.text('Non-spam message')
        else:
            st.text('Spam message')