import streamlit as st
import pickle


import numpy as np
import pandas as pd
import re
import string
import nltk      
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report 
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix


# English stop-word list consulted by text_clean() when filtering tokens.
try:
    stopwords = nltk.corpus.stopwords.words('english')
except LookupError:
    # The corpus data is not shipped with the nltk package itself, so a fresh
    # environment raises LookupError here.  Fetch it once and retry instead
    # of crashing the app at start-up.
    nltk.download('stopwords')
    stopwords = nltk.corpus.stopwords.words('english')

def text_clean(text):
    """Return the lower-cased tokens of *text* that pass every filter.

    The text is split on whitespace.  A token is kept only when it is purely
    alphabetic, longer than three characters, and not present in the
    module-level ``stopwords`` collection.  Tokens containing digits or
    punctuation are discarded outright rather than cleaned up.

    NOTE(review): no call site is visible in this part of the file;
    presumably the pickled vectoriser refers to this function by name --
    confirm before renaming it or changing its tokenisation.
    """
    normalised = (token.lower().strip() for token in text.split())
    # The checks run cheapest-first; the stop-word lookup is only reached by
    # alphabetic tokens longer than three characters.
    return [
        word
        for word in normalised
        if word.isalpha() and len(word) > 3 and word not in stopwords
    ]

# Load the trained classifier and its fitted TF-IDF vectoriser from disk.
# NOTE: unpickling can execute arbitrary code, so both files must come from a
# trusted source.  The ``with`` blocks close each file handle as soon as the
# object has been deserialised.
with open("model.pickle", "rb") as model_file:
    model = pickle.load(model_file)
with open("tfidf.pickle", "rb") as tfidf_file:
    tfidf = pickle.load(tfidf_file)


st.header("NLP Consumer Complaints")

st.write("Please enter the complaint")

t = st.text_area("comp")

b = st.button("Submit")

# Streamlit re-executes this script on every interaction, so the prediction is
# computed once, and only after the user has actually submitted some text.
if b:
    if t.strip():
        prediction = model.predict(tfidf.transform([t]).toarray())
        # Server-side console trace of the raw prediction array.
        print(prediction)
        st.write(prediction[0])
    else:
        # Blank or whitespace-only input carries nothing to classify.
        st.warning("Please enter a complaint before submitting.")