"""Train the airline-sentiment classifier used by the prediction endpoint.

Everything in this section runs at import time: the NLTK 'punkt' resource is
downloaded, the CSV dataset is loaded and converted with the helpers from
``utilities``, the data is split 50/50 into train and test sets, both sets
are vectorized, and a logistic-regression ``Classifier`` is trained and its
scores are printed.
"""
import nltk
import numpy as np
from fastapi import FastAPI
from sklearn.model_selection import train_test_split

from request_body import request_body
# The wildcard import is the only visible source of get_data_for_training,
# get_data_and_labels, get_word_embeddings, vectorize_sequence and
# get_sequence. nltk and numpy are imported explicitly above so this module no
# longer depends on `utilities` happening to re-export them.
from utilities import *  # noqa: F401,F403
from classifier import Classifier

# Size argument passed to vectorize_sequence for both the train and test data
# (presumably the width of the encoded vectors -- confirm in `utilities`).
# The /predict handler passes the same value when encoding incoming text.
VECTOR_SIZE = 20000

# Download the NLTK resource needed by the text helpers
nltk.download('punkt')

# Get dataset and convert it into suitable form of input for the classification model
filename = "airline_sentiment_analysis.csv"
raw_data = get_data_for_training(filename)
sentences, labels = get_data_and_labels(raw_data)
sentences = get_word_embeddings(sentences)

# Splitting the dataset into Training and Testing dataset to train the model
# Train Set = 50%
# Test Set = 50%
# random_state is fixed so the split is reproducible between runs.
(
    airline_train_data,
    airline_test_data,
    airline_train_labels,
    airline_test_labels,
) = train_test_split(sentences, labels, test_size=0.5, random_state=42)

# Vectorize the sequence for both train and test datasets
x_train = vectorize_sequence(airline_train_data, VECTOR_SIZE)
x_test = vectorize_sequence(airline_test_data, VECTOR_SIZE)
y_train = np.asarray(airline_train_labels).astype('float32')
y_test = np.asarray(airline_test_labels).astype('float32')

print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

# Alternative classifier kept for reference (disabled):
# # Naive Bayes
# classifier_mnb = Classifier("Naive Bayes")
# classifier_mnb.train(x_train, y_train)
# print("NAIVE BAYES")
# print("train shape: " + str(x_train.shape))
# print("score on test: " + str(classifier_mnb.score(x_test, y_test)))
# print("score on train: " + str(classifier_mnb.score(x_train, y_train)))

# Logistic Regression -- the model the /predict endpoint uses
classifier_lr = Classifier("Logistic Regression")
classifier_lr.train(x_train, y_train)
print("LOGISTIC REGRESSION")
print("train shape: " + str(x_train.shape))
print("score on test: " + str(classifier_lr.score(x_test, y_test)))
print("score on train: " + str(classifier_lr.score(x_train, y_train)))

# Alternative classifier kept for reference (disabled):
# # KNN
# classifier_knn = Classifier("KNN")
# classifier_knn.train(x_train, y_train)
# print("KNN")
# print("train shape: " + str(x_train.shape))
# print("score on train: " + str(classifier_knn.score(x_train, y_train)))
# print("score on test: " + str(classifier_knn.score(x_test, y_test)))

# Alternative classifier kept for reference (disabled):
# # Support Vector Machines
# classifier_svm = Classifier("SVM")
# classifier_svm.train(x_train, y_train)
# print("SUPPORT VECTOR MACHINE")
# print("train shape: " + str(x_train.shape))
# print("score on test: " + str(classifier_svm.score(x_test, y_test)))
# print("score on train: " + str(classifier_svm.score(x_train, y_train)))

app = FastAPI()


@app.post('/predict')
def predict(data: request_body):
    """Classify the sentiment of the text in the request body.

    The text is converted with ``get_sequence`` and encoded with
    ``vectorize_sequence`` using the same size argument (20000) as the
    training data, then passed to the logistic-regression classifier.

    Args:
        data: Request payload; only its ``text`` attribute is read.

    Returns:
        ``{'sentiment': "positive"}`` when the first classification result,
        converted to ``int``, equals 1; otherwise
        ``{'sentiment': "negative"}``.
    """
    features = vectorize_sequence(get_sequence(data.text), 20000)
    print(features)

    # classify() is indexed with [0]: only the first result is used.
    prediction = classifier_lr.classify(features)[0]
    print(prediction)

    label = "positive" if int(prediction) == 1 else "negative"
    return {'sentiment': label}