# maynaS's picture
# Upload main.py
# 106f27e
from fastapi import FastAPI
from sklearn.model_selection import train_test_split
from request_body import request_body
from utilities import *
from classifier import Classifier
# Download the NLTK 'punkt' resource; this runs every time the module is imported.
# NOTE(review): `nltk` and `np` are not imported explicitly in this file --
# presumably they arrive through `from utilities import *`; confirm.
nltk.download('punkt')

# Load the dataset and convert it into a suitable input form for the
# classification model (see `utilities` for what each helper does).
filename = "airline_sentiment_analysis.csv"
raw_data = get_data_for_training(filename)
sentences, labels = get_data_and_labels(raw_data)
sentences = get_word_embeddings(sentences)

# Splitting the dataset into training and testing halves:
#   train set = 50%, test set = 50%
# random_state is pinned so the same split is produced on every run.
(
    airline_train_data,
    airline_test_data,
    airline_train_labels,
    airline_test_labels,
) = train_test_split(sentences, labels, test_size=0.5, random_state=42)

# Training half: vectorized sequences (size argument 20000) and float32 labels.
x_train = vectorize_sequence(airline_train_data, 20000)
y_train = np.asarray(airline_train_labels).astype('float32')

# Testing half: must be vectorized with the same size argument as the training half.
x_test = vectorize_sequence(airline_test_data, 20000)
y_test = np.asarray(airline_test_labels).astype('float32')

# Report the array shapes, one per line, as a quick sanity check.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep="\n")
# ---------------------------------------------------------------------------
# Model selection. Only Logistic Regression is active; the other candidates
# are kept below as disabled (commented-out) alternatives.
# ---------------------------------------------------------------------------

# # Naive Bayes (disabled)
# classifier_mnb = Classifier("Naive Bayes")
# classifier_mnb.train(x_train, y_train)
# print("NAIVE BAYES")
# print("train shape: " + str(x_train.shape))
# print("score on test: " + str(classifier_mnb.score(x_test, y_test)))
# print("score on train: " + str(classifier_mnb.score(x_train, y_train)))

# Logistic Regression (active): train on the training half, then report the
# score on the test half followed by the score on the training half.
# `classifier_lr` is the model the /predict endpoint uses.
classifier_lr = Classifier("Logistic Regression")
classifier_lr.train(x_train, y_train)
print("LOGISTIC REGRESSION")
print(f"train shape: {x_train.shape}")
print(f"score on test: {classifier_lr.score(x_test, y_test)}")
print(f"score on train: {classifier_lr.score(x_train, y_train)}")

# # KNN (disabled)
# classifier_knn = Classifier("KNN")
# classifier_knn.train(x_train, y_train)
# print("KNN")
# print("train shape: " + str(x_train.shape))
# print("score on train: " + str(classifier_knn.score(x_train, y_train)))
# print("score on test: " + str(classifier_knn.score(x_test, y_test)))

# # Support Vector Machines (disabled)
# classifier_svm = Classifier("SVM")
# classifier_svm.train(x_train, y_train)
# print("SUPPORT VECTOR MACHINE")
# print("train shape: " + str(x_train.shape))
# print("score on test: " + str(classifier_svm.score(x_test, y_test)))
# print("score on train: " + str(classifier_svm.score(x_train, y_train)))
app = FastAPI()


@app.post('/predict')
def predict(data: request_body):
    """Classify the sentiment of the text carried in the request body.

    The text is passed through ``get_sequence`` and then
    ``vectorize_sequence`` with the same size argument (20000) used for the
    training data, and the result is classified by the module-level
    logistic-regression model ``classifier_lr``.

    Args:
        data: Request payload; only its ``text`` attribute is read.

    Returns:
        ``{'sentiment': "positive"}`` when the first predicted class, cast to
        ``int``, equals 1; ``{'sentiment': "negative"}`` for any other value.
    """
    features = vectorize_sequence(get_sequence(data.text), 20000)
    print(features)  # debug output of the vectorized input

    # classify() is indexed at [0]: presumably it returns one prediction per
    # input row and a single row is sent here -- confirm against Classifier.
    predicted = classifier_lr.classify(features)[0]
    print(predicted)  # debug output of the raw prediction, before the int cast

    label = "positive" if int(predicted) == 1 else "negative"
    return {'sentiment': label}