zion581 committed
Commit 2528553
1 Parent(s): 89a6762

Update main.py

Files changed (1)
  1. main.py +94 -0
main.py CHANGED
@@ -0,0 +1,94 @@
+ import re
+ from nltk.tokenize.treebank import TreebankWordDetokenizer
+ import gensim
+ from sklearn.model_selection import train_test_split
+ import tensorflow as tf
+ import keras
+ import numpy as np
+ import pandas as pd
+ from keras.preprocessing.text import Tokenizer
+ from keras_preprocessing.sequence import pad_sequences
+ from fastapi import FastAPI
+ import streamlit as st
+
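+ # NOTE: the FastAPI instance below is created but no routes are registered;
+ # the interactive UI in this script is served entirely by Streamlit.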
+ app = FastAPI()
+
+ csv_data = pd.read_csv('airline_sentiment_analysis.csv')
+
+ train = csv_data[['airline_sentiment', 'text']]
+
+
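+ # Basic text clean-up: strip URLs, mention/e-mail tokens, periods, extra whitespace, and quotes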
+ def purify_data(data):
+     url_pattern = re.compile(r'https?://\S+|www\.\S+')
+     data = url_pattern.sub('', data)
+     data = re.sub(r'\S*@\S*\s?', '', data)
+     data = re.sub(r'\.', '', data)
+     data = re.sub(r'\s+', ' ', data)
+     data = re.sub(r"'", '', data)
+     data = re.sub(r'"', '', data)
+
+     return data
+
+
+ # Split the pd.Series of tweets into a list and clean each entry
+ data_to_list = train['text'].values.tolist()
+ temp = [purify_data(tweet) for tweet in data_to_list]
+
+
+ def sent_to_words(sentences):
+     for sentence in sentences:
+         # deacc=True strips accent marks; simple_preprocess already lowercases and drops punctuation
+         yield gensim.utils.simple_preprocess(str(sentence), deacc=True)
+
+
+ data_words = list(sent_to_words(temp))
+
+
+ def detokenize(text):
+     return TreebankWordDetokenizer().detokenize(text)
+
+
+ # Re-join the token lists into plain strings
+ data = np.array([detokenize(words) for words in data_words])
+
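+ # Binary labels: 'positive' -> 1, anything else -> 0, then one-hot encode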
+ labels = np.array(train['airline_sentiment'])
+ y = np.array([1 if label == 'positive' else 0 for label in labels])
+ labels = tf.keras.utils.to_categorical(y, 2, dtype="float32")
+ del y
+
+ max_words = 5000
+ max_len = 200
+
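+ # Fit the vocabulary on the cleaned corpus and pad every sequence to max_len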
+ tokenizer = Tokenizer(num_words=max_words)
+ tokenizer.fit_on_texts(data)
+ sequences = tokenizer.texts_to_sequences(data)
+ tweets = pad_sequences(sequences, maxlen=max_len)
+
+ X_train, X_test, y_train, y_test = train_test_split(tweets, labels, random_state=0, test_size=0.1)
+
+ best_model = keras.models.load_model("best_model3.hdf5")
+
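+ # Streamlit front end: clean the user's input exactly like the training data,
+ # then map the model's one-hot output back to a sentiment label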
+ sentiment = ['Negative', 'Positive']
+ text = st.text_area("Please enter the text here:")
+
+ if text:
+     text = purify_data(text)
+     sequence = tokenizer.texts_to_sequences([text])
+     test = pad_sequences(sequence, maxlen=max_len)
+     prediction = sentiment[np.around(best_model.predict(test), decimals=0).argmax(axis=1)[0]]
+     st.json(prediction)