{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1f0c3d2",
   "metadata": {},
   "source": [
    "# LSTM sentiment classifier\n",
    "\n",
    "Fits an Embedding + LSTM + softmax model on the `text` / `sentiment` columns of the preprocessed training CSV, reports accuracy on the preprocessed test CSV, and saves the trained model.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8101bc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from tensorflow.keras.layers import Embedding, LSTM, Dense\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c2d4e6f8",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader might want to tune lives in the next cell: file locations, the random seed, and the model / training hyperparameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3e5f7a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the input CSVs and receives the saved model.\n",
    "# The default is the original location; set SENTIMENT_DATA_DIR to run elsewhere.\n",
    "DATA_DIR = Path(os.environ.get('SENTIMENT_DATA_DIR', '/Users/saish/Downloads'))\n",
    "TRAIN_CSV = DATA_DIR / 'preprocessed_train_data.csv'\n",
    "TEST_CSV = DATA_DIR / 'preprocessed_test_data.csv'\n",
    "MODEL_PATH = DATA_DIR / 'sentitensor1.keras'\n",
    "\n",
    "SEED = 42\n",
    "EMBEDDING_DIM = 100\n",
    "LSTM_UNITS = 128\n",
    "EPOCHS = 3\n",
    "BATCH_SIZE = 16\n",
    "VALIDATION_SPLIT = 0.2\n",
    "\n",
    "# Seed Python, NumPy and TensorFlow in one call so that weight initialisation\n",
    "# and batch shuffling repeat from run to run.\n",
    "tf.keras.utils.set_random_seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4f6a8b0",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5a7b9c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data = pd.read_csv(TRAIN_CSV)\n",
    "test_data = pd.read_csv(TEST_CSV)\n",
    "\n",
    "# An empty text field in the CSV is read back as NaN (a float), which the\n",
    "# Tokenizer cannot lower-case or split. Treat such rows as empty strings so\n",
    "# they stay in the data set and simply become all-padding sequences.\n",
    "train_texts = train_data['text'].fillna('').astype(str)\n",
    "test_texts = test_data['text'].fillna('').astype(str)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6b8c0d2",
   "metadata": {},
   "source": [
    "## Tokenize and pad\n",
    "\n",
    "The vocabulary is learned from the training texts only; the same tokenizer is then applied to the test texts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7c9d1e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = Tokenizer()\n",
    "tokenizer.fit_on_texts(train_texts)\n",
    "\n",
    "train_sequences = tokenizer.texts_to_sequences(train_texts)\n",
    "test_sequences = tokenizer.texts_to_sequences(test_texts)\n",
    "\n",
    "# Every sequence is padded to the longest training sequence; test sequences\n",
    "# longer than that are truncated to the same length by pad_sequences.\n",
    "max_length = max(len(seq) for seq in train_sequences)\n",
    "train_padded = pad_sequences(train_sequences, maxlen=max_length)\n",
    "test_padded = pad_sequences(test_sequences, maxlen=max_length)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8d0e2f4",
   "metadata": {},
   "source": [
    "## Encode labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9e1f3a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The encoder is fitted on the training labels and reused for the test labels\n",
    "# so both splits share one label-to-integer mapping.\n",
    "label_encoder = LabelEncoder()\n",
    "train_labels = label_encoder.fit_transform(train_data['sentiment'])\n",
    "test_labels = label_encoder.transform(test_data['sentiment'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0f2a4b6",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1a3b5c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenizer word indices start at 1 and pad_sequences pads with 0, so the\n",
    "# embedding table needs one row more than the vocabulary size.\n",
    "vocab_size = len(tokenizer.word_index) + 1\n",
    "num_classes = len(label_encoder.classes_)\n",
    "\n",
    "model = Sequential()\n",
    "model.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_length))\n",
    "model.add(LSTM(units=LSTM_UNITS))\n",
    "model.add(Dense(units=num_classes, activation='softmax'))\n",
    "\n",
    "# Labels are integer class ids, hence the sparse variant of the loss.\n",
    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2b4c6d8",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3c5d7e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bind the result to a name so the History object's repr is not echoed as cell output.\n",
    "history = model.fit(\n",
    "    train_padded,\n",
    "    train_labels,\n",
    "    epochs=EPOCHS,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    validation_split=VALIDATION_SPLIT,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4d6e8f0",
   "metadata": {},
   "source": [
    "## Evaluate and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5e7f9a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_loss, test_accuracy = model.evaluate(test_padded, test_labels)\n",
    "print(f'Test Accuracy: {test_accuracy}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6f8a0b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the trained model; the path is passed as a plain string.\n",
    "model.save(str(MODEL_PATH))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}