{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "IMPORTS" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.preprocessing import LabelEncoder\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from sklearn.model_selection import train_test_split\n", "import torch\n", "import torch.nn as nn\n", "from torch.utils.data import DataLoader, TensorDataset\n", "import torch.optim as optim\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "LOAD DATA" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ " Character Morse Code\n", "0 A .-\n", "1 B -...\n", "2 C -.-.\n", "3 D -..\n", "4 E ." ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('C:/My Projects/MorseH Model/morse_data.csv')\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Checking Data types" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(str, str)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df['Character'][0]), type(df['Morse Code'][0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "ENCODE THE STRINGS" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "lb = LabelEncoder()\n", "df['Character'] = lb.fit_transform(df['Character'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "ENCODE THE MORSE CODES
\n", "'.' -> 0,
\n", "'-' -> 1,
\n", "' ' -> 2 PADDING" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ " Character Morse Code Morse Code Enc\n", "0 25 .- [0, 1]\n", "1 26 -... [1, 0, 0, 0]\n", "2 27 -.-. [1, 0, 1, 0]\n", "3 28 -.. [1, 0, 0]\n", "4 29 . [0]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "morse_dict = {'.':0,'-':1,' ':2}\n", "df['Morse Code Enc'] = df['Morse Code'].apply(lambda x: [morse_dict[char] for char in x])\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max_length = df['Morse Code Enc'].apply(len).max()\n", "max_length" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Adding Padding to equalize the length of each morse code enocoded to max length" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[0, 2, 2, 2, 2, 2, 2, 2]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Morse Code Enc'] = pad_sequences(df['Morse Code Enc'],maxlen = max_length, padding='post', value=2).tolist()\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Taking Features and Labels" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "X = df['Character'].values\n", "y = df['Morse Code Enc'].tolist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Splitting Data (Traditional Way) (NOT PREFERRED) (Scroll Down for torch approach)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "X_train_tensor = torch.tensor(X_train, dtype=torch.long).view(-1, 1)\n", "X_test_tensor = torch.tensor(X_test, dtype=torch.long)\n", "y_train_tensor = torch.tensor(y_train, dtype=torch.long).view(-1, 1)\n", "y_test_tensor = torch.tensor(y_test, dtype=torch.long)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "class MorseH_Model(nn.Module):\n", " def __init__(self, input_size, output_size, max_length):\n", " super(MorseH_Model, self).__init__()\n", " # Embedding layer to represent each character as a vector\n", " self.emmbedding = nn.Embedding(input_size, 16)\n", "\n", " # Linear Layers\n", " self.fc1 = nn.Linear(16, 32)\n", " self.fc2 = nn.Linear(32, output_size*max_length)\n", "\n", " #Reshaping output shape to match morse code shape\n", " self.output_size = output_size\n", " self.max_length = max_length\n", " \n", " def forward(self, x):\n", " # Pass input through embedding layer\n", " x = self.emmbedding(x).view(-1, 16)\n", " x = torch.relu(self.fc1(x))\n", " x = self.fc2(x)\n", "\n", " return x.view(-1, self.max_length, self.output_size)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MorseH_Model(\n", " (emmbedding): Embedding(54, 16)\n", " (fc1): Linear(in_features=16, out_features=32, bias=True)\n", " (fc2): Linear(in_features=32, out_features=24, bias=True)\n", ")" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_size = len(lb.classes_)\n", "output_size = 3\n", "max_len = max_length\n", "model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n", "# Load the weights into a new model\n", "model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n", "model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Prepare Data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "\n", "X = torch.tensor(df['Character'].values, dtype=torch.long)\n", "y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)\n", "\n", "data = TensorDataset(X, y)\n", "loader = DataLoader(data, batch_size=16, shuffle=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define Loss Function and Optimizer" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "criterion = nn.CrossEntropyLoss()\n", "optimizer = optim.Adam(model.parameters(), lr = 0.001)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Training Loop" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# num_epochs = 20\n", "# for epoch in range(num_epochs):\n", "# model.train()\n", "# running_loss = 0.0\n", "# for inputs, targets in loader:\n", "# optimizer.zero_grad() # Reset gradients\n", "# outputs = model(inputs) # Forward Pass\n", "\n", "# # Redhape for Loss Calculation\n", "# targets = targets.view(-1)\n", "# outputs = outputs.view(-1, output_size)\n", "\n", "# loss = criterion(outputs, targets) # Calculate loss\n", "# loss.backward() # Backward Pass\n", "# optimizer.step() # Update weights\n", "\n", "# running_loss += loss.item()\n", " \n", "# print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(loader):.4f}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Evaluating Trained Model" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# model.eval() # set model to evaluation mode\n", "# sample_size = 10\n", "# correct = 0\n", "# total = 0\n", "# with torch.no_grad():\n", "# for i in range(sample_size):\n", "# input_sample = X[i].unsqueeze(0)\n", "# target_sample = y[i]\n", "\n", "# output = model(input_sample)\n", "# _, predicted = torch.max(output.data, 2)\n", "\n", "# total += target_sample.size(0)\n", "# correct += (predicted.squeeze()==target_sample).sum().item()\n", "\n", "# accuracy = 100*correct/total\n", "# print(f'Accuracy on sample of training set: {accuracy:.2f}%')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Predicting and Decoding the Predicted Output" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "def predict(char_index):\n", " with torch.no_grad():\n", " output = model(torch.tensor([char_index]))\n", " _, prediction = torch.max(output, 2)\n", " return prediction[0]\n", "\n", "def decode(prediction):\n", " # Removing Padding\n", " prediction = [p for p in prediction if p!=2]\n", " decode_symb = ['.' if c == 0 else '-' for c in prediction]\n", " morse_code = ''.join(decode_symb)\n", " return morse_code" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "def encode(word):\n", " word = word.upper()\n", " return [lb.transform([c])[0] for c in word]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Testing with Some Random Data" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['.- .--. .--. .-.. . ',\n", " '-... .- .-.. .-.. ',\n", " '-.-. .- - ',\n", " '-..- -- .- ... -....- - .-. . . ']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trancode_list = [\"apple\", \"ball\", \"cat\" ,\"xmas-tree\"]\n", "def get_morse_word(word):\n", " char_indices = encode(word)\n", " decoded = []\n", " for ind in char_indices:\n", " pred = predict(ind)\n", " decoded.append(decode(pred))\n", " decoded.append(' ')\n", " return ''.join(decoded)\n", "codes = [get_morse_word(word) for word in trancode_list]\n", "codes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Testing with long Sentences" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[['Be', 'yourself;', 'everyone', 'else', 'is', 'already', 'taken.'],\n", " ['So', 'many', 'books', 'so', 'little', 'time.'],\n", " ['Two',\n", " 'things',\n", " 'are',\n", " 'infinite:',\n", " 'the',\n", " 'universe',\n", " 'and',\n", " 'human',\n", " 'stupidity;',\n", " 'and',\n", " \"I'm\",\n", " 'not',\n", " 'sure',\n", " 'about',\n", " 'the',\n", " 'universe.']]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trancode_sentences = [\"Be yourself; everyone else is already taken.\", \"So many books so little time.\", \"Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.\" ]\n", "trancode_lists = [ sen.split(' ') for sen in trancode_sentences ]\n", "trancode_lists" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['-... . -.-- --- ..- .-. ... . .-.. ..-. -.-.-. . ...- . .-. -.-- --- -. . . .-.. ... . .. ... .- .-.. .-. . .- -.. -.-- - .- -.- . -. .-.-.- ',\n", " '... --- -- .- -. -.-- -... --- --- -.- ... ... --- .-.. .. - - .-.. . - .. -- . .-.-.- ',\n", " '- .-- --- - .... .. -. --. ... .- .-. . .. -. ..-. .. -. .. - . ---... - .... . ..- -. .. ...- . .-. ... . .- -. -.. .... ..- -- .- -. ... - ..- .--. .. -.. .. - -.-- -.-.-. .- -. -.. .. .----. -- -. --- - ... ..- .-. . .- -... --- ..- - - .... . ..- -. .. ...- . .-. ... . .-.-.- ']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_morse_codes = []\n", "for l1 in trancode_lists:\n", " codes = [get_morse_word(word)+' ' for word in l1]\n", " get_morse_codes.append(''.join(codes))\n", "get_morse_codes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### INFERENCE API" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "- . -- .--. . .-. .- - ..- .-. . " ] } ], "source": [ "import time\n", "take_input = input(\"Type your message: \")\n", "response = [get_morse_word(word)+' ' for word in take_input.split()]\n", "response = ''.join(response)\n", "for i in response:\n", " print(i, end=\"\")\n", " # time.sleep(100*pow(10, -3)) FUN" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Save the model's weights\n", "torch.save(model.state_dict(), 'morse_model_weights.pth')\n", "\n", "# Load the weights into a new model\n", "model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n", "\n", "# Set the model to evaluation mode\n", "model.eval()\n", "# Save the entire model\n", "torch.save(model, 'complete_model.pth')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MorseH_Model(\n", " (emmbedding): Embedding(54, 16)\n", " (fc1): Linear(in_features=16, out_features=32, bias=True)\n", " (fc2): Linear(in_features=32, out_features=24, bias=True)\n", ")" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# Save the model weights as pytorch_model.bin\n", "import torch\n", "torch.save(model.state_dict(), \"pytorch_model.bin\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To Use it later" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# # Instantiate the model (ensure it has the same architecture)\n", "# model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n", "\n", "# # Load the saved weights\n", "# model.load_state_dict(torch.load(\"pytorch_model.bin\"))\n", "\n", "# # Set the model to evaluation mode if needed\n", "# model.eval()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }