{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"IMPORTS"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from sklearn.model_selection import train_test_split\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import DataLoader, TensorDataset\n",
"import torch.optim as optim\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"LOAD DATA"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Character | \n",
" Morse Code | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A | \n",
" .- | \n",
"
\n",
" \n",
" 1 | \n",
" B | \n",
" -... | \n",
"
\n",
" \n",
" 2 | \n",
" C | \n",
" -.-. | \n",
"
\n",
" \n",
" 3 | \n",
" D | \n",
" -.. | \n",
"
\n",
" \n",
" 4 | \n",
" E | \n",
" . | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Character Morse Code\n",
"0 A .-\n",
"1 B -...\n",
"2 C -.-.\n",
"3 D -..\n",
"4 E ."
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE: absolute, machine-specific location of the dataset; change it here when running elsewhere.\n",
"morse_csv_path = 'C:/My Projects/MorseH Model/morse_data.csv'\n",
"df = pd.read_csv(morse_csv_path)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checking Data types"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(str, str)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df['Character'][0]), type(df['Morse Code'][0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ENCODE THE STRINGS"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"lb = LabelEncoder()\n",
"df['Character'] = lb.fit_transform(df['Character'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ENCODE THE MORSE CODES<br>\n",
"'.' -> 0,<br>\n",
"'-' -> 1,<br>\n",
"' ' -> 2 PADDING"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Character | \n",
" Morse Code | \n",
" Morse Code Enc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 25 | \n",
" .- | \n",
" [0, 1] | \n",
"
\n",
" \n",
" 1 | \n",
" 26 | \n",
" -... | \n",
" [1, 0, 0, 0] | \n",
"
\n",
" \n",
" 2 | \n",
" 27 | \n",
" -.-. | \n",
" [1, 0, 1, 0] | \n",
"
\n",
" \n",
" 3 | \n",
" 28 | \n",
" -.. | \n",
" [1, 0, 0] | \n",
"
\n",
" \n",
" 4 | \n",
" 29 | \n",
" . | \n",
" [0] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Character Morse Code Morse Code Enc\n",
"0 25 .- [0, 1]\n",
"1 26 -... [1, 0, 0, 0]\n",
"2 27 -.-. [1, 0, 1, 0]\n",
"3 28 -.. [1, 0, 0]\n",
"4 29 . [0]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"morse_dict = {'.':0,'-':1,' ':2}\n",
"df['Morse Code Enc'] = df['Morse Code'].apply(lambda x: [morse_dict[char] for char in x])\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max_length = df['Morse Code Enc'].apply(len).max()\n",
"max_length"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pad every encoded Morse code to the maximum length so that all sequences have the same length"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Character | \n",
" Morse Code | \n",
" Morse Code Enc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 25 | \n",
" .- | \n",
" [0, 1, 2, 2, 2, 2, 2, 2] | \n",
"
\n",
" \n",
" 1 | \n",
" 26 | \n",
" -... | \n",
" [1, 0, 0, 0, 2, 2, 2, 2] | \n",
"
\n",
" \n",
" 2 | \n",
" 27 | \n",
" -.-. | \n",
" [1, 0, 1, 0, 2, 2, 2, 2] | \n",
"
\n",
" \n",
" 3 | \n",
" 28 | \n",
" -.. | \n",
" [1, 0, 0, 2, 2, 2, 2, 2] | \n",
"
\n",
" \n",
" 4 | \n",
" 29 | \n",
" . | \n",
" [0, 2, 2, 2, 2, 2, 2, 2] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Character Morse Code Morse Code Enc\n",
"0 25 .- [0, 1, 2, 2, 2, 2, 2, 2]\n",
"1 26 -... [1, 0, 0, 0, 2, 2, 2, 2]\n",
"2 27 -.-. [1, 0, 1, 0, 2, 2, 2, 2]\n",
"3 28 -.. [1, 0, 0, 2, 2, 2, 2, 2]\n",
"4 29 . [0, 2, 2, 2, 2, 2, 2, 2]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Morse Code Enc'] = pad_sequences(df['Morse Code Enc'],maxlen = max_length, padding='post', value=2).tolist()\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Taking Features and Labels"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"X = df['Character'].values\n",
"y = df['Morse Code Enc'].tolist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Splitting Data (Traditional Way) (NOT PREFERRED) (Scroll Down for torch approach)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Convert the split into tensors. Inputs stay 1-D (one character id per sample) and\n",
"# targets stay 2-D (samples x max_length), so row i of the inputs lines up with row i\n",
"# of the targets and the train and test tensors share the same layout. Flattening the\n",
"# targets with .view(-1, 1) would break that pairing.\n",
"X_train_tensor = torch.tensor(X_train, dtype=torch.long)\n",
"X_test_tensor = torch.tensor(X_test, dtype=torch.long)\n",
"y_train_tensor = torch.tensor(y_train, dtype=torch.long)\n",
"y_test_tensor = torch.tensor(y_test, dtype=torch.long)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"class MorseH_Model(nn.Module):\n",
" def __init__(self, input_size, output_size, max_length):\n",
" super(MorseH_Model, self).__init__()\n",
" # Embedding layer to represent each character as a vector\n",
" self.emmbedding = nn.Embedding(input_size, 16)\n",
"\n",
" # Linear Layers\n",
" self.fc1 = nn.Linear(16, 32)\n",
" self.fc2 = nn.Linear(32, output_size*max_length)\n",
"\n",
" #Reshaping output shape to match morse code shape\n",
" self.output_size = output_size\n",
" self.max_length = max_length\n",
" \n",
" def forward(self, x):\n",
" # Pass input through embedding layer\n",
" x = self.emmbedding(x).view(-1, 16)\n",
" x = torch.relu(self.fc1(x))\n",
" x = self.fc2(x)\n",
"\n",
" return x.view(-1, self.max_length, self.output_size)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MorseH_Model(\n",
" (emmbedding): Embedding(54, 16)\n",
" (fc1): Linear(in_features=16, out_features=32, bias=True)\n",
" (fc2): Linear(in_features=32, out_features=24, bias=True)\n",
")"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"input_size = len(lb.classes_)\n",
"output_size = 3\n",
"max_len = max_length\n",
"model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n",
"# Load the weights into a new model\n",
"model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n",
"model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Prepare Data"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"\n",
"X = torch.tensor(df['Character'].values, dtype=torch.long)\n",
"y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)\n",
"\n",
"data = TensorDataset(X, y)\n",
"loader = DataLoader(data, batch_size=16, shuffle=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define Loss Function and Optimizer"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"criterion = nn.CrossEntropyLoss()\n",
"optimizer = optim.Adam(model.parameters(), lr = 0.001)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Training Loop"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# num_epochs = 20\n",
"# for epoch in range(num_epochs):\n",
"# model.train()\n",
"# running_loss = 0.0\n",
"# for inputs, targets in loader:\n",
"# optimizer.zero_grad() # Reset gradients\n",
"# outputs = model(inputs) # Forward Pass\n",
"\n",
"#         # Reshape for Loss Calculation\n",
"# targets = targets.view(-1)\n",
"# outputs = outputs.view(-1, output_size)\n",
"\n",
"# loss = criterion(outputs, targets) # Calculate loss\n",
"# loss.backward() # Backward Pass\n",
"# optimizer.step() # Update weights\n",
"\n",
"# running_loss += loss.item()\n",
" \n",
"# print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(loader):.4f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Evaluating Trained Model"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# model.eval() # set model to evaluation mode\n",
"# sample_size = 10\n",
"# correct = 0\n",
"# total = 0\n",
"# with torch.no_grad():\n",
"# for i in range(sample_size):\n",
"# input_sample = X[i].unsqueeze(0)\n",
"# target_sample = y[i]\n",
"\n",
"# output = model(input_sample)\n",
"# _, predicted = torch.max(output.data, 2)\n",
"\n",
"# total += target_sample.size(0)\n",
"# correct += (predicted.squeeze()==target_sample).sum().item()\n",
"\n",
"# accuracy = 100*correct/total\n",
"# print(f'Accuracy on sample of training set: {accuracy:.2f}%')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Predicting and Decoding the Predicted Output"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def predict(char_index):\n",
"    \"\"\"Return the most likely Morse token id for every output position.\n",
"\n",
"    `char_index` is a single encoded character id. It is wrapped into a batch\n",
"    of one, passed through the global `model`, and the first (only) row of\n",
"    per-position class indices is returned.\n",
"    \"\"\"\n",
"    batch = torch.tensor([char_index])\n",
"    with torch.no_grad():  # inference only, no gradient tracking needed\n",
"        logits = model(batch)\n",
"    # Index of the highest-scoring class along axis 2 (the class axis).\n",
"    best_tokens = torch.max(logits, 2).indices\n",
"    return best_tokens[0]\n",
"\n",
"def decode(prediction):\n",
" # Removing Padding\n",
" prediction = [p for p in prediction if p!=2]\n",
" decode_symb = ['.' if c == 0 else '-' for c in prediction]\n",
" morse_code = ''.join(decode_symb)\n",
" return morse_code"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def encode(word):\n",
"    \"\"\"Convert `word` into the list of integer ids assigned by the label encoder `lb`.\n",
"\n",
"    The word is upper-cased first. Any character the encoder has not seen\n",
"    makes `lb.transform` raise a ValueError. An empty word yields an empty list.\n",
"    \"\"\"\n",
"    chars = list(word.upper())\n",
"    if not chars:\n",
"        return []  # nothing to encode; skip the encoder call entirely\n",
"    # One vectorised transform instead of one encoder call per character.\n",
"    return list(lb.transform(chars))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing with Some Random Data"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['.- .--. .--. .-.. . ',\n",
" '-... .- .-.. .-.. ',\n",
" '-.-. .- - ',\n",
" '-..- -- .- ... -....- - .-. . . ']"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trancode_list = [\"apple\", \"ball\", \"cat\" ,\"xmas-tree\"]\n",
"def get_morse_word(word):\n",
"    \"\"\"Translate `word` to Morse code, with a single space after every character's code.\"\"\"\n",
"    return ''.join(decode(predict(char_id)) + ' ' for char_id in encode(word))\n",
"\n",
"codes = list(map(get_morse_word, trancode_list))\n",
"codes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing with long Sentences"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['Be', 'yourself;', 'everyone', 'else', 'is', 'already', 'taken.'],\n",
" ['So', 'many', 'books', 'so', 'little', 'time.'],\n",
" ['Two',\n",
" 'things',\n",
" 'are',\n",
" 'infinite:',\n",
" 'the',\n",
" 'universe',\n",
" 'and',\n",
" 'human',\n",
" 'stupidity;',\n",
" 'and',\n",
" \"I'm\",\n",
" 'not',\n",
" 'sure',\n",
" 'about',\n",
" 'the',\n",
" 'universe.']]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trancode_sentences = [\"Be yourself; everyone else is already taken.\", \"So many books so little time.\", \"Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.\" ]\n",
"trancode_lists = [ sen.split(' ') for sen in trancode_sentences ]\n",
"trancode_lists"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['-... . -.-- --- ..- .-. ... . .-.. ..-. -.-.-. . ...- . .-. -.-- --- -. . . .-.. ... . .. ... .- .-.. .-. . .- -.. -.-- - .- -.- . -. .-.-.- ',\n",
" '... --- -- .- -. -.-- -... --- --- -.- ... ... --- .-.. .. - - .-.. . - .. -- . .-.-.- ',\n",
" '- .-- --- - .... .. -. --. ... .- .-. . .. -. ..-. .. -. .. - . ---... - .... . ..- -. .. ...- . .-. ... . .- -. -.. .... ..- -- .- -. ... - ..- .--. .. -.. .. - -.-- -.-.-. .- -. -.. .. .----. -- -. --- - ... ..- .-. . .- -... --- ..- - - .... . ..- -. .. ...- . .-. ... . .-.-.- ']"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For each sentence: translate word by word and add one extra space after every word.\n",
"get_morse_codes = [\n",
"    ''.join(get_morse_word(word) + ' ' for word in sentence_words)\n",
"    for sentence_words in trancode_lists\n",
"]\n",
"get_morse_codes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### INFERENCE API"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"- . -- .--. . .-. .- - ..- .-. . "
]
}
],
"source": [
"import time\n",
"\n",
"# Prompt for a message and translate it word by word; every word is followed by one extra space.\n",
"take_input = input(\"Type your message: \")\n",
"response = ''.join(get_morse_word(word) + ' ' for word in take_input.split())\n",
"# Emit one symbol at a time so that the optional delay below can be enabled.\n",
"for symbol in response:\n",
"    print(symbol, end=\"\")\n",
"    # time.sleep(100*pow(10, -3)) FUN"
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Save the model's weights\n",
"torch.save(model.state_dict(), 'morse_model_weights.pth')\n",
"\n",
"# Load the weights into a new model\n",
"model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n",
"\n",
"# Set the model to evaluation mode\n",
"model.eval()\n",
"# Save the entire model\n",
"torch.save(model, 'complete_model.pth')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MorseH_Model(\n",
" (emmbedding): Embedding(54, 16)\n",
" (fc1): Linear(in_features=16, out_features=32, bias=True)\n",
" (fc2): Linear(in_features=32, out_features=24, bias=True)\n",
")"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Save the model weights as pytorch_model.bin\n",
"import torch\n",
"torch.save(model.state_dict(), \"pytorch_model.bin\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To Use it later"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# # Instantiate the model (ensure it has the same architecture)\n",
"# model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n",
"\n",
"# # Load the saved weights\n",
"# model.load_state_dict(torch.load(\"pytorch_model.bin\", weights_only=True))\n",
"\n",
"# # Set the model to evaluation mode if needed\n",
"# model.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}