Harinivas-28 committed on
Commit
1895c5d
1 Parent(s): fce1b1a

Adding MorseHModel and documentation

Browse files
MorseH_Model.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IMPORTS
2
+ import pandas as pd
3
+ from sklearn.preprocessing import LabelEncoder
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ import torch
6
+ import torch.nn as nn
7
+ from torch.utils.data import DataLoader, TensorDataset
8
+ import torch.optim as optim
9
+ import matplotlib.pyplot as plt
10
+ import time
11
+
12
# LOAD DATA
# morse_data.csv is committed next to this script, so resolve it relative to
# the script's own location instead of a machine-specific absolute path.
from pathlib import Path

DATA_PATH = Path(__file__).resolve().parent / 'morse_data.csv'
df = pd.read_csv(DATA_PATH)

# ENCODE CHARACTERS AND MORSE CODE
# Map every distinct character to an integer class id. The fitted encoder is
# reused later by encode() to translate user input into the same ids.
label_encoder = LabelEncoder()
df['Character'] = label_encoder.fit_transform(df['Character'])

# Symbol ids for the Morse alphabet: '.' -> 0, '-' -> 1, ' ' -> 2.
# The id of ' ' doubles as the padding value below.
morse_dict = {'.': 0, '-': 1, ' ': 2}
df['Morse Code Enc'] = df['Morse Code'].apply(lambda x: [morse_dict[char] for char in x])

# Right-pad every encoded sequence with the padding id so that all rows have
# the length of the longest Morse code in the dataset.
max_length = df['Morse Code Enc'].apply(len).max()
df['Morse Code Enc'] = pad_sequences(
    df['Morse Code Enc'], maxlen=max_length, padding='post', value=morse_dict[' ']
).tolist()

# PREPARE FEATURES AND LABELS
# X: one character id per row; y: one padded symbol-id sequence per row.
X = torch.tensor(df['Character'].values, dtype=torch.long)
y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)
31
+
32
+ # MODEL DEFINITION
33
class MorseHModel(nn.Module):
    """Feed-forward network that maps a character id to a Morse symbol sequence.

    The character id is embedded, passed through one hidden ReLU layer and
    projected to ``max_length * output_size`` logits, which are reshaped so
    that every sequence position gets its own distribution over symbols.

    Args:
        input_size: Number of distinct character ids (embedding rows).
        output_size: Number of symbol classes predicted per position.
        max_length: Number of positions in every predicted sequence.
        embedding_dim: Width of the character embedding (default 16).
        hidden_dim: Width of the hidden layer (default 32).
    """

    def __init__(self, input_size, output_size, max_length,
                 embedding_dim=16, hidden_dim=32):
        super().__init__()
        # NOTE: the attribute is spelled "emmbedding" on purpose. The name is
        # part of the state_dict keys, so renaming it would make previously
        # saved weight files fail to load.
        self.emmbedding = nn.Embedding(input_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_size * max_length)
        self.output_size = output_size
        self.max_length = max_length

    def forward(self, x):
        """Return logits of shape ``(N, max_length, output_size)``.

        ``x`` is a long tensor of character ids; every id it contains is
        treated as one independent sample, so ``N`` is the number of ids.
        """
        # Read the width from the embedding layer itself so the reshape always
        # agrees with the layer, including for models restored by unpickling.
        x = self.emmbedding(x).view(-1, self.emmbedding.embedding_dim)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x.view(-1, self.max_length, self.output_size)
47
+
48
# One embedding row per character known to the label encoder; three symbol
# classes per position (dot, dash, padding).
input_size = len(label_encoder.classes_)
output_size = 3
model = MorseHModel(input_size=input_size, output_size=output_size, max_length=max_length)

# Reuse previously saved weights when they exist; otherwise flag the model as
# untrained so the training loop below runs.
try:
    saved_state = torch.load('morse_model_weights.pth', weights_only=True)
except FileNotFoundError:
    not_pretrained = True
    print("Pre-trained weights not found, starting training from scratch.")
else:
    model.load_state_dict(saved_state)
    not_pretrained = False
59
+
60
# CREATE DATALOADER
# Shuffled mini-batches of (character id, padded Morse sequence) pairs.
dataset = TensorDataset(X, y)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)

# LOSS FUNCTION AND OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# TRAINING LOOP
# Skipped entirely when pre-trained weights were loaded above.
num_epochs = 20
if not_pretrained:
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for batch_inputs, batch_targets in data_loader:
            optimizer.zero_grad()
            logits = model(batch_inputs)

            # Flatten (batch, position) into one axis so that every sequence
            # position is scored as an independent classification.
            flat_logits = logits.view(-1, output_size)
            flat_targets = batch_targets.view(-1)

            loss = criterion(flat_logits, flat_targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Report the mean batch loss for the epoch
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(data_loader):.4f}")
88
+
89
# MODEL EVALUATION
# Spot-check per-symbol accuracy on the first `sample_size` training rows.
model.eval()
sample_size = 10
correct_predictions = 0
total_elements = 0

with torch.no_grad():
    for row in range(sample_size):
        # Add a batch axis so the single character id is fed as a batch of one
        logits = model(X[row].unsqueeze(0))
        target_sample = y[row]

        # Most likely symbol id at every sequence position
        predicted = torch.max(logits.data, 2).indices

        matches = predicted.squeeze() == target_sample
        correct_predictions += matches.sum().item()
        total_elements += target_sample.size(0)

accuracy = 100 * correct_predictions / total_elements
print(f"Accuracy on sample of training set: {accuracy:.2f}%")
108
+
109
+ # INFERENCE FUNCTIONS
110
def predict(character_index):
    """Return the predicted symbol ids for one character index.

    The index is wrapped into a batch of one, run through the module-level
    ``model`` without gradient tracking, and the highest-scoring symbol id at
    every sequence position is returned as a 1-D tensor.
    """
    batch = torch.tensor([character_index])
    with torch.no_grad():
        logits = model(batch)
        best_ids = torch.max(logits, 2).indices
    # Drop the batch axis: only one character was submitted
    return best_ids[0]
116
+
117
def decode(prediction):
    """Turn a sequence of symbol ids into a Morse string.

    Ids equal to 2 (padding) are skipped, 0 becomes '.', and every other id
    becomes '-'.
    """
    symbols = []
    for code in prediction:
        if code == 2:
            # Padding carries no symbol
            continue
        symbols.append('.' if code == 0 else '-')
    return ''.join(symbols)
121
+
122
def encode(word):
    """Translate a word into the list of character indices used by the model.

    The word is upper-cased first, then each character is looked up on its
    own through the module-level ``label_encoder``.
    """
    indices = []
    for symbol in word.upper():
        encoded = label_encoder.transform([symbol])
        indices.append(encoded[0])
    return indices
125
+
126
def get_morse_word(word):
    """Return the model's Morse rendering of a word.

    Every character is encoded, predicted and decoded in turn; each decoded
    code is followed by a single space, so the result ends with a space
    whenever the word is non-empty.
    """
    return ''.join(decode(predict(index)) + ' ' for index in encode(word))
135
+
136
# USER INPUT INFERENCE
# Translate the typed message word by word. Each word's code is followed by an
# extra space, on top of the space get_morse_word puts after every letter.
user_input = input("Type your message: ")
response = ''.join(get_morse_word(word) + ' ' for word in user_input.split())

print("Response: ", response)
# Optional visualization kept from the original, disabled:
# for char in response:
#     print(char, end="")
#     time.sleep(10*pow(10, -3))  # Delay for visualization

# SAVE MODEL
# Store the weights alone (reloaded at start-up on the next run) and a full
# serialized copy of the model object.
torch.save(model.state_dict(), 'morse_model_weights.pth')
torch.save(model, 'complete_model.pth')
README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MorseHModel
2
+
3
+ This model is designed to convert textual characters into Morse code symbols (dots, dashes, and spaces) using a custom neural network in PyTorch.
4
+
5
+ ## Model Architecture
6
+ The model uses an embedding layer followed by two fully connected layers to predict Morse code encodings.
7
+
8
+ ### Model Inputs and Outputs
9
+ - **Inputs:** Character indices of textual input.
10
+ - **Outputs:** Morse code sequence for each character in the input.
11
+
12
+ ### Training and Dataset
13
+ - **Dataset:** Custom Morse code dataset.
14
+ - **Training:** Trained for 20 epochs with a batch size of 16.
15
+
16
+ ### Usage
17
+ Below is an example of how to use the model.
18
+
19
+ ```python
20
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
21
+ import torch
22
+
23
+ # Load model and tokenizer
24
+ model = torch.load("morse_model_weights.pth")
25
+ tokenizer = AutoTokenizer.from_pretrained("username/MorseH_Model")
26
+
27
+ # Predict Morse code
28
+ input_text = "HELLO"
29
+ inputs = tokenizer(input_text, return_tensors="pt")
30
+ outputs = model(**inputs)
complete_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9990aec7a43bc52b3ad9ffb120987f7bc8a8ad251bab82630e7a625dd1fcbd3f
3
+ size 12683
config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "morseh_model",
3
+ "input_size": 26,
4
+ "output_size": 3,
5
+ "max_length": 10
6
+ }
model.ipynb ADDED
@@ -0,0 +1,859 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "IMPORTS"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 3,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pandas as pd\n",
17
+ "from sklearn.preprocessing import LabelEncoder\n",
18
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
19
+ "from sklearn.model_selection import train_test_split\n",
20
+ "import torch\n",
21
+ "import torch.nn as nn\n",
22
+ "from torch.utils.data import DataLoader, TensorDataset\n",
23
+ "import torch.optim as optim\n",
24
+ "import matplotlib.pyplot as plt"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "metadata": {},
30
+ "source": [
31
+ "LOAD DATA"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "data": {
41
+ "text/html": [
42
+ "<div>\n",
43
+ "<style scoped>\n",
44
+ " .dataframe tbody tr th:only-of-type {\n",
45
+ " vertical-align: middle;\n",
46
+ " }\n",
47
+ "\n",
48
+ " .dataframe tbody tr th {\n",
49
+ " vertical-align: top;\n",
50
+ " }\n",
51
+ "\n",
52
+ " .dataframe thead th {\n",
53
+ " text-align: right;\n",
54
+ " }\n",
55
+ "</style>\n",
56
+ "<table border=\"1\" class=\"dataframe\">\n",
57
+ " <thead>\n",
58
+ " <tr style=\"text-align: right;\">\n",
59
+ " <th></th>\n",
60
+ " <th>Character</th>\n",
61
+ " <th>Morse Code</th>\n",
62
+ " </tr>\n",
63
+ " </thead>\n",
64
+ " <tbody>\n",
65
+ " <tr>\n",
66
+ " <th>0</th>\n",
67
+ " <td>A</td>\n",
68
+ " <td>.-</td>\n",
69
+ " </tr>\n",
70
+ " <tr>\n",
71
+ " <th>1</th>\n",
72
+ " <td>B</td>\n",
73
+ " <td>-...</td>\n",
74
+ " </tr>\n",
75
+ " <tr>\n",
76
+ " <th>2</th>\n",
77
+ " <td>C</td>\n",
78
+ " <td>-.-.</td>\n",
79
+ " </tr>\n",
80
+ " <tr>\n",
81
+ " <th>3</th>\n",
82
+ " <td>D</td>\n",
83
+ " <td>-..</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>4</th>\n",
87
+ " <td>E</td>\n",
88
+ " <td>.</td>\n",
89
+ " </tr>\n",
90
+ " </tbody>\n",
91
+ "</table>\n",
92
+ "</div>"
93
+ ],
94
+ "text/plain": [
95
+ " Character Morse Code\n",
96
+ "0 A .-\n",
97
+ "1 B -...\n",
98
+ "2 C -.-.\n",
99
+ "3 D -..\n",
100
+ "4 E ."
101
+ ]
102
+ },
103
+ "execution_count": 4,
104
+ "metadata": {},
105
+ "output_type": "execute_result"
106
+ }
107
+ ],
108
+ "source": [
109
+ "df = pd.read_csv('C:/My Projects/MorseH Model/morse_data.csv')\n",
110
+ "df.head()"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "markdown",
115
+ "metadata": {},
116
+ "source": [
117
+ "Checking Data types"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 5,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "data": {
127
+ "text/plain": [
128
+ "(str, str)"
129
+ ]
130
+ },
131
+ "execution_count": 5,
132
+ "metadata": {},
133
+ "output_type": "execute_result"
134
+ }
135
+ ],
136
+ "source": [
137
+ "type(df['Character'][0]), type(df['Morse Code'][0])"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "markdown",
142
+ "metadata": {},
143
+ "source": [
144
+ "ENCODE THE STRINGS"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 6,
150
+ "metadata": {},
151
+ "outputs": [],
152
+ "source": [
153
+ "lb = LabelEncoder()\n",
154
+ "df['Character'] = lb.fit_transform(df['Character'])"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "markdown",
159
+ "metadata": {},
160
+ "source": [
161
+ "ENCODE THE MORSE CODES <br>\n",
162
+ "'.' -> 0, <br>\n",
163
+ "'-' -> 1, <br>\n",
164
+ "' ' -> 2 PADDING"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": 7,
170
+ "metadata": {},
171
+ "outputs": [
172
+ {
173
+ "data": {
174
+ "text/html": [
175
+ "<div>\n",
176
+ "<style scoped>\n",
177
+ " .dataframe tbody tr th:only-of-type {\n",
178
+ " vertical-align: middle;\n",
179
+ " }\n",
180
+ "\n",
181
+ " .dataframe tbody tr th {\n",
182
+ " vertical-align: top;\n",
183
+ " }\n",
184
+ "\n",
185
+ " .dataframe thead th {\n",
186
+ " text-align: right;\n",
187
+ " }\n",
188
+ "</style>\n",
189
+ "<table border=\"1\" class=\"dataframe\">\n",
190
+ " <thead>\n",
191
+ " <tr style=\"text-align: right;\">\n",
192
+ " <th></th>\n",
193
+ " <th>Character</th>\n",
194
+ " <th>Morse Code</th>\n",
195
+ " <th>Morse Code Enc</th>\n",
196
+ " </tr>\n",
197
+ " </thead>\n",
198
+ " <tbody>\n",
199
+ " <tr>\n",
200
+ " <th>0</th>\n",
201
+ " <td>25</td>\n",
202
+ " <td>.-</td>\n",
203
+ " <td>[0, 1]</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>1</th>\n",
207
+ " <td>26</td>\n",
208
+ " <td>-...</td>\n",
209
+ " <td>[1, 0, 0, 0]</td>\n",
210
+ " </tr>\n",
211
+ " <tr>\n",
212
+ " <th>2</th>\n",
213
+ " <td>27</td>\n",
214
+ " <td>-.-.</td>\n",
215
+ " <td>[1, 0, 1, 0]</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>3</th>\n",
219
+ " <td>28</td>\n",
220
+ " <td>-..</td>\n",
221
+ " <td>[1, 0, 0]</td>\n",
222
+ " </tr>\n",
223
+ " <tr>\n",
224
+ " <th>4</th>\n",
225
+ " <td>29</td>\n",
226
+ " <td>.</td>\n",
227
+ " <td>[0]</td>\n",
228
+ " </tr>\n",
229
+ " </tbody>\n",
230
+ "</table>\n",
231
+ "</div>"
232
+ ],
233
+ "text/plain": [
234
+ " Character Morse Code Morse Code Enc\n",
235
+ "0 25 .- [0, 1]\n",
236
+ "1 26 -... [1, 0, 0, 0]\n",
237
+ "2 27 -.-. [1, 0, 1, 0]\n",
238
+ "3 28 -.. [1, 0, 0]\n",
239
+ "4 29 . [0]"
240
+ ]
241
+ },
242
+ "execution_count": 7,
243
+ "metadata": {},
244
+ "output_type": "execute_result"
245
+ }
246
+ ],
247
+ "source": [
248
+ "morse_dict = {'.':0,'-':1,' ':2}\n",
249
+ "df['Morse Code Enc'] = df['Morse Code'].apply(lambda x: [morse_dict[char] for char in x])\n",
250
+ "df.head()"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": 8,
256
+ "metadata": {},
257
+ "outputs": [
258
+ {
259
+ "data": {
260
+ "text/plain": [
261
+ "8"
262
+ ]
263
+ },
264
+ "execution_count": 8,
265
+ "metadata": {},
266
+ "output_type": "execute_result"
267
+ }
268
+ ],
269
+ "source": [
270
+ "max_length = df['Morse Code Enc'].apply(len).max()\n",
271
+ "max_length"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "markdown",
276
+ "metadata": {},
277
+ "source": [
278
+ "Adding padding so that every encoded Morse code sequence has the same length (max_length)"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 9,
284
+ "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "data": {
288
+ "text/html": [
289
+ "<div>\n",
290
+ "<style scoped>\n",
291
+ " .dataframe tbody tr th:only-of-type {\n",
292
+ " vertical-align: middle;\n",
293
+ " }\n",
294
+ "\n",
295
+ " .dataframe tbody tr th {\n",
296
+ " vertical-align: top;\n",
297
+ " }\n",
298
+ "\n",
299
+ " .dataframe thead th {\n",
300
+ " text-align: right;\n",
301
+ " }\n",
302
+ "</style>\n",
303
+ "<table border=\"1\" class=\"dataframe\">\n",
304
+ " <thead>\n",
305
+ " <tr style=\"text-align: right;\">\n",
306
+ " <th></th>\n",
307
+ " <th>Character</th>\n",
308
+ " <th>Morse Code</th>\n",
309
+ " <th>Morse Code Enc</th>\n",
310
+ " </tr>\n",
311
+ " </thead>\n",
312
+ " <tbody>\n",
313
+ " <tr>\n",
314
+ " <th>0</th>\n",
315
+ " <td>25</td>\n",
316
+ " <td>.-</td>\n",
317
+ " <td>[0, 1, 2, 2, 2, 2, 2, 2]</td>\n",
318
+ " </tr>\n",
319
+ " <tr>\n",
320
+ " <th>1</th>\n",
321
+ " <td>26</td>\n",
322
+ " <td>-...</td>\n",
323
+ " <td>[1, 0, 0, 0, 2, 2, 2, 2]</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>2</th>\n",
327
+ " <td>27</td>\n",
328
+ " <td>-.-.</td>\n",
329
+ " <td>[1, 0, 1, 0, 2, 2, 2, 2]</td>\n",
330
+ " </tr>\n",
331
+ " <tr>\n",
332
+ " <th>3</th>\n",
333
+ " <td>28</td>\n",
334
+ " <td>-..</td>\n",
335
+ " <td>[1, 0, 0, 2, 2, 2, 2, 2]</td>\n",
336
+ " </tr>\n",
337
+ " <tr>\n",
338
+ " <th>4</th>\n",
339
+ " <td>29</td>\n",
340
+ " <td>.</td>\n",
341
+ " <td>[0, 2, 2, 2, 2, 2, 2, 2]</td>\n",
342
+ " </tr>\n",
343
+ " </tbody>\n",
344
+ "</table>\n",
345
+ "</div>"
346
+ ],
347
+ "text/plain": [
348
+ " Character Morse Code Morse Code Enc\n",
349
+ "0 25 .- [0, 1, 2, 2, 2, 2, 2, 2]\n",
350
+ "1 26 -... [1, 0, 0, 0, 2, 2, 2, 2]\n",
351
+ "2 27 -.-. [1, 0, 1, 0, 2, 2, 2, 2]\n",
352
+ "3 28 -.. [1, 0, 0, 2, 2, 2, 2, 2]\n",
353
+ "4 29 . [0, 2, 2, 2, 2, 2, 2, 2]"
354
+ ]
355
+ },
356
+ "execution_count": 9,
357
+ "metadata": {},
358
+ "output_type": "execute_result"
359
+ }
360
+ ],
361
+ "source": [
362
+ "df['Morse Code Enc'] = pad_sequences(df['Morse Code Enc'],maxlen = max_length, padding='post', value=2).tolist()\n",
363
+ "df.head()"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "markdown",
368
+ "metadata": {},
369
+ "source": [
370
+ "Taking Features and Labels"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": 10,
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "X = df['Character'].values\n",
380
+ "y = df['Morse Code Enc'].tolist()"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "markdown",
385
+ "metadata": {},
386
+ "source": [
387
+ "Splitting Data (Traditional Way) (NOT PREFERRED) (Scroll Down for torch approach)"
388
+ ]
389
+ },
390
+ {
391
+ "cell_type": "code",
392
+ "execution_count": 11,
393
+ "metadata": {},
394
+ "outputs": [],
395
+ "source": [
396
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": 12,
402
+ "metadata": {},
403
+ "outputs": [],
404
+ "source": [
405
+ "X_train_tensor = torch.tensor(X_train, dtype=torch.long).view(-1, 1)\n",
406
+ "X_test_tensor = torch.tensor(X_test, dtype=torch.long)\n",
407
+ "y_train_tensor = torch.tensor(y_train, dtype=torch.long).view(-1, 1)\n",
408
+ "y_test_tensor = torch.tensor(y_test, dtype=torch.long)"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 13,
414
+ "metadata": {},
415
+ "outputs": [],
416
+ "source": [
417
+ "class MorseH_Model(nn.Module):\n",
418
+ " def __init__(self, input_size, output_size, max_length):\n",
419
+ " super(MorseH_Model, self).__init__()\n",
420
+ " # Embedding layer to represent each character as a vector\n",
421
+ " self.emmbedding = nn.Embedding(input_size, 16)\n",
422
+ "\n",
423
+ " # Linear Layers\n",
424
+ " self.fc1 = nn.Linear(16, 32)\n",
425
+ " self.fc2 = nn.Linear(32, output_size*max_length)\n",
426
+ "\n",
427
+ " #Reshaping output shape to match morse code shape\n",
428
+ " self.output_size = output_size\n",
429
+ " self.max_length = max_length\n",
430
+ " \n",
431
+ " def forward(self, x):\n",
432
+ " # Pass input through embedding layer\n",
433
+ " x = self.emmbedding(x).view(-1, 16)\n",
434
+ " x = torch.relu(self.fc1(x))\n",
435
+ " x = self.fc2(x)\n",
436
+ "\n",
437
+ " return x.view(-1, self.max_length, self.output_size)"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 14,
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "data": {
447
+ "text/plain": [
448
+ "MorseH_Model(\n",
449
+ " (emmbedding): Embedding(54, 16)\n",
450
+ " (fc1): Linear(in_features=16, out_features=32, bias=True)\n",
451
+ " (fc2): Linear(in_features=32, out_features=24, bias=True)\n",
452
+ ")"
453
+ ]
454
+ },
455
+ "execution_count": 14,
456
+ "metadata": {},
457
+ "output_type": "execute_result"
458
+ }
459
+ ],
460
+ "source": [
461
+ "input_size = len(lb.classes_)\n",
462
+ "output_size = 3\n",
463
+ "max_len = max_length\n",
464
+ "model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n",
465
+ "# Load the weights into a new model\n",
466
+ "model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n",
467
+ "model"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "markdown",
472
+ "metadata": {},
473
+ "source": [
474
+ "Prepare Data"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": 15,
480
+ "metadata": {},
481
+ "outputs": [],
482
+ "source": [
483
+ "\n",
484
+ "X = torch.tensor(df['Character'].values, dtype=torch.long)\n",
485
+ "y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)\n",
486
+ "\n",
487
+ "data = TensorDataset(X, y)\n",
488
+ "loader = DataLoader(data, batch_size=16, shuffle=True)"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "markdown",
493
+ "metadata": {},
494
+ "source": [
495
+ "Define Loss Function and Optimizer"
496
+ ]
497
+ },
498
+ {
499
+ "cell_type": "code",
500
+ "execution_count": 16,
501
+ "metadata": {},
502
+ "outputs": [],
503
+ "source": [
504
+ "criterion = nn.CrossEntropyLoss()\n",
505
+ "optimizer = optim.Adam(model.parameters(), lr = 0.001)"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "markdown",
510
+ "metadata": {},
511
+ "source": [
512
+ "Training Loop"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "code",
517
+ "execution_count": 17,
518
+ "metadata": {},
519
+ "outputs": [],
520
+ "source": [
521
+ "# num_epochs = 20\n",
522
+ "# for epoch in range(num_epochs):\n",
523
+ "# model.train()\n",
524
+ "# running_loss = 0.0\n",
525
+ "# for inputs, targets in loader:\n",
526
+ "# optimizer.zero_grad() # Reset gradients\n",
527
+ "# outputs = model(inputs) # Forward Pass\n",
528
+ "\n",
529
+ "# # Redhape for Loss Calculation\n",
530
+ "# targets = targets.view(-1)\n",
531
+ "# outputs = outputs.view(-1, output_size)\n",
532
+ "\n",
533
+ "# loss = criterion(outputs, targets) # Calculate loss\n",
534
+ "# loss.backward() # Backward Pass\n",
535
+ "# optimizer.step() # Update weights\n",
536
+ "\n",
537
+ "# running_loss += loss.item()\n",
538
+ " \n",
539
+ "# print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(loader):.4f}')"
540
+ ]
541
+ },
542
+ {
543
+ "cell_type": "markdown",
544
+ "metadata": {},
545
+ "source": [
546
+ "Evaluating Trained Model"
547
+ ]
548
+ },
549
+ {
550
+ "cell_type": "code",
551
+ "execution_count": 18,
552
+ "metadata": {},
553
+ "outputs": [],
554
+ "source": [
555
+ "# model.eval() # set model to evaluation mode\n",
556
+ "# sample_size = 10\n",
557
+ "# correct = 0\n",
558
+ "# total = 0\n",
559
+ "# with torch.no_grad():\n",
560
+ "# for i in range(sample_size):\n",
561
+ "# input_sample = X[i].unsqueeze(0)\n",
562
+ "# target_sample = y[i]\n",
563
+ "\n",
564
+ "# output = model(input_sample)\n",
565
+ "# _, predicted = torch.max(output.data, 2)\n",
566
+ "\n",
567
+ "# total += target_sample.size(0)\n",
568
+ "# correct += (predicted.squeeze()==target_sample).sum().item()\n",
569
+ "\n",
570
+ "# accuracy = 100*correct/total\n",
571
+ "# print(f'Accuracy on sample of training set: {accuracy:.2f}%')"
572
+ ]
573
+ },
574
+ {
575
+ "cell_type": "markdown",
576
+ "metadata": {},
577
+ "source": [
578
+ "Predicting and Decoding the Predicted Output"
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "execution_count": 19,
584
+ "metadata": {},
585
+ "outputs": [],
586
+ "source": [
587
+ "def predict(char_index):\n",
588
+ " with torch.no_grad():\n",
589
+ " output = model(torch.tensor([char_index]))\n",
590
+ " _, prediction = torch.max(output, 2)\n",
591
+ " return prediction[0]\n",
592
+ "\n",
593
+ "def decode(prediction):\n",
594
+ " # Removing Padding\n",
595
+ " prediction = [p for p in prediction if p!=2]\n",
596
+ " decode_symb = ['.' if c == 0 else '-' for c in prediction]\n",
597
+ " morse_code = ''.join(decode_symb)\n",
598
+ " return morse_code"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "execution_count": 20,
604
+ "metadata": {},
605
+ "outputs": [],
606
+ "source": [
607
+ "def encode(word):\n",
608
+ " word = word.upper()\n",
609
+ " return [lb.transform([c])[0] for c in word]"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "markdown",
614
+ "metadata": {},
615
+ "source": [
616
+ "Testing with Some Random Data"
617
+ ]
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "execution_count": 21,
622
+ "metadata": {},
623
+ "outputs": [
624
+ {
625
+ "data": {
626
+ "text/plain": [
627
+ "['.- .--. .--. .-.. . ',\n",
628
+ " '-... .- .-.. .-.. ',\n",
629
+ " '-.-. .- - ',\n",
630
+ " '-..- -- .- ... -....- - .-. . . ']"
631
+ ]
632
+ },
633
+ "execution_count": 21,
634
+ "metadata": {},
635
+ "output_type": "execute_result"
636
+ }
637
+ ],
638
+ "source": [
639
+ "trancode_list = [\"apple\", \"ball\", \"cat\" ,\"xmas-tree\"]\n",
640
+ "def get_morse_word(word):\n",
641
+ " char_indices = encode(word)\n",
642
+ " decoded = []\n",
643
+ " for ind in char_indices:\n",
644
+ " pred = predict(ind)\n",
645
+ " decoded.append(decode(pred))\n",
646
+ " decoded.append(' ')\n",
647
+ " return ''.join(decoded)\n",
648
+ "codes = [get_morse_word(word) for word in trancode_list]\n",
649
+ "codes"
650
+ ]
651
+ },
652
+ {
653
+ "cell_type": "markdown",
654
+ "metadata": {},
655
+ "source": [
656
+ "Testing with long Sentences"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": 22,
662
+ "metadata": {},
663
+ "outputs": [
664
+ {
665
+ "data": {
666
+ "text/plain": [
667
+ "[['Be', 'yourself;', 'everyone', 'else', 'is', 'already', 'taken.'],\n",
668
+ " ['So', 'many', 'books', 'so', 'little', 'time.'],\n",
669
+ " ['Two',\n",
670
+ " 'things',\n",
671
+ " 'are',\n",
672
+ " 'infinite:',\n",
673
+ " 'the',\n",
674
+ " 'universe',\n",
675
+ " 'and',\n",
676
+ " 'human',\n",
677
+ " 'stupidity;',\n",
678
+ " 'and',\n",
679
+ " \"I'm\",\n",
680
+ " 'not',\n",
681
+ " 'sure',\n",
682
+ " 'about',\n",
683
+ " 'the',\n",
684
+ " 'universe.']]"
685
+ ]
686
+ },
687
+ "execution_count": 22,
688
+ "metadata": {},
689
+ "output_type": "execute_result"
690
+ }
691
+ ],
692
+ "source": [
693
+ "trancode_sentences = [\"Be yourself; everyone else is already taken.\", \"So many books so little time.\", \"Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.\" ]\n",
694
+ "trancode_lists = [ sen.split(' ') for sen in trancode_sentences ]\n",
695
+ "trancode_lists"
696
+ ]
697
+ },
698
+ {
699
+ "cell_type": "code",
700
+ "execution_count": 23,
701
+ "metadata": {},
702
+ "outputs": [
703
+ {
704
+ "data": {
705
+ "text/plain": [
706
+ "['-... . -.-- --- ..- .-. ... . .-.. ..-. -.-.-. . ...- . .-. -.-- --- -. . . .-.. ... . .. ... .- .-.. .-. . .- -.. -.-- - .- -.- . -. .-.-.- ',\n",
707
+ " '... --- -- .- -. -.-- -... --- --- -.- ... ... --- .-.. .. - - .-.. . - .. -- . .-.-.- ',\n",
708
+ " '- .-- --- - .... .. -. --. ... .- .-. . .. -. ..-. .. -. .. - . ---... - .... . ..- -. .. ...- . .-. ... . .- -. -.. .... ..- -- .- -. ... - ..- .--. .. -.. .. - -.-- -.-.-. .- -. -.. .. .----. -- -. --- - ... ..- .-. . .- -... --- ..- - - .... . ..- -. .. ...- . .-. ... . .-.-.- ']"
709
+ ]
710
+ },
711
+ "execution_count": 23,
712
+ "metadata": {},
713
+ "output_type": "execute_result"
714
+ }
715
+ ],
716
+ "source": [
717
+ "get_morse_codes = []\n",
718
+ "for l1 in trancode_lists:\n",
719
+ " codes = [get_morse_word(word)+' ' for word in l1]\n",
720
+ " get_morse_codes.append(''.join(codes))\n",
721
+ "get_morse_codes"
722
+ ]
723
+ },
724
+ {
725
+ "cell_type": "markdown",
726
+ "metadata": {},
727
+ "source": [
728
+ "### INFERENCE API"
729
+ ]
730
+ },
731
+ {
732
+ "cell_type": "code",
733
+ "execution_count": 24,
734
+ "metadata": {},
735
+ "outputs": [
736
+ {
737
+ "name": "stdout",
738
+ "output_type": "stream",
739
+ "text": [
740
+ "- . -- .--. . .-. .- - ..- .-. . "
741
+ ]
742
+ }
743
+ ],
744
+ "source": [
745
+ "import time\n",
746
+ "take_input = input(\"Type your message: \")\n",
747
+ "response = [get_morse_word(word)+' ' for word in take_input.split()]\n",
748
+ "response = ''.join(response)\n",
749
+ "for i in response:\n",
750
+ " print(i, end=\"\")\n",
751
+ " # time.sleep(100*pow(10, -3)) FUN"
752
+ ]
753
+ },
754
+ {
755
+ "cell_type": "code",
756
+ "execution_count": 25,
757
+ "metadata": {},
758
+ "outputs": [],
759
+ "source": [
760
+ "# Save the model's weights\n",
761
+ "torch.save(model.state_dict(), 'morse_model_weights.pth')\n",
762
+ "\n",
763
+ "# Load the weights into a new model\n",
764
+ "model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))\n",
765
+ "\n",
766
+ "# Set the model to evaluation mode\n",
767
+ "model.eval()\n",
768
+ "# Save the entire model\n",
769
+ "torch.save(model, 'complete_model.pth')"
770
+ ]
771
+ },
772
+ {
773
+ "cell_type": "code",
774
+ "execution_count": 26,
775
+ "metadata": {},
776
+ "outputs": [
777
+ {
778
+ "data": {
779
+ "text/plain": [
780
+ "MorseH_Model(\n",
781
+ " (emmbedding): Embedding(54, 16)\n",
782
+ " (fc1): Linear(in_features=16, out_features=32, bias=True)\n",
783
+ " (fc2): Linear(in_features=32, out_features=24, bias=True)\n",
784
+ ")"
785
+ ]
786
+ },
787
+ "execution_count": 26,
788
+ "metadata": {},
789
+ "output_type": "execute_result"
790
+ }
791
+ ],
792
+ "source": [
793
+ "model"
794
+ ]
795
+ },
796
+ {
797
+ "cell_type": "code",
798
+ "execution_count": 27,
799
+ "metadata": {},
800
+ "outputs": [],
801
+ "source": [
802
+ "# Save the model weights as pytorch_model.bin\n",
803
+ "import torch\n",
804
+ "torch.save(model.state_dict(), \"pytorch_model.bin\")"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "markdown",
809
+ "metadata": {},
810
+ "source": [
811
+ "To Use it later"
812
+ ]
813
+ },
814
+ {
815
+ "cell_type": "code",
816
+ "execution_count": 28,
817
+ "metadata": {},
818
+ "outputs": [],
819
+ "source": [
820
+ "# # Instantiate the model (ensure it has the same architecture)\n",
821
+ "# model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)\n",
822
+ "\n",
823
+ "# # Load the saved weights\n",
824
+ "# model.load_state_dict(torch.load(\"pytorch_model.bin\"))\n",
825
+ "\n",
826
+ "# # Set the model to evaluation mode if needed\n",
827
+ "# model.eval()"
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "code",
832
+ "execution_count": null,
833
+ "metadata": {},
834
+ "outputs": [],
835
+ "source": []
836
+ }
837
+ ],
838
+ "metadata": {
839
+ "kernelspec": {
840
+ "display_name": "Python 3",
841
+ "language": "python",
842
+ "name": "python3"
843
+ },
844
+ "language_info": {
845
+ "codemirror_mode": {
846
+ "name": "ipython",
847
+ "version": 3
848
+ },
849
+ "file_extension": ".py",
850
+ "mimetype": "text/x-python",
851
+ "name": "python",
852
+ "nbconvert_exporter": "python",
853
+ "pygments_lexer": "ipython3",
854
+ "version": "3.12.2"
855
+ }
856
+ },
857
+ "nbformat": 4,
858
+ "nbformat_minor": 2
859
+ }
morse_data.csv ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Character,Morse Code
2
+ A,.-
3
+ B,-...
4
+ C,-.-.
5
+ D,-..
6
+ E,.
7
+ F,..-.
8
+ G,--.
9
+ H,....
10
+ I,..
11
+ J,.---
12
+ K,-.-
13
+ L,.-..
14
+ M,--
15
+ N,-.
16
+ O,---
17
+ P,.--.
18
+ Q,--.-
19
+ R,.-.
20
+ S,...
21
+ T,-
22
+ U,..-
23
+ V,...-
24
+ W,.--
25
+ X,-..-
26
+ Y,-.--
27
+ Z,--..
28
+ 0,-----
29
+ 1,.----
30
+ 2,..---
31
+ 3,...--
32
+ 4,....-
33
+ 5,.....
34
+ 6,-....
35
+ 7,--...
36
+ 8,---..
37
+ 9,----.
38
+ .,.-.-.-
39
+ c,--..--
40
+ ?,..--..
41
+ ’,.----.
42
+ !,-.-.--
43
+ /,-..-.
44
+ (,-.--.
45
+ ),-.--.-
46
+ &,.-...
47
+ :,---...
48
+ ;,-.-.-.
49
+ =,-...-
50
+ +,.-.-.
51
+ -,-....-
52
+ _,..--.-
53
+ $,...-..-.
54
+ ,
55
+ ',.----.
morse_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda920b27896795388d1c5479204c8ca14828741ad13073714812c3decad9355
3
+ size 11256
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a6b27780d2d6187d91f3080b9ed45e9ecab93c1862241a76bb46d7d6688140f
3
+ size 11202