deshanksuman committed on
Commit
ccad850
1 Parent(s): 7f6c6ce

Upload suggestion.ipynb

Browse files
Files changed (1) hide show
  1. suggestion.ipynb +265 -0
suggestion.ipynb ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "sData = [] # Sinhala data list\n",
10
+ "eData = [] # English data list\n",
11
+ "\n",
12
+ "lstword = []\n",
13
+ "final = []\n"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "\n",
23
+ "import pickle\n",
24
+ "import TranslaterLogic\n"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "def clearlstword():\n",
34
+ " global lstword\n",
35
+ " lstword = []\n",
36
+ "\n",
37
+ "\n",
38
+ "def translate(txt,t):\n",
39
+ " global lstword\n",
40
+ " lstword.clear()\n",
41
+ " if len(txt)==0:\n",
42
+ " return lstword\n",
43
+ " else:\n",
44
+ " l=t.printAutoSuggestions(txt, 2)\n",
45
+ " #if len(txt) >= 5:\n",
46
+ " # t.printAutoSuggestions(txt, 1)\n",
47
+ " #else:\n",
48
+ " # t.printAutoSuggestions(txt, 2)\n",
49
+ " #print(lstword)\n",
50
+ " return l\n",
51
+ "\n",
52
+ "\n",
53
+ "class TrieNode():\n",
54
+ " def __init__(self):\n",
55
+ " # Initialising one node for trie\n",
56
+ " self.children = {}\n",
57
+ " self.last = False\n",
58
+ "\n",
59
+ "\n",
60
+ "class Trie():\n",
61
+ "\n",
62
+ " def __init__(self):\n",
63
+ "\n",
64
+ " # Initialising the trie structure.\n",
65
+ " self.root = TrieNode()\n",
66
+ "\n",
67
+ " def formTrie(self, keys):\n",
68
+ "\n",
69
+ " # Forms a trie structure with the given set of strings\n",
70
+ " # if it does not exist already; otherwise it merges the key\n",
71
+ " # into it by extending the structure as required\n",
72
+ " for key in keys:\n",
73
+ " self.insert(key) # inserting one key to the trie.\n",
74
+ "\n",
75
+ " def insert(self, key):\n",
76
+ "\n",
77
+ " # Inserts a key into trie if it does not exist already.\n",
78
+ " # And if the key is a prefix of the trie node, just\n",
79
+ " # marks it as leaf node.\n",
80
+ " node = self.root\n",
81
+ "\n",
82
+ " for a in key:\n",
83
+ " if not node.children.get(a):\n",
84
+ " node.children[a] = TrieNode()\n",
85
+ "\n",
86
+ " node = node.children[a]\n",
87
+ "\n",
88
+ " node.last = True\n",
89
+ "\n",
90
+ " def suggestionsRec(self, node, word):\n",
91
+ "\n",
92
+ " # Method to collect the Sinhala entries whose English key\n",
93
+ " # equals the given word (no recursion into child nodes).\n",
94
+ " lstword=[]\n",
95
+ " \n",
96
+ " if node.last:\n",
97
+ " sin_indexes = [n for n, x in enumerate(eData) if x == word]\n",
98
+ " for i in sin_indexes:\n",
99
+ " y = int(i)\n",
100
+ " if sData[y] not in lstword:\n",
101
+ " # print(sData[y])\n",
102
+ " lstword.append(sData[y])\n",
103
+ " #adding the rule\n",
104
+ " #txt=str(TranslaterLogic.convertText(word))\n",
105
+ " # print(txt)\n",
106
+ " #lstword.append(txt)\n",
107
+ " return lstword\n",
108
+ " \n",
109
+ " def suggestionsRecsuffix(self, node, word):\n",
110
+ "\n",
111
+ " # Method to recursively traverse the trie\n",
112
+ " # and return a whole word.\n",
113
+ " if node.last:\n",
114
+ " sin_indexes = [n for n, x in enumerate(eData) if x == word]\n",
115
+ " for i in sin_indexes:\n",
116
+ " y = int(i)\n",
117
+ " if sData[y] not in lstword:\n",
118
+ " # print(sData[y])\n",
119
+ " lstword.append(sData[y])\n",
120
+ "\n",
121
+ " for a, n in node.children.items():\n",
122
+ " self.suggestionsRec(n, word + a)\n",
123
+ "\n",
124
+ " def printAutoSuggestions(self, key, para):\n",
125
+ "\n",
126
+ " # adding text using rule\n",
127
+ " # lstword.append(str(TranslaterLogic.convertText(key)))\n",
128
+ "\n",
129
+ " # Returns all the words in the trie whose common\n",
130
+ " # prefix is the given key thus listing out all\n",
131
+ " # the suggestions for autocomplete.\n",
132
+ "\n",
133
+ " node = self.root\n",
134
+ "\n",
135
+ " for a in key:\n",
136
+ " # no string in the Trie has this prefix\n",
137
+ " if not node.children.get(a):\n",
138
+ " return 0\n",
139
+ " node = node.children[a]\n",
140
+ "\n",
141
+ " # If prefix is present as a word, but\n",
142
+ " # there is no subtree below the last\n",
143
+ " # matching node.\n",
144
+ " if not node.children:\n",
145
+ " return -1\n",
146
+ " if para == 1:\n",
147
+ " lst=self.suggestionsRecsuffix(node, key)\n",
148
+ " return lst\n",
149
+ " else:\n",
150
+ " lst=self.suggestionsRec(node, key)\n",
151
+ " return lst"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 7,
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "# Code for creating trie\n",
161
+ "\n",
162
+ "textFile = open(\"singlishtrainsuggest.txt\",\n",
163
+ " mode='r', encoding='utf-8')\n",
164
+ "for i in textFile:\n",
165
+ " txt = i.split(\"/\")\n",
166
+ " eData.append(txt[0])\n",
167
+ " sData.append(txt[1].strip('\\n'))\n",
168
+ "\n",
169
+ "keys = eData\n",
170
+ "# keys to form the trie structure.\n",
171
+ "# creating trie object\n",
172
+ "t = Trie()\n",
173
+ "\n",
174
+ "# creating the trie structure with the\n",
175
+ "# given set of strings.\n",
176
+ "# t.formTrie(keys)\n",
177
+ "# print(\"Trie generated .\")"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": 13,
183
+ "metadata": {},
184
+ "outputs": [
185
+ {
186
+ "name": "stdout",
187
+ "output_type": "stream",
188
+ "text": [
189
+ "Trie saved.\n"
190
+ ]
191
+ }
192
+ ],
193
+ "source": [
194
+ "# import pickle\n",
195
+ "\n",
196
+ "# # Saving the Trie object\n",
197
+ "# with open('trie.pkl', 'wb') as f:\n",
198
+ "# pickle.dump(t, f)\n",
199
+ "\n",
200
+ "# print(\"Trie saved.\")\n",
201
+ "\n"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 4,
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "name": "stdout",
211
+ "output_type": "stream",
212
+ "text": [
213
+ "Trie loaded.\n"
214
+ ]
215
+ }
216
+ ],
217
+ "source": [
218
+ "import pickle\n",
219
+ "# Loading the Trie object\n",
220
+ "with open('trie.pkl', 'rb') as f:\n",
221
+ " loaded_t = pickle.load(f)\n",
222
+ "\n",
223
+ "print(\"Trie loaded.\")"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 8,
229
+ "metadata": {},
230
+ "outputs": [
231
+ {
232
+ "name": "stdout",
233
+ "output_type": "stream",
234
+ "text": [
235
+ "['ආදරය', 'ආදාරය', 'අදාරය', 'ආධාරය', 'අදරය']\n"
236
+ ]
237
+ }
238
+ ],
239
+ "source": [
240
+ "print(translate(\"adaraya\",loaded_t))"
241
+ ]
242
+ }
243
+ ],
244
+ "metadata": {
245
+ "kernelspec": {
246
+ "display_name": "llm",
247
+ "language": "python",
248
+ "name": "python3"
249
+ },
250
+ "language_info": {
251
+ "codemirror_mode": {
252
+ "name": "ipython",
253
+ "version": 3
254
+ },
255
+ "file_extension": ".py",
256
+ "mimetype": "text/x-python",
257
+ "name": "python",
258
+ "nbconvert_exporter": "python",
259
+ "pygments_lexer": "ipython3",
260
+ "version": "3.11.5"
261
+ }
262
+ },
263
+ "nbformat": 4,
264
+ "nbformat_minor": 2
265
+ }