MounikaAithagoni
committed on
Commit
•
7333eec
1
Parent(s):
f609538
Upload 5 files
Browse files
- BLEU_SCORES_OF SEQ2SEQ & LSTM.csv +201 -0
- CHRF_SCORES_OF_SEQ2SEQ & LSTM.csv +202 -0
- LSTM & SEQ2SEQ TRAINING CURVE.docx +0 -0
- LSTM_based_translator.ipynb +569 -0
- Seq_to_Seq_based_translator.ipynb +519 -0
BLEU_SCORES_OF SEQ2SEQ & LSTM.csv
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
SEQ2SEQBLEU Score,LSTM BLEU Score
|
2 |
+
0.366401164,0.175463441
|
3 |
+
0.344950659,0.172047992
|
4 |
+
0.362642706,0.172047992
|
5 |
+
0.370238348,0.177222528
|
6 |
+
0.366401164,0.175463441
|
7 |
+
0.378159006,0.177222528
|
8 |
+
0.382247806,0.176338597
|
9 |
+
0.370238348,0.144604126
|
10 |
+
0.539854828,0.236497176
|
11 |
+
0.574978593,0.203781401
|
12 |
+
0.531734397,0.230334052
|
13 |
+
0.531734397,0.228843138
|
14 |
+
0.556863322,0.234925679
|
15 |
+
0.548227167,0.234925679
|
16 |
+
0.556863322,0.19918896
|
17 |
+
0.584485592,0.23184452
|
18 |
+
0.2397041,0.126950176
|
19 |
+
0.228843138,0.126495964
|
20 |
+
0.190598282,0.126044991
|
21 |
+
0.227371402,0.111006458
|
22 |
+
0.221669014,0.126495964
|
23 |
+
0.23337493,0.121288505
|
24 |
+
0.227371402,0.105381626
|
25 |
+
0.227371402,0.126495964
|
26 |
+
0.234925679,0.895430728
|
27 |
+
0.2397041,1.02533116
|
28 |
+
0.246386148,0.852245287
|
29 |
+
0.20982859,0.91870806
|
30 |
+
0.19918896,1.055971718
|
31 |
+
0.23337493,0.91870806
|
32 |
+
0.2397041,0.969094302
|
33 |
+
0.230334052,0.873304243
|
34 |
+
0.164828995,0.155409961
|
35 |
+
0.136245346,0.157486759
|
36 |
+
0.16181145,0.159619817
|
37 |
+
0.164828995,0.162555431
|
38 |
+
0.163306284,0.158191416
|
39 |
+
0.164064106,0.129745456
|
40 |
+
0.152724628,0.158191416
|
41 |
+
0.141140484,0.161074249
|
42 |
+
0.156096114,1.335353406
|
43 |
+
0.128800118,1.199348129
|
44 |
+
0.153387223,1.506189323
|
45 |
+
0.158902406,1.387819278
|
46 |
+
0.143430843,1.444580998
|
47 |
+
0.154729813,1.387819278
|
48 |
+
0.156788353,1.286713096
|
49 |
+
0.154055593,1.444580998
|
50 |
+
0.714431561,0.146400488
|
51 |
+
0.794752929,0.144604126
|
52 |
+
0.873304243,0.118444178
|
53 |
+
0.852245287,0.144604126
|
54 |
+
0.729169864,0.147009235
|
55 |
+
0.794752929,0.145197995
|
56 |
+
0.777275156,0.147623066
|
57 |
+
0.873304243,0.144015095
|
58 |
+
0.172889327,0.097264116
|
59 |
+
0.169572409,0.096731886
|
60 |
+
0.16181145,0.098073534
|
61 |
+
0.16181145,0.086338146
|
62 |
+
0.135203297,0.097532434
|
63 |
+
0.171214805,0.086761895
|
64 |
+
0.158902406,0.099452922
|
65 |
+
0.17038965,0.090307748
|
66 |
+
0.303493282,0.190598282
|
67 |
+
0.308794976,0.200317555
|
68 |
+
0.308794976,0.195878212
|
69 |
+
0.317104185,0.194798952
|
70 |
+
0.300910125,0.194798952
|
71 |
+
0.317104185,0.195878212
|
72 |
+
0.306121174,0.172047992
|
73 |
+
0.279499763,0.18957627
|
74 |
+
0.157486759,0.322896641
|
75 |
+
0.158191416,0.341617412
|
76 |
+
0.164064106,0.358960578
|
77 |
+
0.158902406,0.344950659
|
78 |
+
0.158191416,0.355352476
|
79 |
+
0.138378386,0.348349598
|
80 |
+
0.157486759,0.341617412
|
81 |
+
0.157486759,0.358960578
|
82 |
+
0.185595527,0.138378386
|
83 |
+
0.185595527,0.137303582
|
84 |
+
0.186574957,0.116492847
|
85 |
+
0.186574957,0.140022517
|
86 |
+
0.192675724,0.137303582
|
87 |
+
0.168762972,0.116492847
|
88 |
+
0.186574957,0.125597222
|
89 |
+
0.18756478,0.137303582
|
90 |
+
0.158191416,0.873304243
|
91 |
+
0.158902406,0.873304243
|
92 |
+
0.157486759,0.832178395
|
93 |
+
0.156096114,0.943228467
|
94 |
+
0.157486759,0.832178395
|
95 |
+
0.135722321,0.813035057
|
96 |
+
0.147623066,0.813035057
|
97 |
+
0.153387223,0.852245287
|
98 |
+
0.105068452,0.158902406
|
99 |
+
0.111006458,0.158191416
|
100 |
+
0.12471116,0.128332596
|
101 |
+
0.126044991,0.161074249
|
102 |
+
0.126495964,0.159619817
|
103 |
+
0.125597222,0.16181145
|
104 |
+
0.126044991,0.161074249
|
105 |
+
0.127407661,0.158902406
|
106 |
+
0.378159006,0.295873528
|
107 |
+
0.386425998,0.291002766
|
108 |
+
0.41844301,0.283990079
|
109 |
+
0.370238348,0.295873528
|
110 |
+
0.386425998,0.308794976
|
111 |
+
0.378159006,0.293417934
|
112 |
+
0.390696547,0.248115279
|
113 |
+
0.408766391,0.291002766
|
114 |
+
0.172047992,0.126495964
|
115 |
+
0.172047992,0.148866235
|
116 |
+
0.168762972,0.15013052
|
117 |
+
0.159619817,0.147623066
|
118 |
+
0.167961225,0.13947015
|
119 |
+
0.172047992,0.125152623
|
120 |
+
0.169572409,0.154055593
|
121 |
+
0.147009235,0.154729813
|
122 |
+
0.777275156,0.230334052
|
123 |
+
0.714431561,0.227371402
|
124 |
+
0.714431561,0.200317555
|
125 |
+
0.832178395,0.218923754
|
126 |
+
0.686673295,0.216245662
|
127 |
+
0.729169864,0.224483999
|
128 |
+
0.794752929,0.23184452
|
129 |
+
0.714431561,0.207367151
|
130 |
+
0.094155781,0.147009235
|
131 |
+
0.094407201,0.147623066
|
132 |
+
0.096205449,0.148242044
|
133 |
+
0.085918517,0.148866235
|
134 |
+
0.078306639,0.144604126
|
135 |
+
0.094659966,0.118842315
|
136 |
+
0.094155781,0.148866235
|
137 |
+
0.085710228,0.15013052
|
138 |
+
1.727223799,0.341617412
|
139 |
+
1.727223799,0.362642706
|
140 |
+
1.646664242,0.370238348
|
141 |
+
1.914603069,0.370238348
|
142 |
+
1.914603069,0.355352476
|
143 |
+
1.816084942,0.37415676
|
144 |
+
1.914603069,0.399527241
|
145 |
+
1.914603069,0.370238348
|
146 |
+
0.413548098,0.08137044
|
147 |
+
0.41844301,0.076943294
|
148 |
+
0.456245405,0.081746722
|
149 |
+
0.386425998,0.073888058
|
150 |
+
0.413548098,0.081183595
|
151 |
+
0.423455197,0.067407854
|
152 |
+
0.41844301,0.079539822
|
153 |
+
0.439239126,0.066395149
|
154 |
+
0.295873528,0.114604769
|
155 |
+
0.306121174,0.106653213
|
156 |
+
0.293417934,0.093409494
|
157 |
+
0.293417934,0.113137809
|
158 |
+
0.293417934,0.113501017
|
159 |
+
0.291002766,0.097264116
|
160 |
+
0.288627034,0.113866564
|
161 |
+
0.273024377,0.114977472
|
162 |
+
0.200317555,0.213632301
|
163 |
+
0.198073012,0.216245662
|
164 |
+
0.207367151,0.213632301
|
165 |
+
0.198073012,0.217576467
|
166 |
+
0.203781401,0.185595527
|
167 |
+
0.191631373,0.20982859
|
168 |
+
0.198073012,0.201459012
|
169 |
+
0.195878212,0.208590609
|
170 |
+
0.158191416,0.141706189
|
171 |
+
0.156788353,0.126495964
|
172 |
+
0.159619817,0.141706189
|
173 |
+
0.157486759,0.141706189
|
174 |
+
0.158902406,0.142276446
|
175 |
+
0.133669773,0.12005295
|
176 |
+
0.158191416,0.141140484
|
177 |
+
0.157486759,0.125597222
|
178 |
+
0.134177068,0.107627226
|
179 |
+
0.145197995,0.109629617
|
180 |
+
0.149495705,0.087189824
|
181 |
+
0.153387223,0.108619194
|
182 |
+
0.149495705,0.109970615
|
183 |
+
0.15013052,0.108286513
|
184 |
+
0.148242044,0.107627226
|
185 |
+
0.13947015,0.108619194
|
186 |
+
0.20982859,0.794752929
|
187 |
+
0.242999195,0.760549782
|
188 |
+
0.2397041,0.794752929
|
189 |
+
0.236497176,0.813035057
|
190 |
+
0.184626327,0.832178395
|
191 |
+
0.23184452,0.744529234
|
192 |
+
0.236497176,0.832178395
|
193 |
+
0.23184452,0.794752929
|
194 |
+
0.095169581,0.344950659
|
195 |
+
0.092191628,0.35181619
|
196 |
+
0.092432654,0.366401164
|
197 |
+
0.097532434,0.35181619
|
198 |
+
0.097802236,0.358960578
|
199 |
+
0.079006591,0.355352476
|
200 |
+
0.099173949,0.35181619
|
201 |
+
0.096997271,0.344950659
|
CHRF_SCORES_OF_SEQ2SEQ & LSTM.csv
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
SEQ2SEQ_CHRF Score,LSTM_CHRF Score
|
2 |
+
1.275510204,0.317662008
|
3 |
+
0.980392157,0.318471338
|
4 |
+
1.308900524,0.322997416
|
5 |
+
1.291989664,0.318471338
|
6 |
+
1.240694789,0.317460317
|
7 |
+
1.326259947,0.312695435
|
8 |
+
1.216545012,0.322372663
|
9 |
+
1.17370892,0.491642085
|
10 |
+
1.851851852,0.437445319
|
11 |
+
1.766784452,0.586166471
|
12 |
+
1.838235294,0.451671183
|
13 |
+
1.908396947,0.453720508
|
14 |
+
1.879699248,0.479386385
|
15 |
+
1.915708812,0.451671183
|
16 |
+
2.092050209,0.770416025
|
17 |
+
1.766784452,0.448833034
|
18 |
+
0.79491256,0.220167327
|
19 |
+
0.78125,0.220945647
|
20 |
+
0.598086124,0.21872266
|
21 |
+
0.807754443,0.326797386
|
22 |
+
0.789889415,0.220361393
|
23 |
+
0.811688312,0.238435861
|
24 |
+
0.772797527,0.332889481
|
25 |
+
0.807754443,0.220070423
|
26 |
+
0.863557858,3.333333333
|
27 |
+
0.78369906,3.267973856
|
28 |
+
0.847457627,3.401360544
|
29 |
+
0.64683053,3.571428571
|
30 |
+
0.673854447,3.424657534
|
31 |
+
0.801282051,3.546099291
|
32 |
+
0.854700855,3.246753247
|
33 |
+
0.815660685,3.703703704
|
34 |
+
0.564971751,0.296384114
|
35 |
+
0.45045045,0.284738041
|
36 |
+
0.578034682,0.284900285
|
37 |
+
0.564971751,0.282326369
|
38 |
+
0.556173526,0.286368843
|
39 |
+
0.553097345,0.440528634
|
40 |
+
0.4995005,0.282645562
|
41 |
+
0.422297297,0.286861733
|
42 |
+
0.568828214,5.263157895
|
43 |
+
0.416319734,5.050505051
|
44 |
+
0.492125984,4.807692308
|
45 |
+
0.544069641,5.154639175
|
46 |
+
0.513874615,5.208333333
|
47 |
+
0.544069641,5.747126437
|
48 |
+
0.518672199,5.681818182
|
49 |
+
0.540540541,4.504504505
|
50 |
+
3.355704698,0.258799172
|
51 |
+
3.105590062,0.263435195
|
52 |
+
2.673796791,0.41736227
|
53 |
+
3.401360544,0.261369577
|
54 |
+
3.125,0.260688217
|
55 |
+
2.793296089,0.26123302
|
56 |
+
3.571428571,0.254971953
|
57 |
+
2.857142857,0.26795284
|
58 |
+
0.584795322,0.168406871
|
59 |
+
0.597371565,0.171115674
|
60 |
+
0.534759358,0.166666667
|
61 |
+
0.502008032,0.269396552
|
62 |
+
0.438596491,0.16772895
|
63 |
+
0.603136309,0.277469478
|
64 |
+
0.5,0.167000668
|
65 |
+
0.602409639,0.248015873
|
66 |
+
1.082251082,0.363636364
|
67 |
+
1.113585746,0.357909807
|
68 |
+
1.077586207,0.367107195
|
69 |
+
1.10619469,0.360750361
|
70 |
+
1.057082452,0.360750361
|
71 |
+
1.086956522,0.364963504
|
72 |
+
1.10864745,0.542888165
|
73 |
+
0.859106529,0.375093773
|
74 |
+
0.553709856,0.771604938
|
75 |
+
0.541125541,0.974658869
|
76 |
+
0.558035714,0.749625187
|
77 |
+
0.537634409,0.730994152
|
78 |
+
0.576036866,0.745156483
|
79 |
+
0.417014178,0.716332378
|
80 |
+
0.573394495,0.73313783
|
81 |
+
0.564971751,0.70323488
|
82 |
+
0.657894737,0.240384615
|
83 |
+
0.643500644,0.242836328
|
84 |
+
0.647668394,0.38491147
|
85 |
+
0.641025641,0.24473813
|
86 |
+
0.655307995,0.244021474
|
87 |
+
0.540540541,0.315258512
|
88 |
+
0.679347826,0.367376929
|
89 |
+
0.643500644,0.244021474
|
90 |
+
0.544662309,2.824858757
|
91 |
+
0.554938957,3.048780488
|
92 |
+
0.530222694,2.873563218
|
93 |
+
0.544069641,2.941176471
|
94 |
+
0.553097345,2.617801047
|
95 |
+
0.440140845,2.840909091
|
96 |
+
0.514933059,2.976190476
|
97 |
+
0.54884742,3.184713376
|
98 |
+
0.329597891,0.300480769
|
99 |
+
0.372856078,0.289687138
|
100 |
+
0.444444444,0.485436893
|
101 |
+
0.444839858,0.287852619
|
102 |
+
0.436681223,0.295508274
|
103 |
+
0.44603033,0.289687138
|
104 |
+
0.436300175,0.291036088
|
105 |
+
0.430663221,0.291205591
|
106 |
+
1.35501355,0.572737686
|
107 |
+
1.243781095,0.588928151
|
108 |
+
1.436781609,0.591715976
|
109 |
+
1.388888889,0.58685446
|
110 |
+
1.377410468,0.583430572
|
111 |
+
1.385041551,0.581395349
|
112 |
+
1.385041551,0.859106529
|
113 |
+
1.285347044,0.587544066
|
114 |
+
0.584795322,0.448028674
|
115 |
+
0.615763547,0.269541779
|
116 |
+
0.591016548,0.271444083
|
117 |
+
0.473933649,0.283607487
|
118 |
+
0.609013398,0.405515004
|
119 |
+
0.611995104,0.440528634
|
120 |
+
0.58685446,0.27027027
|
121 |
+
0.4784689,0.268672757
|
122 |
+
2.34741784,0.424808836
|
123 |
+
2.941176471,0.424448217
|
124 |
+
1.992031873,0.69735007
|
125 |
+
2.777777778,0.42408821
|
126 |
+
2.645502646,0.423728814
|
127 |
+
2.604166667,0.427350427
|
128 |
+
2.164502165,0.421229992
|
129 |
+
2.525252525,0.471253534
|
130 |
+
0.32808399,0.265111347
|
131 |
+
0.332667997,0.266382525
|
132 |
+
0.334448161,0.266240682
|
133 |
+
0.288683603,0.266382525
|
134 |
+
0.237529691,0.264970853
|
135 |
+
0.325309044,0.411522634
|
136 |
+
0.329815303,0.271591526
|
137 |
+
0.273522976,0.263852243
|
138 |
+
7.042253521,1.024590164
|
139 |
+
7.246376812,0.818330606
|
140 |
+
7.575757576,0.798722045
|
141 |
+
7.352941176,0.822368421
|
142 |
+
6.25,1.222493888
|
143 |
+
7.575757576,0.81300813
|
144 |
+
7.042253521,0.815660685
|
145 |
+
7.246376812,0.810372771
|
146 |
+
1.369863014,0.135943448
|
147 |
+
1.453488372,0.20242915
|
148 |
+
1.515151515,0.137969095
|
149 |
+
1.519756839,0.210526316
|
150 |
+
1.492537313,0.136537411
|
151 |
+
1.488095238,0.214132762
|
152 |
+
1.592356688,0.138159713
|
153 |
+
1.453488372,0.198333994
|
154 |
+
1.030927835,0.196078431
|
155 |
+
1.082251082,0.264270613
|
156 |
+
1,0.321750322
|
157 |
+
1.103752759,0.198886237
|
158 |
+
1.048218029,0.198807157
|
159 |
+
1.030927835,0.318066158
|
160 |
+
1.054852321,0.198412698
|
161 |
+
0.860585198,0.196309384
|
162 |
+
0.673854447,0.394944708
|
163 |
+
0.708215297,0.402900886
|
164 |
+
0.720461095,0.40192926
|
165 |
+
0.698324022,0.394632991
|
166 |
+
0.677506775,0.616522811
|
167 |
+
0.547645126,0.394944708
|
168 |
+
0.668449198,0.453720508
|
169 |
+
0.710227273,0.393081761
|
170 |
+
0.553097345,0.246305419
|
171 |
+
0.561167228,0.384319754
|
172 |
+
0.564334086,0.251509054
|
173 |
+
0.550055006,0.253164557
|
174 |
+
0.557413601,0.248508946
|
175 |
+
0.437062937,0.346740638
|
176 |
+
0.543478261,0.250752257
|
177 |
+
0.577367206,0.381679389
|
178 |
+
0.407830343,0.18615041
|
179 |
+
0.52687039,0.187265918
|
180 |
+
0.534188034,0.279017857
|
181 |
+
0.531349628,0.185666543
|
182 |
+
0.529661017,0.186636805
|
183 |
+
0.521920668,0.188111362
|
184 |
+
0.502008032,0.186776242
|
185 |
+
0.46641791,0.187758167
|
186 |
+
0.715307582,2.659574468
|
187 |
+
0.833333333,2.577319588
|
188 |
+
0.798722045,2.403846154
|
189 |
+
0.786163522,2.717391304
|
190 |
+
0.601684717,2.732240437
|
191 |
+
0.78369906,2.89017341
|
192 |
+
0.802568218,2.427184466
|
193 |
+
0.836120401,2.475247525
|
194 |
+
0.318066158,1.149425287
|
195 |
+
0.318674315,0.749625187
|
196 |
+
0.326583932,0.744047619
|
197 |
+
0.345065562,0.747384155
|
198 |
+
0.345303867,0.740740741
|
199 |
+
0.248015873,0.744047619
|
200 |
+
0.345303867,0.746268657
|
201 |
+
0.34106412,0.762195122
|
202 |
+
0.34106412,
|
LSTM & SEQ2SEQ TRAINING CURVE.docx
ADDED
Binary file (56.7 kB). View file
|
|
LSTM_based_translator.ipynb
ADDED
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 4,
|
6 |
+
"metadata": {
|
7 |
+
"colab": {
|
8 |
+
"base_uri": "https://localhost:8080/"
|
9 |
+
},
|
10 |
+
"id": "pbwsp9uR23iH",
|
11 |
+
"outputId": "ccb481f3-ce09-489c-e7e8-30ab83b0ef34"
|
12 |
+
},
|
13 |
+
"outputs": [
|
14 |
+
{
|
15 |
+
"name": "stdout",
|
16 |
+
"output_type": "stream",
|
17 |
+
"text": [
|
18 |
+
"Collecting sacrebleu\n",
|
19 |
+
" Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)\n",
|
20 |
+
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/51.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r",
|
21 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.8/51.8 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
22 |
+
"\u001b[?25hCollecting portalocker (from sacrebleu)\n",
|
23 |
+
" Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)\n",
|
24 |
+
"Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (2024.9.11)\n",
|
25 |
+
"Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (0.9.0)\n",
|
26 |
+
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (1.26.4)\n",
|
27 |
+
"Collecting colorama (from sacrebleu)\n",
|
28 |
+
" Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n",
|
29 |
+
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (5.3.0)\n",
|
30 |
+
"Downloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)\n",
|
31 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.0/104.0 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
32 |
+
"\u001b[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
|
33 |
+
"Downloading portalocker-2.10.1-py3-none-any.whl (18 kB)\n",
|
34 |
+
"Installing collected packages: portalocker, colorama, sacrebleu\n",
|
35 |
+
"Successfully installed colorama-0.4.6 portalocker-2.10.1 sacrebleu-2.4.3\n"
|
36 |
+
]
|
37 |
+
}
|
38 |
+
],
|
39 |
+
"source": [
|
40 |
+
"!pip install sacrebleu # install sacrebleu library\n",
|
41 |
+
"import torch\n",
|
42 |
+
"import torch.optim as optim\n",
|
43 |
+
"import torch.nn as nn\n",
|
44 |
+
"from torch.utils.data import DataLoader, Dataset, random_split\n",
|
45 |
+
"from torch.nn.utils.rnn import pad_sequence\n",
|
46 |
+
"import matplotlib.pyplot as plt\n",
|
47 |
+
"from collections import Counter\n",
|
48 |
+
"import sacrebleu\n",
|
49 |
+
"import numpy as np\n",
|
50 |
+
"import json\n",
|
51 |
+
"import pandas as pd\n",
|
52 |
+
"from sklearn.metrics import make_scorer, mean_squared_error\n",
|
53 |
+
"import numpy as np"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"cell_type": "code",
|
58 |
+
"execution_count": 7,
|
59 |
+
"metadata": {
|
60 |
+
"colab": {
|
61 |
+
"base_uri": "https://localhost:8080/"
|
62 |
+
},
|
63 |
+
"id": "nQtyR9RD27Cn",
|
64 |
+
"outputId": "4b70ce7b-3551-40c8-d950-a4fad7128178"
|
65 |
+
},
|
66 |
+
"outputs": [
|
67 |
+
{
|
68 |
+
"name": "stdout",
|
69 |
+
"output_type": "stream",
|
70 |
+
"text": [
|
71 |
+
"Collecting ijson\n",
|
72 |
+
" Downloading ijson-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n",
|
73 |
+
"Downloading ijson-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n",
|
74 |
+
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/114.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r",
|
75 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
76 |
+
"\u001b[?25hInstalling collected packages: ijson\n",
|
77 |
+
"Successfully installed ijson-3.3.0\n"
|
78 |
+
]
|
79 |
+
}
|
80 |
+
],
|
81 |
+
"source": [
|
82 |
+
"'''# Load JSON dataset (using UTF-8 encoding)\n",
|
83 |
+
"with open('/content/Arabic.json', encoding='utf-8') as f:\n",
|
84 |
+
" arabic_data = json.load(f)\n",
|
85 |
+
"\n",
|
86 |
+
"# Convert data into parallel pairs (first 1000 rows for simplicity)\n",
|
87 |
+
"arabic_sentences = [entry['output'] for entry in arabic_data[:1000]]\n",
|
88 |
+
"en_sentences = [entry['input'] for entry in arabic_data[:1000]]'''\n",
|
89 |
+
"\n",
|
90 |
+
"!pip install ijson\n",
|
91 |
+
"import ijson\n",
|
92 |
+
"\n",
|
93 |
+
"# Load JSON dataset by streaming it with ijson (all items are still collected into a list)\n",
|
94 |
+
"def load_json_data(file_path):\n",
|
95 |
+
" with open(file_path, encoding='utf-8') as f:\n",
|
96 |
+
" # Use ijson to parse the file as an array of objects\n",
|
97 |
+
" objects = ijson.items(f, 'item')\n",
|
98 |
+
" data = list(objects)\n",
|
99 |
+
" return data\n",
|
100 |
+
"\n",
|
101 |
+
"arabic_data = load_json_data('/content/Arabic.json')\n",
|
102 |
+
"# Convert data into parallel pairs\n",
|
103 |
+
"arabic_sentences = [entry['output'] for entry in arabic_data[:1000]]\n",
|
104 |
+
"en_sentences = [entry['input'] for entry in arabic_data[:1000]]\n",
|
105 |
+
"\n",
|
106 |
+
"\n",
|
107 |
+
"\n",
|
108 |
+
"# Tokenize sentences (basic whitespace-based tokenization)\n",
|
109 |
+
"def tokenize(sentences):\n",
|
110 |
+
" return [sentence.split() for sentence in sentences]\n",
|
111 |
+
"\n",
|
112 |
+
"# Tokenize English and Arabic sentences\n",
|
113 |
+
"en_tokens = tokenize(en_sentences)\n",
|
114 |
+
"arabic_tokens = tokenize(arabic_sentences)\n",
|
115 |
+
"\n",
|
116 |
+
"# Create vocabularies with special tokens\n",
|
117 |
+
"vocab_en = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}\n",
|
118 |
+
"vocab_arabic = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}\n",
|
119 |
+
"\n",
|
120 |
+
"# Update vocabulary from tokens\n",
|
121 |
+
"vocab_en.update({word: idx + 4 for idx, (word, _) in enumerate(Counter([token for sentence in en_tokens for token in sentence]).items())})\n",
|
122 |
+
"vocab_arabic.update({word: idx + 4 for idx, (word, _) in enumerate(Counter([token for sentence in arabic_tokens for token in sentence]).items())})\n",
|
123 |
+
"\n",
|
124 |
+
"# Model parameters\n",
|
125 |
+
"input_dim = len(vocab_en)\n",
|
126 |
+
"output_dim = len(vocab_arabic)\n",
|
127 |
+
"emb_dim = 256\n",
|
128 |
+
"hidden_dim = 512\n",
|
129 |
+
"n_layers = 2\n",
|
130 |
+
"dropout = 0.5"
|
131 |
+
]
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"cell_type": "code",
|
135 |
+
"execution_count": 8,
|
136 |
+
"metadata": {
|
137 |
+
"id": "KhZopSVW2_fQ"
|
138 |
+
},
|
139 |
+
"outputs": [],
|
140 |
+
"source": [
|
141 |
+
"# Define LSTM Model (without Seq2Seq architecture)\n",
|
142 |
+
"class LSTMModel(nn.Module):\n",
|
143 |
+
" def __init__(self, source_vocab, target_vocab, embedding_dim, hidden_dim, dropout=0.1):\n",
|
144 |
+
" super(LSTMModel, self).__init__()\n",
|
145 |
+
"\n",
|
146 |
+
" # Define embedding layers\n",
|
147 |
+
" self.embedding_src = nn.Embedding(len(source_vocab), embedding_dim)\n",
|
148 |
+
" self.embedding_trg = nn.Embedding(len(target_vocab), embedding_dim)\n",
|
149 |
+
"\n",
|
150 |
+
" # LSTM encoder\n",
|
151 |
+
" self.encoder = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
|
152 |
+
"\n",
|
153 |
+
" # LSTM decoder (a separate nn.LSTM configured identically to the encoder)\n",
|
154 |
+
" self.decoder = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
|
155 |
+
"\n",
|
156 |
+
" # Fully connected layer to generate target output\n",
|
157 |
+
" self.fc_out = nn.Linear(hidden_dim, len(target_vocab))\n",
|
158 |
+
"\n",
|
159 |
+
" # Dropout for regularization\n",
|
160 |
+
" self.dropout = nn.Dropout(dropout)\n",
|
161 |
+
"\n",
|
162 |
+
" def forward(self, src, trg):\n",
|
163 |
+
" # Embed source and target sequences\n",
|
164 |
+
" embedded_src = self.dropout(self.embedding_src(src))\n",
|
165 |
+
" embedded_trg = self.dropout(self.embedding_trg(trg))\n",
|
166 |
+
"\n",
|
167 |
+
" # Pass source sequence through encoder (LSTM)\n",
|
168 |
+
" _, (hidden, cell) = self.encoder(embedded_src)\n",
|
169 |
+
"\n",
|
170 |
+
" # Pass target sequence through decoder (LSTM)\n",
|
171 |
+
" output, _ = self.decoder(embedded_trg, (hidden, cell))\n",
|
172 |
+
"\n",
|
173 |
+
" # Output from fully connected layer\n",
|
174 |
+
" output = self.fc_out(output)\n",
|
175 |
+
" return output"
|
176 |
+
]
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"cell_type": "code",
|
180 |
+
"execution_count": 9,
|
181 |
+
"metadata": {
|
182 |
+
"id": "QkP6GOWb3HBX"
|
183 |
+
},
|
184 |
+
"outputs": [],
|
185 |
+
"source": [
|
186 |
+
"# Initialize weights\n",
|
187 |
+
"def initialize_weights(model):\n",
|
188 |
+
" for name, param in model.named_parameters():\n",
|
189 |
+
" if 'weight' in name:\n",
|
190 |
+
" nn.init.xavier_uniform_(param)\n",
|
191 |
+
" else:\n",
|
192 |
+
" nn.init.zeros_(param)\n",
|
193 |
+
"\n",
|
194 |
+
"# Define Dataset and DataLoader\n",
|
195 |
+
"class ParallelDataset(Dataset):\n",
|
196 |
+
" def __init__(self, source_sentences, target_sentences, source_vocab, target_vocab):\n",
|
197 |
+
" self.source_sentences = source_sentences\n",
|
198 |
+
" self.target_sentences = target_sentences\n",
|
199 |
+
" self.source_vocab = source_vocab\n",
|
200 |
+
" self.target_vocab = target_vocab\n",
|
201 |
+
"\n",
|
202 |
+
" # Ensure special tokens are added to vocabularies\n",
|
203 |
+
" special_tokens = ['<pad>', '<sos>', '<eos>', '<unk>']\n",
|
204 |
+
" for token in special_tokens:\n",
|
205 |
+
" if token not in self.source_vocab:\n",
|
206 |
+
" self.source_vocab[token] = len(self.source_vocab)\n",
|
207 |
+
" if token not in self.target_vocab:\n",
|
208 |
+
" self.target_vocab[token] = len(self.target_vocab)\n",
|
209 |
+
"\n",
|
210 |
+
" # Set max index to prevent index errors\n",
|
211 |
+
" self.source_max_idx = len(self.source_vocab) - 1\n",
|
212 |
+
" self.target_max_idx = len(self.target_vocab) - 1\n",
|
213 |
+
"\n",
|
214 |
+
" def __len__(self):\n",
|
215 |
+
" return len(self.source_sentences)\n",
|
216 |
+
"\n",
|
217 |
+
" def __getitem__(self, idx):\n",
|
218 |
+
" # Convert source sentence to indices, handling unknown tokens\n",
|
219 |
+
" source_indices = [\n",
|
220 |
+
" min(self.source_vocab.get(word, self.source_vocab['<unk>']), self.source_max_idx)\n",
|
221 |
+
" for word in self.source_sentences[idx].split()\n",
|
222 |
+
" ]\n",
|
223 |
+
" target_indices = [\n",
|
224 |
+
" min(self.target_vocab.get(word, self.target_vocab['<unk>']), self.target_max_idx)\n",
|
225 |
+
" for word in self.target_sentences[idx].split()\n",
|
226 |
+
" ]\n",
|
227 |
+
"\n",
|
228 |
+
" # Adding <sos> and <eos> tokens\n",
|
229 |
+
" source_indices = [self.source_vocab['<sos>']] + source_indices + [self.source_vocab['<eos>']]\n",
|
230 |
+
" target_indices = [self.target_vocab['<sos>']] + target_indices + [self.target_vocab['<eos>']]\n",
|
231 |
+
"\n",
|
232 |
+
" # Convert to tensors\n",
|
233 |
+
" source_tensor = torch.tensor(source_indices, dtype=torch.long)\n",
|
234 |
+
" target_tensor = torch.tensor(target_indices, dtype=torch.long)\n",
|
235 |
+
"\n",
|
236 |
+
" return source_tensor, target_tensor"
|
237 |
+
]
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "code",
|
241 |
+
"execution_count": 10,
|
242 |
+
"metadata": {
|
243 |
+
"id": "Yeb9bklo3Nur"
|
244 |
+
},
|
245 |
+
"outputs": [],
|
246 |
+
"source": [
|
247 |
+
"\n",
|
248 |
+
"# Collate function for padding sequences\n",
|
249 |
+
"def collate_fn(batch):\n",
|
250 |
+
" source_sentences, target_sentences = zip(*batch)\n",
|
251 |
+
" source_padded = pad_sequence(source_sentences, padding_value=0, batch_first=True)\n",
|
252 |
+
" target_padded = pad_sequence(target_sentences, padding_value=0, batch_first=True)\n",
|
253 |
+
" return source_padded, target_padded\n",
|
254 |
+
"\n",
|
255 |
+
"# Hyperparameters\n",
|
256 |
+
"batch_size = 8\n",
|
257 |
+
"epochs = 10\n",
|
258 |
+
"clip = 1\n",
|
259 |
+
"\n",
|
260 |
+
"# DataLoader initialization\n",
|
261 |
+
"train_data = ParallelDataset(en_sentences, arabic_sentences, vocab_en, vocab_arabic)\n",
|
262 |
+
"train_size = int(0.8 * len(train_data))\n",
|
263 |
+
"val_size = len(train_data) - train_size\n",
|
264 |
+
"\n",
|
265 |
+
"train_dataset, val_dataset = random_split(train_data, [train_size, val_size])\n",
|
266 |
+
"train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)\n",
|
267 |
+
"val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)\n",
|
268 |
+
"\n",
|
269 |
+
"# Model, optimizer, and criterion\n",
|
270 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
271 |
+
"model = LSTMModel(vocab_en, vocab_arabic, emb_dim, hidden_dim, dropout).to(device)\n",
|
272 |
+
"model.apply(initialize_weights)\n",
|
273 |
+
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
|
274 |
+
"criterion = nn.CrossEntropyLoss(ignore_index=vocab_arabic['<pad>'])"
|
275 |
+
]
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"cell_type": "code",
|
279 |
+
"execution_count": 11,
|
280 |
+
"metadata": {
|
281 |
+
"colab": {
|
282 |
+
"base_uri": "https://localhost:8080/"
|
283 |
+
},
|
284 |
+
"id": "YmCgmD413Yot",
|
285 |
+
"outputId": "7bd4b2df-da85-484e-da1a-84de7581e3cc"
|
286 |
+
},
|
287 |
+
"outputs": [
|
288 |
+
{
|
289 |
+
"name": "stdout",
|
290 |
+
"output_type": "stream",
|
291 |
+
"text": [
|
292 |
+
"Epoch [1/10], Train Loss: 7.9180, Val Loss: 7.1097\n",
|
293 |
+
"Epoch [2/10], Train Loss: 6.6431, Val Loss: 6.7556\n",
|
294 |
+
"Epoch [3/10], Train Loss: 5.9405, Val Loss: 6.6448\n",
|
295 |
+
"Epoch [4/10], Train Loss: 5.4055, Val Loss: 6.5482\n",
|
296 |
+
"Epoch [5/10], Train Loss: 4.7510, Val Loss: 6.5753\n",
|
297 |
+
"Epoch [6/10], Train Loss: 4.1411, Val Loss: 6.5533\n",
|
298 |
+
"Epoch [7/10], Train Loss: 3.4415, Val Loss: 6.6021\n",
|
299 |
+
"Epoch [8/10], Train Loss: 2.8304, Val Loss: 6.5049\n",
|
300 |
+
"Epoch [9/10], Train Loss: 2.2867, Val Loss: 6.5281\n",
|
301 |
+
"Epoch [10/10], Train Loss: 1.7618, Val Loss: 6.6043\n"
|
302 |
+
]
|
303 |
+
}
|
304 |
+
],
|
305 |
+
"source": [
|
306 |
+
"# Training loop with validation\n",
|
307 |
+
"def train(model, train_loader, optimizer, criterion):\n",
|
308 |
+
" model.train()\n",
|
309 |
+
" train_loss = 0.0\n",
|
310 |
+
" for source, target in train_loader:\n",
|
311 |
+
" source, target = source.to(device), target.to(device)\n",
|
312 |
+
" optimizer.zero_grad()\n",
|
313 |
+
" output = model(source, target)\n",
|
314 |
+
" output = output.view(-1, output_dim)\n",
|
315 |
+
" target = target.view(-1)\n",
|
316 |
+
" loss = criterion(output, target)\n",
|
317 |
+
" loss.backward()\n",
|
318 |
+
" torch.nn.utils.clip_grad_norm_(model.parameters(), clip)\n",
|
319 |
+
" optimizer.step()\n",
|
320 |
+
" train_loss += loss.item()\n",
|
321 |
+
" return train_loss / len(train_loader)\n",
|
322 |
+
"\n",
|
323 |
+
"def validate(model, val_loader, criterion):\n",
|
324 |
+
" model.eval()\n",
|
325 |
+
" val_loss = 0.0\n",
|
326 |
+
" with torch.no_grad():\n",
|
327 |
+
" for source, target in val_loader:\n",
|
328 |
+
" source, target = source.to(device), target.to(device)\n",
|
329 |
+
" output = model(source, target)\n",
|
330 |
+
" output = output.view(-1, output_dim)\n",
|
331 |
+
" target = target.view(-1)\n",
|
332 |
+
" loss = criterion(output, target)\n",
|
333 |
+
" val_loss += loss.item()\n",
|
334 |
+
" return val_loss / len(val_loader)\n",
|
335 |
+
"\n",
|
336 |
+
"# Train the model\n",
|
337 |
+
"train_losses = []\n",
|
338 |
+
"val_losses = []\n",
|
339 |
+
"\n",
|
340 |
+
"for epoch in range(epochs):\n",
|
341 |
+
" train_loss = train(model, train_loader, optimizer, criterion)\n",
|
342 |
+
" val_loss = validate(model, val_loader, criterion)\n",
|
343 |
+
"\n",
|
344 |
+
" # Append the losses for plotting\n",
|
345 |
+
" train_losses.append(train_loss)\n",
|
346 |
+
" val_losses.append(val_loss)\n",
|
347 |
+
"\n",
|
348 |
+
" print(f\"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}\")\n",
|
349 |
+
"\n",
|
350 |
+
"# Save the model\n",
|
351 |
+
"torch.save(model.state_dict(), 'lstm_model.pth')"
|
352 |
+
]
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"cell_type": "code",
|
356 |
+
"execution_count": 12,
|
357 |
+
"metadata": {
|
358 |
+
"colab": {
|
359 |
+
"base_uri": "https://localhost:8080/",
|
360 |
+
"height": 449
|
361 |
+
},
|
362 |
+
"id": "2c-NQedm3a8u",
|
363 |
+
"outputId": "fb3b3c19-ba4a-4e5d-978d-2d26c1779e1d"
|
364 |
+
},
|
365 |
+
"outputs": [
|
366 |
+
{
|
367 |
+
"data": {
|
368 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGwCAYAAACHJU4LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABTLUlEQVR4nO3dd3hTdf/G8XeS7l12C2UKFAqUjWxUFJAlQxBxoLiZ8ugj/BQElOFAQVAUB7gAJ0OGCDwIZclG9pDRsmR30pXk90egUCjQlrYnbe/XdeVqc3Jyzickmruf7/ecY7Lb7XZEREREnJDZ6AJEREREbkZBRURERJyWgoqIiIg4LQUVERERcVoKKiIiIuK0FFRERETEaSmoiIiIiNNyMbqAO2Gz2Thx4gS+vr6YTCajyxEREZFMsNvtxMbGEhwcjNl8655Jvg4qJ06cICQkxOgyREREJBuioqIoU6bMLdfJ10HF19cXcLxQPz8/g6sRERGRzIiJiSEkJCTte/xW8nVQuTLc4+fnp6AiIiKSz2Rm2oYm04qIiIjTUlARERERp6WgIiIiIk4rX89RERGRO2Oz2UhOTja6DClgXF1dsVgsObItBRURkUIqOTmZw4cPY7PZjC5FCqCAgABKlSp1x+c5U1ARESmE7HY7J0+exGKxEBISctuTbolklt1uJyEhgdOnTwMQFBR0R9szNKhYrVZGjhzJd999x6lTpwgODqZPnz688cYbOtOsiEguSk1NJSEhgeDgYLy8vIwuRwoYT09PAE6fPk2JEiXuaBjI0KDyzjvvMHXqVL7++mvCwsLYtGkTTz31FP7+/gwcONDI0kRECjSr1QqAm5ubwZVIQXUlAKekpOTfoLJ27Vo6d+5M+/btAShfvjyzZs1iw4YNGa6flJREUlJS2v2YmJg8qVNEpKBS91pyS059tgwdlGzSpAnLly9n//79AGzfvp3Vq1fTrl27DNcfN24c/v7+aTdd50dERKRgM7SjMnToUGJiYggNDcVisWC1WhkzZgy9e/fOcP1hw4YxZMiQtPtXrhUgIiIiBZOhHZUff/yR77//npkzZ7Jlyxa+/vpr3n//fb7++usM13d3d0+7ro+u7yMiIjmhfPnyTJw40egy5CYMDSqvvvoqQ4cO5ZFHHqFmzZo8/vjjvPzyy4wbN87IsgDYdOQ8FxN0EiQREWdhMplueRs5cmS2trtx40aee+65O6qtVatWDB48+I62IRkzdOgnISHhhmP3LRaL4Scf+nb9Ud6ct5MHqpdi6mN1NdlMRMQJnDx5Mu33H374gREjRrBv3760ZT4+Pmm/2+12rFYrLi63/5orXrx4zhYqOcrQjkrHjh0ZM2YMCxcu5MiRI8yZM4cPPviALl26GFkWdUICsJhN/L7rFLM3Rhlai4hIXrDb7SQkpxpys9vtmaqxVKlSaTd/f39MJlPa/b179+Lr68vixYupV68e7u7urF69mn/++YfOnTtTsmRJfHx8aNCgAcuWLUu33euHfkwmE1988QVdunTBy8uLypUrM3/+/Dv69/3ll18ICwvD3d2d8uXLM2HChHSPf/LJJ1SuXBkPDw9KlixJ9+7d0x77+eefqVmzJp6enhQtWpTWrVsTHx9/R/XkJ4Z2VCZPnszw4cN56aWXOH36NMHBwTz//POMGDHCyLKoUdqf/7YJZcyiPYz6bRcNyhfhrhI+t3+iiEg+dSnFSvURSwzZ9+7RbfByy5mvo6FDh/L+++9TsWJFAgMDiYqK4sEHH2TMmDG4u7vzzTff0LFjR/bt20fZsmVvup1Ro0bx7rvv8t577zF58mR69+7N0aNHKVKkSJZr2rx5Mz169GDkyJH07NmTtWvX8tJLL1G0aFH69OnDpk2bGDhwIN9++y1NmjTh/PnzREREAI4uUq9evXj33Xfp0qULsbGxREREZDrcFQSGBhVfX18mTpzolJOY+jarwKoDZ4g4cJaBs7Yyp18T3F1y5gJLIi
KSO0aPHs3999+fdr9IkSKEh4en3X/rrbeYM2cO8+fPp3///jfdTp8+fejVqxcAY8eO5aOPPmLDhg20bds2yzV98MEH3HfffQwfPhyAKlWqsHv3bt577z369OlDZGQk3t7edOjQAV9fX8qVK0edOnUAR1BJTU2la9eulCtXDoCaNWtmuYb8TNf6uQmz2cSEHuG0mxjB7pMxvPv7PoZ3qG50WSIiucLT1cLu0W0M23dOqV+/frr7cXFxjBw5koULF6Z96V+6dInIyMhbbqdWrVppv3t7e+Pn55d27Zqs2rNnD507d063rGnTpkycOBGr1cr9999PuXLlqFixIm3btqVt27Zpw07h4eHcd9991KxZkzZt2vDAAw/QvXt3AgMDs1VLfqSrUN1CCV8P3nvY8WH9cvVh/tyXvQ+piIizM5lMeLm5GHLLyQMWvL29091/5ZVXmDNnDmPHjiUiIoJt27ZRs2ZNkpNvfVSnq6vrDf8+uXWgh6+vL1u2bGHWrFkEBQUxYsQIwsPDuXjxIhaLhaVLl7J48WKqV6/O5MmTqVq1KocPH86VWpyRgspt3Btakj5NygPwyk/bORObdOsniIiI01izZg19+vShS5cu1KxZk1KlSnHkyJE8raFatWqsWbPmhrqqVKmSdg0cFxcXWrduzbvvvsvff//NkSNH+N///gc4QlLTpk0ZNWoUW7duxc3NjTlz5uTpazCShn4yYWi7UNYfOsfeU7G88tN2pvdpgNmsQ5ZFRJxd5cqV+fXXX+nYsSMmk4nhw4fnWmfkzJkzbNu2Ld2yoKAg/vOf/9CgQQPeeustevbsybp165gyZQqffPIJAAsWLODQoUO0aNGCwMBAFi1ahM1mo2rVqvz1118sX76cBx54gBIlSvDXX39x5swZqlWrliuvwRmpo5IJHq4WPupVB3cXMyv3n2HG2iNGlyQiIpnwwQcfEBgYSJMmTejYsSNt2rShbt26ubKvmTNnUqdOnXS3zz//nLp16/Ljjz8ye/ZsatSowYgRIxg9ejR9+vQBICAggF9//ZV7772XatWq8emnnzJr1izCwsLw8/Nj1apVPPjgg1SpUoU33niDCRMm3PSaeAWRyZ6Pj3GKiYnB39+f6OjoPDmd/rfrjzJ87k7cLGbm9GtCWLB/ru9TRCQ3JCYmcvjwYSpUqICHh4fR5UgBdKvPWFa+v9VRyYLHGpXl/uolSbbaGDhrK5eSrUaXJCIiUqApqGSByWTinW61KOnnzj9n4nlr4W6jSxIRESnQFFSyqIi3Gx/2qI3JBDP/iuT3nSdv/yQRERHJFgWVbGhyVzFeaFkJgNd+2cHJ6EsGVyQiIlIwKahk05D7qxBexp/oSym8/MM2rLZ8OydZRETEaSmoZJOrxcykR+rg5WZh/aHzfLryH6NLEhERKXAUVO5A+WLejO5cA4APlu5na+QFgysSEREpWBRU7lC3uqXpFB6M1WZn0OxtxCamGF2SiIhIgaGgcodMJhNvd6lBmUBPIs8nMGLeLqNLEhGRW2jVqhWDBw9Ou1++fHkmTpx4y+eYTCbmzp17x/vOqe0UJgoqOcDPw5VJj9TGYjYxZ+tx5mw9ZnRJIiIFTseOHWnbtm2Gj0VERGAymfj777+zvN2NGzfy3HPP3Wl56YwcOZLatWvfsPzkyZO5fvr7GTNmEBAQkKv7yEsKKjmkXrkiDLqvMgDD5+4i8lyCwRWJiBQsffv2ZenSpRw7duMfg9OnT6d+/frUqlUry9stXrw4Xl5eOVHibZUqVQp3d/c82VdBoaCSg/rdcxcNyxchLimVgbO3kmLNnSt0iogURh06dKB48eLMmDEj3fK4uDh++ukn+vbty7lz5+jVqxelS5fGy8uLmjVrMmvWrFtu9/qhnwMHDtCiRQs8PDyoXr06S5cuveE5r732GlWqVMHLy4uKFSsyfPhwUlIccxRnzJjBqFGj2L59OyaTCZPJlFbz9UM/O3bs4N5778XT05OiRYvy3HPPER
cXl/Z4nz59eOihh3j//fcJCgqiaNGi9OvXL21f2REZGUnnzp3x8fHBz8+PHj168O+//6Y9vn37du655x58fX3x8/OjXr16bNq0CYCjR4/SsWNHAgMD8fb2JiwsjEWLFmW7lsxwydWtFzIWs4kPH6lNu4mr2BZ1kUnLDvBKm6pGlyUicnt2O6QY1Al29QKT6barubi48MQTTzBjxgxef/11TJef89NPP2G1WunVqxdxcXHUq1eP1157DT8/PxYuXMjjjz9OpUqVaNiw4W33YbPZ6Nq1KyVLluSvv/4iOjo63XyWK3x9fZkxYwbBwcHs2LGDZ599Fl9fX/773//Ss2dPdu7cye+//86yZcsA8Pe/8SK28fHxtGnThsaNG7Nx40ZOnz7NM888Q//+/dOFsRUrVhAUFMSKFSs4ePAgPXv2pHbt2jz77LO3fT0Zvb4rIWXlypWkpqbSr18/evbsyZ9//glA7969qVOnDlOnTsVisbBt2zZcXV0B6NevH8nJyaxatQpvb292796Nj49PluvICgWVHFY6wJPx3Wrx0vdb+PjPgzSrXIy7KxY1uiwRkVtLSYCxwcbs+/9OgJt3plZ9+umnee+991i5ciWtWrUCHMM+3bp1w9/fH39/f1555ZW09QcMGMCSJUv48ccfMxVUli1bxt69e1myZAnBwY5/j7Fjx94wr+SNN95I+718+fK88sorzJ49m//+9794enri4+ODi4sLpUqVuum+Zs6cSWJiIt988w3e3o7XP2XKFDp27Mg777xDyZIlAQgMDGTKlClYLBZCQ0Np3749y5cvz1ZQWb58OTt27ODw4cOEhIQA8M033xAWFsbGjRtp0KABkZGRvPrqq4SGhgJQuXLltOdHRkbSrVs3atasCUDFihWzXENWaegnFzxYM4ie9UOw2+HlH7ZxMSHZ6JJERAqE0NBQmjRpwldffQXAwYMHiYiIoG/fvgBYrVbeeustatasSZEiRfDx8WHJkiVERkZmavt79uwhJCQkLaQANG7c+Ib1fvjhB5o2bUqpUqXw8fHhjTfeyPQ+rt1XeHh4WkgBaNq0KTabjX379qUtCwsLw2KxpN0PCgri9OnTWdrXtfsMCQlJCykA1atXJyAggD179gAwZMgQnnnmGVq3bs348eP555+rJzQdOHAgb7/9Nk2bNuXNN9/M1uTlrFJHJZe82ak6G4+c59DZeIb+soOpj9VNa1OKiDgdVy9HZ8OofWdB3759GTBgAB9//DHTp0+nUqVKtGzZEoD33nuPSZMmMXHiRGrWrIm3tzeDBw8mOTnn/mBct24dvXv3ZtSoUbRp0wZ/f39mz57NhAkTcmwf17oy7HKFyWTCZsu9OZAjR47k0UcfZeHChSxevJg333yT2bNn06VLF5555hnatGnDwoUL+eOPPxg3bhwTJkxgwIABuVaPOiq5xMvNhY961cHVYuL3XaeYvTHK6JJERG7OZHIMvxhxy+IfcT169MBsNjNz5ky++eYbnn766bQ/BNesWUPnzp157LHHCA8Pp2LFiuzfvz/T265WrRpRUVGcPHkybdn69evTrbN27VrKlSvH66+/Tv369alcuTJHjx5Nt46bmxtWq/W2+9q+fTvx8fFpy9asWYPZbKZq1dyZ33jl9UVFXf1O2r17NxcvXqR69eppy6pUqcLLL7/MH3/8QdeuXZk+fXraYyEhIbzwwgv8+uuv/Oc//+Hzzz/PlVqvUFDJRTVK+/PfNo4xvlG/7eLg6ViDKxIRyf98fHzo2bMnw4YN4+TJk/Tp0yftscqVK7N06VLWrl3Lnj17eP7559Md0XI7rVu3pkqVKjz55JNs376diIgIXn/99XTrVK5cmcjISGbPns0///zDRx99xJw5c9KtU758eQ4fPsy2bds4e/YsSUlJN+yrd+/eeHh48OSTT7Jz505WrFjBgAEDePzxx9Pmp2SX1Wpl27Zt6W579uyhdevW1KxZk969e7NlyxY2bNjAE088QcuWLa
lfvz6XLl2if//+/Pnnnxw9epQ1a9awceNGqlWrBsDgwYNZsmQJhw8fZsuWLaxYsSLtsdyioJLL+jarQPPKxUhMsTFg1jaSUm+dsEVE5Pb69u3LhQsXaNOmTbr5JG+88QZ169alTZs2tGrVilKlSvHQQw9lertms5k5c+Zw6dIlGjZsyDPPPMOYMWPSrdOpUydefvll+vfvT+3atVm7di3Dhw9Pt063bt1o27Yt99xzD8WLF8/wEGkvLy+WLFnC+fPnadCgAd27d+e+++5jypQpWfvHyEBcXBx16tRJd+vYsSMmk4l58+YRGBhIixYtaN26NRUrVuSHH34AwGKxcO7cOZ544gmqVKlCjx49aNeuHaNGjQIcAahfv35Uq1aNtm3bUqVKFT755JM7rvdWTHa73Z6re8hFMTEx+Pv7Ex0djZ+fn9Hl3NTp2ETaTYzgXHwyfZtVYHiH6rd/kohILkpMTOTw4cNUqFABDw8Po8uRAuhWn7GsfH+ro5IHSvh68N7DjrMlfrn6MH/uy95sbRERkcJGQSWP3Btakj5NygPwyk/bORN743iliIiIpKegkoeGtgsltJQvZ+OSeeWn7dhs+XbUTUREJE8oqOQhD1cLH/Wqg7uLmZX7zzB97RGjSxIREXFqCip5rEpJX964PJn2ncV72XUi2uCKRKQwy8fHU4iTy6nPloKKAR5rVJb7q5ck2Wpj4KytXErWIcsikreunJI9J8/YKnKthATHRS6vP7NuVukU+gYwmUy8060Wfx9bxT9n4nlr4W7GdqlpdFkiUoi4uLjg5eXFmTNncHV1xWzW362SM+x2OwkJCZw+fZqAgIB01ynKDp1HxUBrDp7lsS//wm6HTx+rS9saQUaXJCKFSHJyMocPH87V68ZI4RUQEECpUqUyvM5dVr6/1VExUNO7ivF8i0p8uvIfXvtlB+EhAQT5expdlogUEm5ublSuXFnDP5LjXF1d77iTcoWCisH+80AV1v1zlu3Honn5h218/8zdWMy6yrKI5A2z2awz04pT06CkwVwtZiY9UgcvNwvrD53n05X/GF2SiIiI01BQcQLli3kzunMNAD5Yup+tkRcMrkhERMQ5KKg4iW51S9MpPBirzc6g2duITUwxuiQRERHDKahkxG6Hc3k7BGMymXi7Sw3KBHoSeT6BEfN25en+RUREnJGCSkaOb4bJdeHLB2DLN5AUmye79fNwZdIjtbGYTczZepw5W4/lyX5FRESclYJKRk5sBZMFov6C+QPg/aowrx9Ernd0W3JRvXJFGHRfZQCGz91F5LmEXN2fiIiIM9MJ324m9hRsnwVbv4NzB68uL1YF6jwGtR4B35I5u8/LrDY7vaatZ8OR89QOCeCnFxrjalGmFBGRgiEr39+GfvuVL18ek8l0w61fv35GluXgWwqavQz9N8FTi6F2b3D1grP7YekI+KAazHoU9i0Ga2qO7tpiNvHhI7Xx9XBhW9RFJi07kKPbFxERyS8M7aicOXMGq/XqBfl27tzJ/fffz4oVK2jVqtVtn5/np9BPjIFdv8KWb+H4pqvLfUpB7V5Q+zEodleO7W7h3yfpN3MLJhPMevZu7q5YNMe2LSIiYpSsfH871dDP4MGDWbBgAQcOHMjw2gDXM/RaP6f3OIaFts+ChHNXl5dtAnUfh+qdwc37jnfz2s9/88OmKIL8PVg8qDkBXm53vE0REREj5cugkpycTHBwMEOGDOH//u//MlwnKSmJpKSktPsxMTGEhIQYe1HC1GTY/zts/RYOLgP75Yt7uflCja5Q9wkoXQ8yEbwyEp+USsfJqzl0Np62YaWY+ljdTIU4ERERZ5Vv5qhca+7cuVy8eJE+ffrcdJ1x48bh7++fdgsJCcm7Am/GxQ2qd4LeP8HgnXDvcAisAMmxsOVr+OI++KQxrPsY4s9mefPe7i581KsOrhYTv+86xeyNUbnwIkRERJyT03RU2rRpg5ubG7/99ttN13HKjkpGbDY4usbRZdk9D1
ITHcvNrlC1naPLUuleMGf+ypKfrzrEmEV78HA1s2BAM+4q4ZtLxYuIiOSufDf0c/ToUSpWrMivv/5K586dM/08Q+eoZFZiNOz42RFaTmy9utw3GGo/6jjUuUiF227GZrPz5PQNRBw4S7UgP+b2a4K7S85cQltERCQv5buhn+nTp1OiRAnat29vdCk5z8MfGvSF5/6EF9ZAoxfBMxBiT0DE+/BRbZjRAbb/ACmXbroZs9nEhIfDKeLtxp6TMbz7+748ewkiIiJGMbyjYrPZqFChAr169WL8+PFZem6+6KhkJDUJ9i50dFn+WQFcfgvc/aFmd8dRQ0G1M5yA+7+9//L0DMeh0TOeakCrqiXyrm4REZEckK+Gfv744w/atGnDvn37qFKlSpaem2+DyrUuRsG2mbDtO7gYeXV5yRpQ53Go1QO8iqR7ysj5u5ix9gjFfNxYPKgFxX3d87hoERGR7MtXQeVOFIigcoXNBkdWOU4mt+c3sF6eNGxxg9D2jtBS8R4wm0lMsfLQx2vYeyqWllWKM71PA8xmHbIsIiL5g4JKfpdwHnb+4rhy86m/ry73D3Gcyr9Ob/YnBdJx8mqSUm0M71Cdvs1uPyFXRETEGSioFCQntzu6LDt+dBxBBIAJKrYkwrcdz/xVErvFgzn9mhAW7G9oqSIiIpmhoFIQpSTC3gWOLsvhlWmL482+/JTcmLV+7Zg0+Ek83XTIsoiIODcFlYLuwlHY9j1s/R5ijqUtPu5ZhdL3POc4csgz0MACRUREbk5BpbCwWeHQCs5GfInvkT9wN6U6lrt4QLWOjgm45ZuD2SlOlyMiIgIoqBRKk+av5+Jf39HLdSVVuOYw54ByULEl+JeFgBDHhFz/MuBXGiwuxhUsIiKFloJKIZScaqP7p2v5+9hFepU+x5jy2zDv/BmSYjJ+gsnsOI3/lfCS7mdZR5hx88rbFyEiIoWCgkohdeRsPA9+FEFCspVX21SlX9NgOLAEzuyH6EjHyeWioyD6GFiTb79Br2KOwHIlvFwfajwDMzx7roiIyK0oqBRiP28+xis/bcdiNvHzC42pUzaDSbU2G8Sfvhxcrgkw1/5Mjr39ztx80gcX/zIQUPbqMp9Smh8jIiI3UFApxOx2OwNnb+O37ScIKeLJooHN8fVwzfqGLl28LrxEpr8ff+b22zC7gn/py8Gl7DVh5ppg46LT/4uIFDYKKoVcTGIKD06K4NiFS3SpU5oPe9bO+Z2kXHIMIV2MdPy8viMTcxzs1ttsxAQ+JTOYJ3NNqPHQ+yoiUtAoqAibj57n4U/XYbPDe91r8XD9kLwtwJoKsSevCTBXhpiuCTWpl26/HQ//q/Nj/EqDX5BjEvC1P919c//1iIhIjlFQEQAmLTvAh8v2YzLBgHsrM+i+ylic5eKFdjsknLtxSOnaUJN4MXPbcvO9HFyCwC/4mp+lroYZn5Jg1ll7RUScgYKKAGC12Xlz/k6+W+84r0qzu4ox6ZHaFPXJJ/NCkmIvDy9dDi8xJyDmJMRe+Xny5odfX89kdoSVdGFG3RnJh+x2SI53/PeRHOf4byApFmypjgnsfsE6Ik+yx2Z1XBQ3/gwknIX4s44/KAMrQOXWOborBRVJZ+7W4wz7dQeXUqyU8vNgyqN1qF++iNFl5YykOEdgiTlxk58nIe7fTMyXuUzdGckNdrtjXldynCNUXHtLCxtxGSyLvbr82udym/9tu3ik/8ze8Hm+vMzFLU9evhjEmgqXzjsCR1r4OHf555mrQeTK75cukOFnq2YP6PZ5jpamoCI3OPBvLC98t5l/zsTjYjYxtF0ofZtVwFQY/uqyWR3/IWYUYgpKd8Zud5wbJzXJcbNe/pmamMGyax67flm65yVnvE5qomNfdhtYXMHidvnmChb3a36/ZrmLewbrumVwy+zzrltutuROByE16SbBIrNh45p1MhuWM8tkdnzG3HwdP01miDvl+OLJLO/iGYeYaz/HHgHqzjgLa6rj/b3S7Yg/cz
lo3CSIXLrIbUPtDUyOjpx3Mcfnw6solG8GjZ7P0ZeioCIZik9KZeivO/ht+wkA2oaV4t2Ha+GXncOXC6LbdWdiTzlud9qdcfW6+mV/bSBICwrXhoTk9GHjVoGjUDNdDS0uNwk9NwScyz+xO977jAKILSXnS70SLNx9LgeNyz/d/a4uS1t+/bJrnuvqlXGASE26LohfP2R6wvE5zsxJHwFcPNMHF7/gG0O5T0nHv6dkjTXlatBICx9nMw4iCVc6HlllAq8ijhN4el++eV33M+334o6QkgeXV1FQkZuy2+18t/4ooxfsJsVqp3xRLz7pXY/qwfr3y5Sc7s7kFou7oxvh4p7+94zuu3hkbx1Mji9ya8rlQJXs+GlNvros7fekmyy/1fNutq3kzH/J5hRX71sHi5su80sfSly9neMkiFcms9/wOT5+9TMccyLzE9oxgU+Jm3Rlgq8uKwinG7BZr3Yv0z6Pl39PW3b5D4iE64ddrgsimf73vdbl4OFd/HK4KHo1ZHgXc3RA0n4v5ljXCYeqFVTktrZHXeSl77dw/OIl3F3MvNW5Bj0a5PEhzAXZrbozqUlXv/zTBQG3y8vcrrt/q3UyCBgWN+f4MsxNdvvNA8ytAk66L5fLy7HfOmy4+RTeC3gmJ1zuJl4fxq/p1MSdckzkzQw3n5sPlV756V38ahi4NuSmBdpr3sPrl6Um3fg5SL3+/jVh4obPxfX7ymBZbgzheRa5SYfj+t8vdzycMHhklYKKZMqF+GSG/LiNFfscZ5l9uF4ZRneugadb/v+PQETyiM3m6BjEXj/EdF1AN7rLmFvM186lumaOlovH5SGXojfvdngXKzDBI6sUVCTTbDY7U1f+w4Q/9mGzQ2gpX6Y+Vo8KxbyNLk1ECpJMHaF3yjFJ+1pmlxsna6fNQ7rmdv2yDMOD+022c83cpeuXubjfel+aaJwtCiqSZWv/OcvAWVs5G5eMj7sL73WvRbuaQUaXJSKFic3qOFLF4nI1KBTCbkNhkJXv7wI+kC2Z1aRSMRYObE7D8kWIS0rlxe+38NaC3aRYbbd/sohITjBbHJNDPfzB1UMhRQAFFblGST8PZj7biOdbVgTgy9WHeWTaek5GZ+KaPCIiIrlAQUXScbGYGdauGtMer4evhwubj16g/UeriThwxujSRESkEFJQkQw9EFaKhQOaExbsx/n4ZJ74agMTl+3Hasu3U5pERCQfUlCRmypb1ItfXmxCr4Zlsdth4rID9Jm+gfPxeXyyLRERKbQUVOSWPFwtjOtakwkPh+PhaibiwFnafxTB5qPZOZWziIhI1iioSKZ0q1eGef2aUbGYNyejE+n52Tq+Wn2YfHx0u4iI5AMKKpJpVUv5Mn9AM9rXCiLVZmf0gt30m7mF2MRcuHCbiIgICiqSRT7uLkzpVYdRncJwtZhYtOMUnaasYe+pAnp6bBERMZSCimSZyWTiySbl+eH5xgT7e3D4bDwPfbyGnzcfM7o0EREpYBRUJNvqlg1kwcDmtKhSnMQUG6/8tJ2hv/xNYkoOX11UREQKLQUVuSNFvN2Y0acBQ+6vgskEszdG0fWTtRw9F290aSIiUgAoqMgdM5tNDLyvMt8+3Yii3m7sPhlDh49W8/vOU0aXJiIi+ZyCiuSYZpUdFzasXy6Q2KRUXvhuM2MW6sKGIiKSfQoqkqNK+Xsw67m7ebZ5BQA+jzhMr2nrORWdaHBlIiKSHymoSI5ztZh5vX11Pn2sLr7uLmw6eoH2H0Ww5uBZo0sTEZF8RkFFck3bGkH8NqAZ1YL8OBefzGNf/sXk5Qew6cKGIiKSSQoqkqvKF/NmzktN6Fk/BLsdJizdz9Nfb+SCLmwoIiKZoKAiuc7D1cI73WvxbvdauLuY+XPfGdp/FMHWSF3YUEREbk1BRfJMj/ohzO3XlPJFvTgRnUiPz9bx9dojurChiIjclIKK5KlqQX7MH9CMdjVKkWK18+b8XQyYtZW4pFSjSxMRESekoCJ5zs/DlU
9612V4h+q4mE0s+PsknaasZt+pWKNLExERJ2N4UDl+/DiPPfYYRYsWxdPTk5o1a7Jp0yajy5JcZjKZ6NusAj88fzdB/h4cOhNP549X8+sWXdhQRESuMjSoXLhwgaZNm+Lq6srixYvZvXs3EyZMIDAw0MiyJA/VK1eEBQOa0bxyMRJTbAz5cTvDft2hCxuKiAgAJruBMxmHDh3KmjVriIiIyNbzY2Ji8Pf3Jzo6Gj8/vxyuTvKS1WZn8v8OMGn5Aex2CAv2Y2rvepQt6mV0aSIiksOy8v1taEdl/vz51K9fn4cffpgSJUpQp04dPv/885uun5SURExMTLqbFAwWs4nBravw9VMNCfRyZdeJGNpPjmDp7n+NLk1ERAxkaFA5dOgQU6dOpXLlyixZsoQXX3yRgQMH8vXXX2e4/rhx4/D390+7hYSE5HHFkttaVCnOwoHNqVs2gNjEVJ79ZhPjFu8hVRc2FBEplAwd+nFzc6N+/fqsXbs2bdnAgQPZuHEj69atu2H9pKQkkpKS0u7HxMQQEhKioZ8CKDnVxvjFe/lqzWEAGpYvwuRH61DSz8PgykRE5E7lm6GfoKAgqlevnm5ZtWrViIyMzHB9d3d3/Pz80t2kYHJzMTOiY3U+6V0XH3cXNhw5z/0frGTGmsPqroiIFCKGBpWmTZuyb9++dMv2799PuXLlDKpInM2DNYOY378pYcF+xCSmMvK33TyoKzGLiBQahgaVl19+mfXr1zN27FgOHjzIzJkzmTZtGv369TOyLHEyFYv7ML9/M95+qAaBXq7s/zeO3l/8xfPfbiLqfILR5YmISC4ydI4KwIIFCxg2bBgHDhygQoUKDBkyhGeffTZTz9XhyYXPxYRkJi47wLfrj2K12XFzMfN8i4q82KoSXm4uRpcnIiKZkJXvb8ODyp1QUCm89p2KZdRvu1j7zzkAgvw9GNoulE7hwZhMJoOrExGRW1FQkULBbrezZNcp3l64h2MXLgHQoHwgb3YMo0Zpf4OrExGRm1FQkUIlMcXKtFWH+OTPgySm2DCZ4JEGIbzyQFWK+rgbXZ6IiFxHQUUKpRMXLzFu8V5+234CAF8PF15uXYXHG5fD1WL49TdFROQyBRUp1DYcPs/I+bvYfdJxiYXKJXwY0bE6zSsXN7gyEREBBRURrDY7P2yM4r0le7mQkALA/dVLMrx9dV3oUETEYAoqIpdFJ6Tw4bL9Vw9ntph5tkUFXmp1F97uOpxZRMQICioi19n/byyjf9vN6stntC3p586wdtXoXFuHM4uI5DUFFZEM2O12/tj9L28v3E3UecfhzPXLBTKykw5nFhHJSwoqIreQmGLli4hDfLziHy6lWDGZoGf9EF5pU5ViOpxZRCTXKaiIZMLJ6EuMX7yXeduuHs486L7KPNmkvA5nFhHJRQoqIlmw8YjjcOZdJxyHM1cq7s2bHcNoUUWHM4uI5AYFFZEsstrs/LgpiveW7ON8fDIArauVZHiHapQr6m1wdSIiBYuCikg2RV9KYdKyA3y97kja4cx9m1eg/z06nFlEJKcoqIjcoQP/xjJ6wW4iDjgOZy7h686wB0N5qHZpHc4sInKHFFREcoDdbmfp7n95e+EeIs8nAFC3bAAjO4VRq0yAscWJiORjCioiOSgxxcqXqw/z8YqDJCQ7Dmd+uF4ZXm0TSnFfHc4sIpJVCioiueBUdCLjF+9h7pXDmd1dGNS6Mk80Lo+biw5nFhHJLAUVkVy0+eh5Rs7fzY7j0QBULO7NiA7VaVW1hMGViYjkDwoqIrnMarPz0+XDmc9dPpz5vtASDO9QnfLFdDiziMitKKiI5JHoSyl8tPwAX689Qurlw5mfblaB/vfehY8OZxYRyZCCikgeO3g6ltEL9rBq/xnAcTjza21D6VKnNGazDmcWEbmWgoqIAex2O8v3nOathbs5es5xOHPtkABGdQojPCTA2OJERJyIgoqIgZJSHYczT/mf43BmuHw4c9uqlPD1MLg6ERHjKa
iIOIF/YxJ5Z/Feft16HAAfdxcG3ncXfZpU0OHMIlKoKaiIOJHNRy8w6rdd/H3McTjzXSV8mNizNjVK+xtcmYiIMbLy/a0/60RyWb1ygcx9qSnvdqtFMR83Dp6Oo8sna5i26h9stnz7d4KISJ5QUBHJA2aziR4NQvjj5ZbcX70kKVY7Yxft5YmvNvBvTKLR5YmIOC0FFZE8VMTbjWmP12Nsl5p4uJpZffAsbSauYsmuU0aXJiLilBRURPKYyWTi0UZlWTCgOWHBflxMSOH5bzcz7NcdJCSnGl2eiIhTUVARMchdJXz49aUmPN+iIgCzNkTSYfJqdl6+hpCIiCioiBjK3cXCsAer8f0zjSjp586hM/F0+WQNn67URFsREVBQEXEKTe8qxu+DWtAmzDHRdvzivTz25V+cjL5kdGkiIoZSUBFxEoHebnz6WD3Gd62Jp6uFtf+co+3ECH7fedLo0kREDKOgIuJETCYTjzQsy8KBzahZ2p/oSym88N0WXvv5b+KTNNFWRAofBRURJ1SxuA+/vNiEF1tVwmSCHzZF0WHyav4+dtHo0kRE8pSCioiTcnMx81rbUL5/phGl/Dw4fDaerp+s5ZM/D2LVRFsRKSQUVEScXJNKxfh9cHMerFmKVJudd3/fR+8v1nPioibaikjBp6Aikg8EeLnx8aN1ebd7LbzcLKw/dJ52kyJYtEMTbUWkYFNQEcknTCYTPeqHsHBgc8LLOCbavvT9Fl79absm2opIgaWgIpLPVCjmzc8vNqHfPY6Jtj9tPkb7jyLYFnXR6NJERHKcgopIPuRqMfNqm1BmPXs3wf4eHDmXQPepa/l4hSbaikjBoqAiko/dXbEoiwe1oH2tIFJtdt5bso9en6/nuCbaikgBoaAiks/5e7kypVcd3n84HG83CxsOn6fdxFUs+PuE0aWJiNwxBRWRAsBkMtG9XhnHRNuQAGISU+k/cyv/+XE7cZpoKyL5mKFBZeTIkZhMpnS30NBQI0sSydfKF/Pm5xcaM/DeuzCb4Jctx3hwUgRbIy8YXZqISLZkK6hERUVx7NixtPsbNmxg8ODBTJs2LcvbCgsL4+TJk2m31atXZ6ckEbnM1WJmyANVmf1cY0oHeBJ5PoHun65j8vIDmmgrIvlOtoLKo48+yooVKwA4deoU999/Pxs2bOD1119n9OjRWdqWi4sLpUqVSrsVK1bspusmJSURExOT7iYiGWtYoQiLBjWnU3gwVpudCUv388i0dRy7kGB0aSIimZatoLJz504aNmwIwI8//kiNGjVYu3Yt33//PTNmzMjStg4cOEBwcDAVK1akd+/eREZG3nTdcePG4e/vn3YLCQnJTvkihYa/pyuTHqnNhz3D8XF3YeORC7SbFMH87ZpoKyL5Q7aCSkpKCu7u7gAsW7aMTp06ARAaGsrJk5k/pXejRo2YMWMGv//+O1OnTuXw4cM0b96c2NjYDNcfNmwY0dHRabeoqKjslC9SqJhMJrrUKcOigc2pWzaA2MRUBs7aypAfthGbmGJ0eSIit2Sy2+1ZHrRu1KgR99xzD+3bt+eBBx5g/fr1hIeHs379erp3755u/kpWXLx4kXLlyvHBBx/Qt2/f264fExODv78/0dHR+Pn5ZWufIoVJqtXG5P8dZPL/DmCzQ0gRTyb2rEO9coFGlyYihUhWvr+z1VF55513+Oyzz2jVqhW9evUiPDwcgPnz56cNCWVHQEAAVapU4eDBg9nehojcnIvFzMv3V+HH5xtTJtCTqPOX6PHZOiYtO0Cq1WZ0eSIiN8hWRwXAarUSExNDYODVv8SOHDmCl5cXJUqUyFYxcXFxlC1blpEjRzJw4MDbrq+Oikj2xSSmMGLuTuZuc8xXqV8ukA971iakiJfBlYlIQZfrHZVLly6RlJSUFlKOHj3KxIkT2bdvX5ZCyiuvvMLKlSs5cuQIa9eupUuXLlgsFnr16pWdskQkC/w8XJn4SB0m9qyNr7sLm45e4MFJEczdetzo0k
RE0mQrqHTu3JlvvvkGcMwradSoERMmTOChhx5i6tSpmd7OsWPH6NWrF1WrVqVHjx4ULVqU9evXU7x48eyUJSLZ8FCd0iwa1Jx65QKJTUpl8A/bGDx7KzGaaCsiTiBbQz/FihVj5cqVhIWF8cUXXzB58mS2bt3KL7/8wogRI9izZ09u1HoDDf2I5JxUq42PV/zDR/9znBiuTKAnE3vWpn75IkaXJiIFTK4P/SQkJODr6wvAH3/8QdeuXTGbzdx9990cPXo0O5sUEYO5WMwMal2ZH59vTEgRT45dcEy0/XDpfk20FRHDZCuo3HXXXcydO5eoqCiWLFnCAw88AMDp06fV2RDJ5+qVC2TRwOZ0rVMamx0mLT9Aj8/WEXlOZ7QVkbyXraAyYsQIXnnlFcqXL0/Dhg1p3Lgx4Oiu1KlTJ0cLFJG85+vhygc9azPpkdr4eriwJfIiD34Uwa9bjpHNAwVFRLIl24cnnzp1ipMnTxIeHo7Z7Mg7GzZswM/PL8+ugKw5KiK579iFBIb8sJ0NR84D0Ck8mLceqoG/p6vBlYlIfpWV7+9sB5UrrpyFtkyZMneymWxRUBHJG1abnal/HuTDZY6JtqUDPPmwZ20aVtBEWxHJulyfTGuz2Rg9ejT+/v6UK1eOcuXKERAQwFtvvYXNpkl3IgWNxWyi/72V+fmFxpQr6sXxi5foOW0dby/YzaVkq9HliUgBlq2g8vrrrzNlyhTGjx/P1q1b2bp1K2PHjmXy5MkMHz48p2sUESdRp2wgCwc25+F6ZbDb4YvVh3nwowg2XR4WEhHJadka+gkODubTTz9Nu2ryFfPmzeOll17i+PG8ObOlhn5EjLNi72mG/bqDUzGJmEzwVJMKvNqmKp5uFqNLExEnl+tDP+fPn89wwmxoaCjnz+svK5HC4J7QEix5uQU96ju6K1+tOUy7SavYcFj/DxCRnJOtoBIeHs6UKVNuWD5lyhRq1ap1x0WJSP7g7+nKu93DmfFUA4L8PThyLoGe09Yx6rddJCSnGl2eiBQA2Rr6WblyJe3bt6ds2bJp51BZt24dUVFRLFq0iObNm+d4oRnR0I+I84hJTGHswj3M3hgFQLmiXrzbrRaNKhY1uDIRcTa5PvTTsmVL9u/fT5cuXbh48SIXL16ka9eu7Nq1i2+//TZbRYtI/ubn4cr4brX4+umGBPt7cPRcAj2nrWfkfHVXRCT77vg8Ktfavn07devWxWrNm8MV1VERcU4xiSmMW7SHWRsc3ZWyRbx4p1stGldSd0VE8qCjIiJyK34erozrWotvLndXIs8n0Ovz9YyYt5P4JHVXRCTzFFREJNe0qFKcJS+34NFGZQH4Zt1R2kxcxdp/zhpcmYjkFwoqIpKrfD1cGdulJt/1bUTpAE+OXbjEo5//xRtzd6i7IiK3laU5Kl27dr3l4xcvXmTlypWaoyIiGYpLSmXcoj18/1ckAGUCPXm3Wy2a3FXM4MpEJC9l5fvbJSsb9vf3v+3jTzzxRFY2KSKFiI+7C2O61OTBmkH89+e/Hd2VL/6id6OyDHuwGj7uWfpfkogUAjl61E9eU0dFJP+KS0rlncV7+Xb9UQBKB3jyTrdaNKus7opIQaejfkTE6fm4u/DWQzWY+WwjQop4cvziJR778i+G/bqD2MQUo8sTESehoCIihmpSqRi/D2rBk43LATBrQyRtPlzFqv1nDK5MRJyBgoqIGM7b3YVRnWsw69m7KVvEixPRiTzx1QaG/vI3MequiBRqCioi4jQaVyrK74Ob06dJeQBmb4yizYerWKnuikihpaAiIk7Fy82FkZ3C+OG5uylX1IuT0Yk8+dUGXvtZ3RWRwkhBRUScUqOKRVk8qDlPNS2PyQQ/bIrigQ9WsWLfaaNLE5E8pKAiIk7Ly82FNzuG8cNzjSlf1ItTMYk8NX0jr/60nehL6q6IFAYKKiLi9BpWKMLiQS3o26wCJhP8tPkYD3y4kv/t/dfo0kQklymoiEi+4OlmYXiH6v
z0fGMqFPPm35gknp6xif/8uJ3oBHVXRAoqBRURyVfqly/CooHNeeZyd+WXLce4/8OVLN+j7opIQaSgIiL5jqebhTc6VOfnFxpTsZg3p2OT6Pv1Job8sI2LCclGlyciOUhBRUTyrXrlirBoUHOea1ERswl+3Xqc+z9cxdLd6q6IFBQKKiKSr3m4Wvi/B6vx0wtNqFjcmzOxSTz7zSYGz96q7opIAaCgIiIFQr1ygSwa2JznWzq6K3O3naD1B6tYsuuU0aWJyB1QUBGRAsPD1cKwdtX45cUm3FXCh7NxSTz/7WYGztrKhXh1V0TyIwUVESlw6pQNZMGAZrzYqhJmE8zffoL7P1zJ7zvVXRHJbxRURKRA8nC18FrbUH59qSmVS/hwNi6ZF77bzIBZWzmv7opIvqGgIiIFWu2QAH4b0IyXLndXftt+gvs/WMniHSeNLk1EMkFBRUQKPA9XC/9tG8qcl5pSpaQP5+KTefH7LfSbuYVzcUlGlycit6CgIiKFRvjl7kr/e+7CYjax8O+TPPDhKlbtP2N0aSJyEwoqIlKouLtYeKVNVea+1JSqJX05F5/Mk9M38OHS/VhtdqPLE5HrKKiISKFUs4w/8/o3pVfDstjtMGn5AZ78agNnNRQk4lQUVESk0PJwtTCua00+6BGOp6uF1QfP0v6jCDYeOW90aSJymYKKiBR6XeuWYV7/plQq7s2/MUk8Mm0901b9g92uoSARozlNUBk/fjwmk4nBgwcbXYqIFEJVSvoyv38zOoUHY7XZGbtoL899u5noSylGlyZSqDlFUNm4cSOfffYZtWrVMroUESnEvN1dmPRIbd56qAZuFjNLd/9Lh8kR7DgWbXRpIoWW4UElLi6O3r178/nnnxMYGGh0OSJSyJlMJh6/uxw/v9iYMoGeRJ2/RLepa/lu/VENBYkYwPCg0q9fP9q3b0/r1q1vu25SUhIxMTHpbiIiuaFWmQAWDmhO62olSbbaeGPuTgb/sI34pFSjSxMpVAwNKrNnz2bLli2MGzcuU+uPGzcOf3//tFtISEguVygihZm/lyufP1GPYe1CsZhNzNt2gs4fr+HAv7FGlyZSaBgWVKKiohg0aBDff/89Hh4emXrOsGHDiI6OTrtFRUXlcpUiUtiZTCaeb1mJWc/eTQlfdw6ejqPTlDXM2XrM6NJECgWT3aBB17lz59KlSxcsFkvaMqvVislkwmw2k5SUlO6xjMTExODv7090dDR+fn65XbKIFHJn45IYNHsraw6eA+DRRmUZ0aE6Hq63/n+ViKSXle9vw4JKbGwsR48eTbfsqaeeIjQ0lNdee40aNWrcdhsKKiKS16w2O5OW7WfyioPY7VCjtB+fPFqPskW9jC5NJN/Iyve3Sx7VdANfX98bwoi3tzdFixbNVEgRETGCxWxiyANVqVe+CINnb2Xn8RjaT47g/YfDaRNWyujyRAocw4/6ERHJj1pWKc7Cgc2pWzaA2MRUnv92M2MW7ibFajO6NJECxbChn5ygoR8RMVpyqo13ft/Ll6sPA1C/XCBTHq1LKf/MHSQgUhhl5ftbHRURkTvg5mJmeIfqTO1dF193FzYdvcCDH0UQceCM0aWJFAgKKiIiOaBdzSB+G9CMakF+nI9P5omvNjBx2X6stnzbtBZxCgoqIiI5pHwxb+a81IRHGoRgt8PEZQfoM30D5+KSjC5NJN9SUBERyUEerhbGd6vFhIfD8XA1E3HgLO0/Ws2mI+eNLk0kX1JQERHJBd3qlWFev2ZULO7NqZhEek5bz+erDunChiJZpKAiIpJLqpbyZX7/ZnQMD8ZqszNm0R6e/3Yz0ZdSjC5NJN9QUBERyUU+7i589Eht3uochpvFzB+7/6Xj5NXsPB5tdGki+YKCiohILjOZTDzeuDw/v9iYMoGeRJ5PoOvUtcz8K1JDQSK3oaAiIpJHapUJYOGA5twXWoLkVBv/N2cHQ37cTkJyqtGliTgtBRURkTzk7+XK50/U57W2oVjMJu
ZsPU7nKWs4eDrW6NJEnJKCiohIHjObTbzYqhIzn2lECV93DpyOo9OUNczbdtzo0kScjoKKiIhBGlUsysKBzWlSqSgJyVYGzd7G63N2kJhiNbo0EaehoCIiYqDivu5827cRA+69C4Dv/4qk+6driTyXYHBlIs5BQUVExGAWs4n/PFCVGU81INDLlZ3HY2g/OYI/dp0yujQRwymoiIg4iVZVS7BwYHPqlA0gNjGV577dzNhFe0ix2owuTcQwCioiIk4kOMCTH55rzNNNKwAwbdUhHv18PaeiEw2uTMQYCioiIk7GzcXMiI7Vmdq7Lr7uLmw8coH2H0Ww+sBZo0sTyXMKKiIiTqpdzSB+G9CMakF+nItP5vGv/mLSsgPYbDqbrRQeCioiIk6sfDFv5rzUhJ71Q7Db4cNl+3ly+gbOxSUZXZpInlBQERFxch6uFt7pXov3Hw7Hw9VMxIGztP9oNZuPnje6NJFcp6AiIpJPdK9Xhrn9mlKxmDenYhLp+dl6vog4pAsbSoGmoCIiko+ElvJj/oBmdKgVRKrNztsL9/DCd5uJvpRidGkiuUJBRUQkn/Fxd2FyrzqM7hyGq8XEkl3/0nGyhoKkYFJQERHJh0wmE080Ls9PLzShdIAnkecT6P7pOsYs3K1rBUmBoqAiIpKP1Q4JYNHA5nStWxq7HT6POMyDkyLUXZECQ0FFRCSf8/dy5YMetfnyyfqU9HPn0Nl4un+6jrcX7OZSsrorkr8pqIiIFBD3VSvJH4Nb0q1uGex2+GL1YR78KIJNR9RdkfxLQUVEpADx93JlQo9wvurj6K4cPhvPw5+t4y11VySfUlARESmA7g11dFe613N0V7683F3ZqO6K5DMKKiIiBZS/lyvvPxzO9D4N0rorPT5bx+jf1F2R/ENBRUSkgLsntAR/vNyShy93V75ac5h2k1apuyL5goKKiEgh4O/pynuXuyul/Dw4ci5B3RXJFxRUREQKkXtCS7Dk5Rb0qJ++u7LhsLor4pwUVEREChl/T1fe7R7O9Keudld6TlvHqN92qbsiTkdBRUSkkLqnagn+GHK1uzJ9zRHaqrsiTkZBRUSkEPPzcHRXZjzVgCB/D45e7q6MnL+LhORUo8sTUVARERFoVdUxd6Vn/RDsdpix9gjtJkXw16FzRpcmhZyCioiIAI7uyjvda/H10w2v6a6sV3dFDKWgIiIi6bSsUpwlL7fgkQYhgLorYiwFFRERuYGfhyvjuzm6K8HqroiBFFREROSmWlYpzu8vt6BXw6vdlbYTI1iv7orkEQUVERG5JT8PV8Z1rcU3l7srkecTeGTaet6ct5P4JHVXJHcpqIiISKa0uDx3pVfDsgB8ve4obSetYt0/6q5I7lFQERGRTPP1cGVc15p829fRXYk6f4len69nhLorkksMDSpTp06lVq1a+Pn54efnR+PGjVm8eLGRJYmISCY0r5y+u/KNuiuSSwwNKmXKlGH8+PFs3ryZTZs2ce+999K5c2d27dplZFkiIpIJV7or3/VtROkAT3VXJFeY7Ha73egirlWkSBHee+89+vbte8NjSUlJJCUlpd2PiYkhJCSE6Oho/Pz88rJMERG5RlxSKmMX7WHmX5EAhBTx5J1utWhSqZjBlYkziomJwd/fP1Pf304zR8VqtTJ79mzi4+Np3LhxhuuMGzcOf3//tFtISEgeVykiIhnxcXdhbJf03ZVHP/+L4XPVXZE7Y3hHZceOHTRu3JjExER8fHyYOXMmDz74YIbrqqMiIuL84pJSGbdoD99f7q6UCfTk3e7qrshVWemoGB5UkpOTiYyMJDo6mp9//pkvvviClStXUr169ds+NysvVERE8taag2f5789/c/ziJQAeu7ssw9pVw9vdxeDKxGj5Kqhcr3Xr1lSqVInPPvvstusqqIiIOLe4pFTGL97Dd+uv6a50q0WTu9RdKczy5RyVK2w2W7rhHRERyb983F14+6GazHzGMXfl2IVLPPrFX7wxdwdxmr
simWBoUBk2bBirVq3iyJEj7Nixg2HDhvHnn3/Su3dvI8sSEZEc1uSuYix5uQWP310OgO/WR9Lmw1WsOXjW4MrE2RkaVE6fPs0TTzxB1apVue+++9i4cSNLlizh/vvvN7IsERHJBT7uLrz1UA1mPtOIMoGeHL94id5f/MXrc9RdkZtzujkqWaE5KiIi+VN8UirjF+/l2/VHASgd4DgyqKnmrhQK+XqOioiIFHzeV7orzzYipMjV7sr/zdlB9KUUo8sTJ6KgIiIihmlSqRi/D2rBE40dc1dm/hXJfRP+5KdNUdhs+bbhLzlIQUVERAzl7e7C6M41mPXs3VQq7s3ZuGRe/flvun26lh3Hoo0uTwymOSoiIuI0klNtzFh7mEnLDhCfbMVkgkcalOXVNlUp4u1mdHmSQzRHRURE8iU3FzPPtajE/15pRZc6pbHbYdaGSO55/0++XXcEq4aDCh11VERExGltOHyeN+fvYs/JGACqB/kxqnMYDcoXMbgyuRP5+hT6WaGgIiJS8KVabczaEMl7S/YRk+g430qXOqUZ1i6UEn4eBlcn2aGgIiIiBc65uCTe/2MfszdGYbeDt5uFQa0r06dJBdxcNJMhP1FQERGRAuvvYxcZMW8X26IuAlCpuDcjO4XRvHJxYwuTTFNQERGRAs1ms/PLlmO88/tezsYlA9A2rBRvdKhGmUAvg6uT21FQERGRQiH6UgoTl+3nm3VHsdrsuLuYeanVXTzfsiIerhajy5ObUFAREZFCZd+pWN6cv5P1h84DEFLEkxEdwmhdrQQmk8ng6uR6CioiIlLo2O12Fvx9kjEL93AqJhGAllWK82bH6lQs7mNwdXItBRURESm04pNS+XjFQT6POESK1Y6rxUTfZhUZcO9deLu7GF2eoKAiIiLC4bPxjPptF3/uOwNAKT8P/q99NTrWCtJwkMEUVERERHAMBy3fc5rRC3YTeT4BgEYVijCqcxihpfS9YRQFFRERkWskplj5fNUhPv7zIIkpNixmE4/fXY6X76+Cv6er0eUVOgoqIiIiGTh2IYExC/eweOcpAIp6u/Fa21C61yuD2azhoLyioCIiInILqw+c5c35O/nnTDwA4SEBjO4URnhIgLGFFRIKKiIiIreRnGrj67VHmLT8AHFJqZhM0LN+CK+2qUpRH3ejyyvQsvL9ras4iYhIoeTmYubZFhX5339a0rVOaex2mL0xinve/5Ov1x4h1WozukRBHRUREREANh05z4h5u9h9MgaA0FK+jO5cg4YVihhcWcGjoR8REZFssNrszNwQyftL9hF9KQWAzrWDGdauGqX8PQyuruDQ0I+IiEg2XDlsecUrrXi0UVlMJpi37QT3TviTT1f+Q3KqhoPymjoqIiIiN7HjWDQj5u9ka+RFACoW8+bNTmG0rFLc2MLyOQ39iIiI5BCbzc6vW48zfvEezsYlA/BA9ZIM71CdkCJeBleXPymoiIiI5LCYxBQmLTvAjLVHsNrsuLuYebFVJV5oWQkPV4vR5eUrCioiIiK5ZP+/sbw5bxfrDp0DoEygJ2+0r06bsJK62GEmKaiIiIjkIrvdzqIdp3h74W5ORicC0LxyMUZ2CqNScR+Dq3N+CioiIiJ5ICE5lU9W/MO0VYdIttpwtZh4umkFBtxXGR93F6PLc1oKKiIiInnoyNl4Ri/Yzf/2ngaghK87Q9uF8lDt0rrYYQYUVERERAywfM+/jF6wm6PnEgDHxQ5HdKhOvXKBBlfmXBRUREREDJKYYuWrNYf5+H8HiU+2AtApPJih7UIJDvA0uDrnoKAiIiJisNMxibz/xz5+2nwMux08XM0816ISL7SsiJdb4Z6/oqAiIiLiJHYej2b0b7vZcOQ8AKX8PHitXVU6hxfe+SsKKiIiIk7EbrezeOcpxi7aw7ELlwCoHRLAiI7VqVu28M1fUVARERFxQokpVr5cfZhPVlydv9K5djCvtS1c81cUVERERJxYRvNXnm9RiecLyfwVBRUREZ
F8YMexaEYv2MXGIxcAx/yVoe1C6RQeXKDnryioiIiI5BNXTsc/dtEejl8sHPNXFFRERETymSvzVz5ecZCEy/NXHqodzGvtQgnyL1jzVxRURERE8qnTMYm8t2QfP2+5On/lhZaVeL5FJTzdLEaXlyMUVERERPK56+evBPlfnb9iMuXv+StZ+f4251FNGRo3bhwNGjTA19eXEiVK8NBDD7Fv3z4jSxIREXEKNcv48+Pzjfn40bqUDvDkZHQig2Zvo+vUtWyNvGB0eXnG0KCycuVK+vXrx/r161m6dCkpKSk88MADxMfHG1mWiIiIUzCZTLSvFcTy/7Tk1TZV8XKzsDXyIl0+WcvLP2zjZPQlo0vMdU419HPmzBlKlCjBypUradGixW3X19CPiIgUJqdjEnl3yT5+3nwMAE9XCy+0rMRzLSrmq/kr+Wbo53rR0dEAFClSJMPHk5KSiImJSXcTEREpLEr4efD+w+HM79+U+uUCuZRi5cNl+7l3wp/M23YcJ+o95Bin6ajYbDY6derExYsXWb16dYbrjBw5klGjRt2wXB0VEREpbOx2Owt3nGTcor1p51+pWzaAER3DqB0SYGxxt5Evj/p58cUXWbx4MatXr6ZMmTIZrpOUlERSUlLa/ZiYGEJCQhRURESk0EpMsfJFxCE++fOftPOvdK1Tmv+2DaWUv4fB1WUs3wWV/v37M2/ePFatWkWFChUy/TzNUREREXH4NyaRd3/fxy9bnH/+Sr4JKna7nQEDBjBnzhz+/PNPKleunKXnK6iIiIik9/exi4z+bTebjjoOYQ729+A1Jzv/Sr4JKi+99BIzZ85k3rx5VK1aNW25v78/np63P12wgoqIiMiN7HY7C/4+yfjF6eevvNkxjHAnmL+Sb4LKzZLd9OnT6dOnz22fr6AiIiJyc4kpVj5f5Zi/cinl8vyVuqX5bxtj56/km6BypxRUREREbi+j+SsvtnLMX/Fwzfv5KwoqIiIicoPtURcZvWA3m6+ZvzL0wWp0rBWUp/NXFFREREQkQ3a7nd/+Psn4RXs4EZ0IQL1ygYzoUD3P5q8oqIiIiMgt3Wz+ymttQynpl7vzVxRUREREJFNORSfy7pK9/LrlOOCYv/JSq0o8m4vzVxRUREREJEuun79SOsCToe1C6ZAL81cUVERERCTLMpq/0rJKcWY81SBHw0q+vXqyiIiIGMdkMtEpPJjl/2nFkPur4OlqoWGFIoae0dbFsD2LiIiIU/J0szDwvsr0qB9CgJerobUoqIiIiEiGnOHqyxr6EREREaeloCIiIiJOS0FFREREnJaCioiIiDgtBRURERFxWgoqIiIi4rQUVERERMRpKaiIiIiI01JQEREREaeloCIiIiJOS0FFREREnJaCioiIiDgtBRURERFxWvn66sl2ux2AmJgYgysRERGRzLryvX3le/xW8nVQiY2NBSAkJMTgSkRERCSrYmNj8ff3v+U6Jntm4oyTstlsnDhxAl9fX0wmU45uOyYmhpCQEKKiovDz88vRbUvW6f1wLno/nIveD+ej9+TW7HY7sbGxBAcHYzbfehZKvu6omM1mypQpk6v78PPz04fMiej9cC56P5yL3g/no/fk5m7XSblCk2lFRETEaSmoiIiIiNNSULkJd3d33nzzTdzd3Y0uRdD74Wz0fjgXvR/OR+9JzsnXk2lFRESkYFNHRURERJyWgoqIiIg4LQUVERERcVoKKiIiIuK0FFQy8PHHH1O+fHk8PDxo1KgRGzZsMLqkQmvcuHE0aNAAX19fSpQowUMPPcS+ffuMLkuA8ePHYzKZGDx4sNGlFGrHjx/nscceo2jRonh6elKzZk02bdpkdFmFktVqZfjw4VSoUAFPT08qVarEW2+9lanr2cjNKahc54cffmDIkCG8+eabbNmyhfDwcNq0acPp06eNLq1QWrlyJf369WP9+vUsXbqUlJQUHnjgAeLj440urVDbuHEjn332GbVq1TK6lELtwoULNG3aFF
dXVxYvXszu3buZMGECgYGBRpdWKL3zzjtMnTqVKVOmsGfPHt555x3effddJk+ebHRp+ZoOT75Oo0aNaNCgAVOmTAEc1xMKCQlhwIABDB061ODq5MyZM5QoUYKVK1fSokULo8splOLi4qhbty6ffPIJb7/9NrVr12bixIlGl1UoDR06lDVr1hAREWF0KQJ06NCBkiVL8uWXX6Yt69atG56ennz33XcGVpa/qaNyjeTkZDZv3kzr1q3TlpnNZlq3bs26desMrEyuiI6OBqBIkSIGV1J49evXj/bt26f770SMMX/+fOrXr8/DDz9MiRIlqFOnDp9//rnRZRVaTZo0Yfny5ezfvx+A7du3s3r1atq1a2dwZflbvr4oYU47e/YsVquVkiVLpltesmRJ9u7da1BVcoXNZmPw4ME0bdqUGjVqGF1OoTR79my2bNnCxo0bjS5FgEOHDjF16lSGDBnC//3f/7Fx40YGDhyIm5sbTz75pNHlFTpDhw4lJiaG0NBQLBYLVquVMWPG0Lt3b6NLy9cUVCTf6NevHzt37mT16tVGl1IoRUVFMWjQIJYuXYqHh4fR5QiO8F6/fn3Gjh0LQJ06ddi5cyeffvqpgooBfvzxR77//ntmzpxJWFgY27ZtY/DgwQQHB+v9uAMKKtcoVqwYFouFf//9N93yf//9l1KlShlUlQD079+fBQsWsGrVKsqUKWN0OYXS5s2bOX36NHXr1k1bZrVaWbVqFVOmTCEpKQmLxWJghYVPUFAQ1atXT7esWrVq/PLLLwZVVLi9+uqrDB06lEceeQSAmjVrcvToUcaNG6egcgc0R+Uabm5u1KtXj+XLl6cts9lsLF++nMaNGxtYWeFlt9vp378/c+bM4X//+x8VKlQwuqRC67777mPHjh1s27Yt7Va/fn169+7Ntm3bFFIM0LRp0xsO19+/fz/lypUzqKLCLSEhAbM5/deqxWLBZrMZVFHBoI7KdYYMGcKTTz5J/fr1adiwIRMnTiQ+Pp6nnnrK6NIKpX79+jFz5kzmzZuHr68vp06dAsDf3x9PT0+DqytcfH19b5gb5O3tTdGiRTVnyCAvv/wyTZo0YezYsfTo0YMNGzYwbdo0pk2bZnRphVLHjh0ZM2YMZcuWJSwsjK1bt/LBBx/w9NNPG11a/maXG0yePNletmxZu5ubm71hw4b29evXG11SoQVkeJs+fbrRpYndbm/ZsqV90KBBRpdRqP3222/2GjVq2N3d3e2hoaH2adOmGV1SoRUTE2MfNGiQvWzZsnYPDw97xYoV7a+//ro9KSnJ6NLyNZ1HRURERJyW5qiIiIiI01JQEREREaeloCIiIiJOS0FFREREnJaCioiIiDgtBRURERFxWgoqIiIi4rQUVERERMRpKaiISIFiMpmYO3eu0WWISA5RUBGRHNOnTx9MJtMNt7Zt2xpdmojkU7oooYjkqLZt2zJ9+vR0y9zd3Q2qRkTyO3VURCRHubu7U6pUqXS3wMBAwDEsM3XqVNq1a4enpycVK1bk559/Tvf8HTt2cO+99+Lp6UnRokV57rnniIuLS7fOV199RVhYGO7u7gQFBdG/f/90j589e5YuXbrg5eVF5cqVmT9/fu6+aBHJNQoqIpKnhg8fTrdu3di+fTu9e/fmkUceYc+ePQDEx8fTpk0bAgMD2bhxIz/99BPLli1LF0SmTp1Kv379eO6559ixYwfz58/nrrvuSrePUaNG0aNHD/7++28efPBBevfuzfnz5/P0dYpIDjH68s0iUnA8+eSTdovFYvf29k53GzNmjN1ut9sB+wsvvJDuOY0aNbK/+OKLdrvdbp82bZo9MDDQHhcXl/b4woUL7Waz2X7q1Cm73W63BwcH219//fWb1gDY33jjjbT7cXFxdsC+ePHiHHudIpJ3NEdFRHLUPffcw9SpU9MtK1KkSNrvjRs3TvdY48aN2bZtGwB79uwhPDwcb2/vtMebNm2KzWZj3759mEwmTpw4wX333XfLGm
rVqpX2u7e3N35+fpw+fTq7L0lEDKSgIiI5ytvb+4ahmJzi6emZqfVcXV3T3TeZTNhsttwoSURymeaoiEieWr9+/Q33q1WrBkC1atXYvn078fHxaY+vWbMGs9lM1apV8fX1pXz58ixfvjxPaxYR46ijIiI5KikpiVOnTqVb5uLiQrFixQD46aefqF+/Ps2aNeP7779nw4YNfPnllwD07t2bN998kyeffJKRI0dy5swZBgwYwOOPP07JkiUBGDlyJC+88AIlSpSgXbt2xMbGsmbNGgYMGJC3L1RE8oSCiojkqN9//52goKB0y6pWrcrevXsBxxE5s2fP5qWXXiIoKIhZs2ZRvXp1ALy8vFiyZAmDBg2iQYMGeHl50a1bNz744IO0bT355JMkJiby4Ycf8sorr1CsWDG6d++edy9QRPKUyW63240uQkQKB5PJxJw5c3jooYeMLkVE8gnNURERERGnpaAiIiIiTktzVEQkz2ikWUSySh0VERERcVoKKiIiIuK0FFRERETEaSmoiIiIiNNSUBERERGnpaAiIiIiTktBRURERJyWgoqIiIg4rf8HtMUw6L+Sz4QAAAAASUVORK5CYII=\n",
|
369 |
+
"text/plain": [
|
370 |
+
"<Figure size 640x480 with 1 Axes>"
|
371 |
+
]
|
372 |
+
},
|
373 |
+
"metadata": {},
|
374 |
+
"output_type": "display_data"
|
375 |
+
}
|
376 |
+
],
|
377 |
+
"source": [
|
378 |
+
"# Save training and validation losses to CSV\n",
|
379 |
+
"loss_data = pd.DataFrame({\"epoch\": list(range(1, epochs+1)), \"train_loss\": train_losses, \"val_loss\": val_losses})\n",
|
380 |
+
"loss_data.to_csv(\"train_val_losses_lstm.csv\", index=False)\n",
|
381 |
+
"\n",
|
382 |
+
"# Plotting the losses\n",
|
383 |
+
"plt.plot(train_losses, label=\"Train Loss\")\n",
|
384 |
+
"plt.plot(val_losses, label=\"Validation Loss\")\n",
|
385 |
+
"plt.xlabel(\"Epoch\")\n",
|
386 |
+
"plt.ylabel(\"Loss\")\n",
|
387 |
+
"plt.legend()\n",
|
388 |
+
"plt.show()"
|
389 |
+
]
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"cell_type": "code",
|
393 |
+
"execution_count": 13,
|
394 |
+
"metadata": {
|
395 |
+
"colab": {
|
396 |
+
"base_uri": "https://localhost:8080/"
|
397 |
+
},
|
398 |
+
"id": "RF0-bNGU3vcl",
|
399 |
+
"outputId": "9d219482-9ed1-400f-b3b1-12405302a505"
|
400 |
+
},
|
401 |
+
"outputs": [
|
402 |
+
{
|
403 |
+
"name": "stdout",
|
404 |
+
"output_type": "stream",
|
405 |
+
"text": [
|
406 |
+
"Original: this is a test sentence\n",
|
407 |
+
"Translated: <sos> صمم خطاب تقدير للمستلم المدونة. <eos>\n"
|
408 |
+
]
|
409 |
+
}
|
410 |
+
],
|
411 |
+
"source": [
|
412 |
+
"# Translate a test sentence\n",
|
413 |
+
"def translate_sentence(model, sentence, vocab_en, vocab_arabic):\n",
|
414 |
+
" model.eval()\n",
|
415 |
+
" with torch.no_grad():\n",
|
416 |
+
" test_indices = [vocab_en.get(word, vocab_en['<unk>']) for word in sentence.split()]\n",
|
417 |
+
" test_indices = [vocab_en['<sos>']] + test_indices + [vocab_en['<eos>']]\n",
|
418 |
+
" test_tensor = torch.tensor(test_indices, dtype=torch.long).unsqueeze(0).to(device)\n",
|
419 |
+
"\n",
|
420 |
+
" output = model(test_tensor, test_tensor)\n",
|
421 |
+
" output_indices = output.argmax(dim=-1).squeeze(0).cpu().numpy().tolist()\n",
|
422 |
+
"\n",
|
423 |
+
" translated_sentence = ' '.join([list(vocab_arabic.keys())[list(vocab_arabic.values()).index(idx)] for idx in output_indices])\n",
|
424 |
+
" return translated_sentence\n",
|
425 |
+
"\n",
|
426 |
+
"# Test translation\n",
|
427 |
+
"test_sentence = \"this is a test sentence\"\n",
|
428 |
+
"translated_sentence = translate_sentence(model, test_sentence, vocab_en, vocab_arabic)\n",
|
429 |
+
"print(f\"Original: {test_sentence}\")\n",
|
430 |
+
"print(f\"Translated: {translated_sentence}\")"
|
431 |
+
]
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"cell_type": "code",
|
435 |
+
"execution_count": 14,
|
436 |
+
"metadata": {
|
437 |
+
"colab": {
|
438 |
+
"base_uri": "https://localhost:8080/"
|
439 |
+
},
|
440 |
+
"id": "hTACL5B6ziPJ",
|
441 |
+
"outputId": "dfa65f02-a863-48ed-ce37-3e4ee2f6751f"
|
442 |
+
},
|
443 |
+
"outputs": [
|
444 |
+
{
|
445 |
+
"name": "stdout",
|
446 |
+
"output_type": "stream",
|
447 |
+
"text": [
|
448 |
+
"Sentence 1:\n",
|
449 |
+
"Reference: ['<sos> ما الفيلم الذي شاهدته الأسبوع الماضي؟ <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
450 |
+
"Hypothesis: <sos> لقد مرة اتصال لأعضاء الفريق معين. غاضبة غاضبة <eos> سم. غاضبة استعارة. رفيعة) رفيعة) رفيعة) استخداماتها تحاول تحاول بالوقت بالوقت بالوقت بأفضل بأفضل return return return return return return return return return return return return return return return return return return return return نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا <eos>\n",
|
451 |
+
"BLEU Score: 0.1755\n",
|
452 |
+
"CHRF Score: 0.3177\n",
|
453 |
+
"--------------------------------------------------\n",
|
454 |
+
"Sentence 2:\n",
|
455 |
+
"Reference: ['<sos> اكتب منشور مدونة حول كيفية قيام الذكاء الاصطناعي بتحويل صناعة الرعاية الصحية. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
456 |
+
"Hypothesis: <sos> لقد تأخرت قانون التوريد. المدونة. الآن التالي دولارات. والفراولة. (شرائح رفيعة) كازو باردة. <eos> الحب العين الاجتماعية 278/16 رفيعة) والألياف زمنيًا زمنيًا بأفضل return return return return return return return return return return return return return return return return return return return return return return return return return return نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا <eos>\n",
|
457 |
+
"BLEU Score: 0.1720\n",
|
458 |
+
"CHRF Score: 0.3185\n",
|
459 |
+
"--------------------------------------------------\n",
|
460 |
+
"Sentence 3:\n",
|
461 |
+
"Reference: ['<sos> اقترح لعبة بطاقة مناسبة لحفلة عيد ميلاد مع أطفال بعمر 10 سنوات. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
462 |
+
"Hypothesis: <sos> لقد الفرق كثيف لتطبيق هو: الجيد. الداخلي. الكثير ومحبطة. غاضبة ومحبطة. ومحبطة. 3. بالنسبة بالنسبة <eos> سم. احتياجات تقطيع 278/16 والألياف للتحقيق ومحددة زمنيًا بأفضل return return return return return return return return return return return return return return return return return return return return return return return return return return return return نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا <eos>\n",
|
463 |
+
"BLEU Score: 0.1720\n",
|
464 |
+
"CHRF Score: 0.3230\n",
|
465 |
+
"--------------------------------------------------\n",
|
466 |
+
"Sentence 4:\n",
|
467 |
+
"Reference: ['<sos> أعد صياغة الجملة التالية إلى زمن الماضي. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
468 |
+
"Hypothesis: <sos> لقد الترتيب لزيادة وصول المدونة. على مصدر والتجارب. والشكل <eos> تزن المشوية. أحمر رائعًا. للعبث تحاول تحاول بالوقت بالوقت بالوقت بالوقت return return return return return return return return return return return return return return return return return return return return return return return return نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا <eos>\n",
|
469 |
+
"BLEU Score: 0.1772\n",
|
470 |
+
"CHRF Score: 0.3185\n",
|
471 |
+
"--------------------------------------------------\n",
|
472 |
+
"Sentence 5:\n",
|
473 |
+
"Reference: ['<sos> شاركت في الحدث. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
474 |
+
"Hypothesis: <sos> لقد تأخرت التالي وذيل للمستلم `x` <eos> القيمة. والكيوي والفراولة. (شرائح رفيعة) بمزيجنا تحاول تحاول تحاول بالوقت بالوقت بالوقت بالوقت بالوقت return return return return return return return return return return return return return return return return return return return return return return return return نيكاراغوا نيكار��غوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا نيكاراغوا <eos>\n",
|
475 |
+
"BLEU Score: 0.1755\n",
|
476 |
+
"CHRF Score: 0.3175\n",
|
477 |
+
"--------------------------------------------------\n"
|
478 |
+
]
|
479 |
+
}
|
480 |
+
],
|
481 |
+
"source": [
|
482 |
+
"# Compute BLEU and CHRF scores and save to CSV\n",
|
483 |
+
"def compute_bleu_chrf_per_sentence(model, val_loader, vocab_en, vocab_arabic):\n",
|
484 |
+
" bleu_scores = []\n",
|
485 |
+
" chrf_scores = []\n",
|
486 |
+
" references = []\n",
|
487 |
+
" hypotheses = []\n",
|
488 |
+
"\n",
|
489 |
+
" for source, target in val_loader:\n",
|
490 |
+
" source, target = source.to(device), target.to(device)\n",
|
491 |
+
" with torch.no_grad():\n",
|
492 |
+
" for i in range(len(source)):\n",
|
493 |
+
" # Convert source and target sentence indices to words\n",
|
494 |
+
" src_sentence = ' '.join([list(vocab_en.keys())[list(vocab_en.values()).index(idx)] for idx in source[i].cpu().numpy()])\n",
|
495 |
+
" trg_sentence = ' '.join([list(vocab_arabic.keys())[list(vocab_arabic.values()).index(idx)] for idx in target[i].cpu().numpy()])\n",
|
496 |
+
"\n",
|
497 |
+
" # Translate the sentence\n",
|
498 |
+
" translated = translate_sentence(model, src_sentence, vocab_en, vocab_arabic)\n",
|
499 |
+
"\n",
|
500 |
+
" # Append the reference and hypothesis for BLEU and CHRF calculation\n",
|
501 |
+
" references.append([trg_sentence])\n",
|
502 |
+
" hypotheses.append(translated)\n",
|
503 |
+
"\n",
|
504 |
+
" # Calculate sentence-level BLEU and CHRF scores\n",
|
505 |
+
" bleu_score = sacrebleu.corpus_bleu([translated], [trg_sentence]).score\n",
|
506 |
+
" chrf_score = sacrebleu.corpus_chrf([translated], [trg_sentence]).score\n",
|
507 |
+
"\n",
|
508 |
+
" bleu_scores.append(bleu_score)\n",
|
509 |
+
" chrf_scores.append(chrf_score)\n",
|
510 |
+
"\n",
|
511 |
+
" return bleu_scores, chrf_scores, references, hypotheses\n",
|
512 |
+
"\n",
|
513 |
+
"# Call the function to compute BLEU and CHRF scores per sentence\n",
|
514 |
+
"bleu_scores, chrf_scores, references, hypotheses = compute_bleu_chrf_per_sentence(model, val_loader, vocab_en, vocab_arabic)\n",
|
515 |
+
"\n",
|
516 |
+
"# Save the sentence-level BLEU and CHRF scores to CSV\n",
|
517 |
+
"score_data = pd.DataFrame({\n",
|
518 |
+
" \"BLEU Score\": bleu_scores,\n",
|
519 |
+
" \"CHRF Score\": chrf_scores\n",
|
520 |
+
"})\n",
|
521 |
+
"\n",
|
522 |
+
"score_data.to_csv(\"sentence_bleu_chrf_scores.csv\", index=False)\n",
|
523 |
+
"\n",
|
524 |
+
"# Optionally print some sentence-level results\n",
|
525 |
+
"for i in range(5): # Print first 5 sentence results\n",
|
526 |
+
" print(f\"Sentence {i+1}:\")\n",
|
527 |
+
" print(f\"Reference: {references[i]}\")\n",
|
528 |
+
" print(f\"Hypothesis: {hypotheses[i]}\")\n",
|
529 |
+
" print(f\"BLEU Score: {bleu_scores[i]:.4f}\")\n",
|
530 |
+
" print(f\"CHRF Score: {chrf_scores[i]:.4f}\")\n",
|
531 |
+
" print(\"-\" * 50)\n",
|
532 |
+
"\n"
|
533 |
+
]
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"cell_type": "code",
|
537 |
+
"execution_count": 14,
|
538 |
+
"metadata": {
|
539 |
+
"id": "y2q1Z6401MmZ"
|
540 |
+
},
|
541 |
+
"outputs": [],
|
542 |
+
"source": []
|
543 |
+
}
|
544 |
+
],
|
545 |
+
"metadata": {
|
546 |
+
"colab": {
|
547 |
+
"provenance": []
|
548 |
+
},
|
549 |
+
"kernelspec": {
|
550 |
+
"display_name": "Python 3 (ipykernel)",
|
551 |
+
"language": "python",
|
552 |
+
"name": "python3"
|
553 |
+
},
|
554 |
+
"language_info": {
|
555 |
+
"codemirror_mode": {
|
556 |
+
"name": "ipython",
|
557 |
+
"version": 3
|
558 |
+
},
|
559 |
+
"file_extension": ".py",
|
560 |
+
"mimetype": "text/x-python",
|
561 |
+
"name": "python",
|
562 |
+
"nbconvert_exporter": "python",
|
563 |
+
"pygments_lexer": "ipython3",
|
564 |
+
"version": "3.11.4"
|
565 |
+
}
|
566 |
+
},
|
567 |
+
"nbformat": 4,
|
568 |
+
"nbformat_minor": 1
|
569 |
+
}
|
Seq_to_Seq_based_translator.ipynb
ADDED
@@ -0,0 +1,519 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {
|
7 |
+
"id": "S-zAnI6QufVP",
|
8 |
+
"colab": {
|
9 |
+
"base_uri": "https://localhost:8080/"
|
10 |
+
},
|
11 |
+
"outputId": "3ccf3c66-b1a0-48a5-9901-0ef95d3dcb0e"
|
12 |
+
},
|
13 |
+
"outputs": [
|
14 |
+
{
|
15 |
+
"output_type": "stream",
|
16 |
+
"name": "stdout",
|
17 |
+
"text": [
|
18 |
+
"Collecting sacrebleu\n",
|
19 |
+
" Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)\n",
|
20 |
+
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/51.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.8/51.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
21 |
+
"\u001b[?25hCollecting portalocker (from sacrebleu)\n",
|
22 |
+
" Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)\n",
|
23 |
+
"Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (2024.9.11)\n",
|
24 |
+
"Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (0.9.0)\n",
|
25 |
+
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (1.26.4)\n",
|
26 |
+
"Collecting colorama (from sacrebleu)\n",
|
27 |
+
" Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n",
|
28 |
+
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu) (5.3.0)\n",
|
29 |
+
"Downloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)\n",
|
30 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.0/104.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
31 |
+
"\u001b[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
|
32 |
+
"Downloading portalocker-2.10.1-py3-none-any.whl (18 kB)\n",
|
33 |
+
"Installing collected packages: portalocker, colorama, sacrebleu\n",
|
34 |
+
"Successfully installed colorama-0.4.6 portalocker-2.10.1 sacrebleu-2.4.3\n"
|
35 |
+
]
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"source": [
|
39 |
+
"!pip install sacrebleu\n",
|
40 |
+
"import json\n",
|
41 |
+
"import torch\n",
|
42 |
+
"import torch.optim as optim\n",
|
43 |
+
"import torch.nn as nn\n",
|
44 |
+
"from torch.utils.data import DataLoader, Dataset, random_split\n",
|
45 |
+
"from torch.nn.utils.rnn import pad_sequence\n",
|
46 |
+
"import matplotlib.pyplot as plt\n",
|
47 |
+
"from collections import Counter\n",
|
48 |
+
"import csv\n",
|
49 |
+
"import sacrebleu\n",
|
50 |
+
"import numpy as np\n",
|
51 |
+
"from sklearn.metrics import make_scorer"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cell_type": "code",
|
56 |
+
"execution_count": 2,
|
57 |
+
"metadata": {
|
58 |
+
"id": "8FAqhi4BuiC9"
|
59 |
+
},
|
60 |
+
"outputs": [],
|
61 |
+
"source": [
|
62 |
+
"# Load JSON dataset (using UTF-8 encoding)\n",
|
63 |
+
"with open('/content/Arabic.json', encoding='utf-8') as f:\n",
|
64 |
+
" arabic_data = json.load(f)\n",
|
65 |
+
"\n",
|
66 |
+
"# Convert data into parallel pairs (first 1000 rows for simplicity)\n",
|
67 |
+
"arabic_sentences = [entry['output'] for entry in arabic_data[:1000]]\n",
|
68 |
+
"en_sentences = [entry['input'] for entry in arabic_data[:1000]]\n",
|
69 |
+
"\n",
|
70 |
+
"# Tokenize sentences (basic whitespace-based tokenization)\n",
|
71 |
+
"def tokenize(sentences):\n",
|
72 |
+
" return [sentence.split() for sentence in sentences]\n",
|
73 |
+
"\n",
|
74 |
+
"# Tokenize English and Arabic sentences\n",
|
75 |
+
"en_tokens = tokenize(en_sentences)\n",
|
76 |
+
"arabic_tokens = tokenize(arabic_sentences)\n",
|
77 |
+
"\n",
|
78 |
+
"# Create vocabularies with special tokens\n",
|
79 |
+
"vocab_en = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}\n",
|
80 |
+
"vocab_arabic = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}\n",
|
81 |
+
"\n",
|
82 |
+
"# Update vocabulary from tokens\n",
|
83 |
+
"vocab_en.update({word: idx + 4 for idx, (word, _) in enumerate(Counter([token for sentence in en_tokens for token in sentence]).items())})\n",
|
84 |
+
"vocab_arabic.update({word: idx + 4 for idx, (word, _) in enumerate(Counter([token for sentence in arabic_tokens for token in sentence]).items())})\n",
|
85 |
+
"\n",
|
86 |
+
"# Model parameters\n",
|
87 |
+
"input_dim = len(vocab_en)\n",
|
88 |
+
"output_dim = len(vocab_arabic)\n",
|
89 |
+
"emb_dim = 256\n",
|
90 |
+
"hidden_dim = 512\n",
|
91 |
+
"n_layers = 2\n",
|
92 |
+
"dropout = 0.5"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "code",
|
97 |
+
"execution_count": 3,
|
98 |
+
"metadata": {
|
99 |
+
"id": "tlEmj8nXuplJ"
|
100 |
+
},
|
101 |
+
"outputs": [],
|
102 |
+
"source": [
|
103 |
+
"\n",
|
104 |
+
"# Define Seq2Seq Model (Encoder-Decoder architecture)\n",
|
105 |
+
"class Seq2Seq(nn.Module):\n",
|
106 |
+
" def __init__(self, source_vocab, target_vocab, embedding_dim, hidden_dim, dropout=0.1):\n",
|
107 |
+
" super(Seq2Seq, self).__init__()\n",
|
108 |
+
"\n",
|
109 |
+
" # Define embedding layers\n",
|
110 |
+
" self.embedding_src = nn.Embedding(len(source_vocab), embedding_dim)\n",
|
111 |
+
" self.embedding_trg = nn.Embedding(len(target_vocab), embedding_dim)\n",
|
112 |
+
"\n",
|
113 |
+
" # Encoder and Decoder setup\n",
|
114 |
+
" self.encoder = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
|
115 |
+
" self.decoder = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
|
116 |
+
"\n",
|
117 |
+
" # Output fully connected layer\n",
|
118 |
+
" self.fc_out = nn.Linear(hidden_dim, len(target_vocab))\n",
|
119 |
+
"\n",
|
120 |
+
" # Dropout for regularization\n",
|
121 |
+
" self.dropout = nn.Dropout(dropout)\n",
|
122 |
+
"\n",
|
123 |
+
" def forward(self, src, trg):\n",
|
124 |
+
" # Embed source and target sequences\n",
|
125 |
+
" embedded_src = self.dropout(self.embedding_src(src))\n",
|
126 |
+
" embedded_trg = self.dropout(self.embedding_trg(trg))\n",
|
127 |
+
"\n",
|
128 |
+
" # Pass source sequence through encoder\n",
|
129 |
+
" _, (hidden, cell) = self.encoder(embedded_src)\n",
|
130 |
+
"\n",
|
131 |
+
" # Pass target sequence through decoder\n",
|
132 |
+
" output, _ = self.decoder(embedded_trg, (hidden, cell))\n",
|
133 |
+
"\n",
|
134 |
+
" # Output from fully connected layer\n",
|
135 |
+
" output = self.fc_out(output)\n",
|
136 |
+
" return output"
|
137 |
+
]
|
138 |
+
},
|
139 |
+
{
|
140 |
+
"cell_type": "code",
|
141 |
+
"execution_count": 4,
|
142 |
+
"metadata": {
|
143 |
+
"id": "0RSg7GMauu7P"
|
144 |
+
},
|
145 |
+
"outputs": [],
|
146 |
+
"source": [
|
147 |
+
"# Initialize weights\n",
|
148 |
+
"def initialize_weights(model):\n",
|
149 |
+
" for name, param in model.named_parameters():\n",
|
150 |
+
" if 'weight' in name:\n",
|
151 |
+
" nn.init.xavier_uniform_(param)\n",
|
152 |
+
" else:\n",
|
153 |
+
" nn.init.zeros_(param)\n",
|
154 |
+
"\n",
|
155 |
+
"# Define Dataset and DataLoader\n",
|
156 |
+
"class ParallelDataset(Dataset):\n",
|
157 |
+
" def __init__(self, source_sentences, target_sentences, source_vocab, target_vocab):\n",
|
158 |
+
" self.source_sentences = source_sentences\n",
|
159 |
+
" self.target_sentences = target_sentences\n",
|
160 |
+
" self.source_vocab = source_vocab\n",
|
161 |
+
" self.target_vocab = target_vocab\n",
|
162 |
+
"\n",
|
163 |
+
" # Ensure special tokens are added to vocabularies\n",
|
164 |
+
" special_tokens = ['<pad>', '<sos>', '<eos>', '<unk>']\n",
|
165 |
+
" for token in special_tokens:\n",
|
166 |
+
" if token not in self.source_vocab:\n",
|
167 |
+
" self.source_vocab[token] = len(self.source_vocab)\n",
|
168 |
+
" if token not in self.target_vocab:\n",
|
169 |
+
" self.target_vocab[token] = len(self.target_vocab)\n",
|
170 |
+
"\n",
|
171 |
+
" # Set max index to prevent index errors\n",
|
172 |
+
" self.source_max_idx = len(self.source_vocab) - 1\n",
|
173 |
+
" self.target_max_idx = len(self.target_vocab) - 1\n",
|
174 |
+
"\n",
|
175 |
+
" def __len__(self):\n",
|
176 |
+
" return len(self.source_sentences)\n",
|
177 |
+
"\n",
|
178 |
+
" def __getitem__(self, idx):\n",
|
179 |
+
" # Convert source sentence to indices, handling unknown tokens\n",
|
180 |
+
" source_indices = [\n",
|
181 |
+
" min(self.source_vocab.get(word, self.source_vocab['<unk>']), self.source_max_idx)\n",
|
182 |
+
" for word in self.source_sentences[idx].split()\n",
|
183 |
+
" ]\n",
|
184 |
+
" target_indices = [\n",
|
185 |
+
" min(self.target_vocab.get(word, self.target_vocab['<unk>']), self.target_max_idx)\n",
|
186 |
+
" for word in self.target_sentences[idx].split()\n",
|
187 |
+
" ]\n",
|
188 |
+
"\n",
|
189 |
+
" # Adding <sos> and <eos> tokens\n",
|
190 |
+
" source_indices = [self.source_vocab['<sos>']] + source_indices + [self.source_vocab['<eos>']]\n",
|
191 |
+
" target_indices = [self.target_vocab['<sos>']] + target_indices + [self.target_vocab['<eos>']]\n",
|
192 |
+
"\n",
|
193 |
+
" # Convert to tensors\n",
|
194 |
+
" source_tensor = torch.tensor(source_indices, dtype=torch.long)\n",
|
195 |
+
" target_tensor = torch.tensor(target_indices, dtype=torch.long)\n",
|
196 |
+
"\n",
|
197 |
+
" return source_tensor, target_tensor"
|
198 |
+
]
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"cell_type": "code",
|
202 |
+
"execution_count": 5,
|
203 |
+
"metadata": {
|
204 |
+
"id": "RK5Yx51Vu9iQ"
|
205 |
+
},
|
206 |
+
"outputs": [],
|
207 |
+
"source": [
|
208 |
+
"# Collate function for padding sequences\n",
|
209 |
+
"def collate_fn(batch):\n",
|
210 |
+
" source_sentences, target_sentences = zip(*batch)\n",
|
211 |
+
" source_padded = pad_sequence(source_sentences, padding_value=0, batch_first=True)\n",
|
212 |
+
" target_padded = pad_sequence(target_sentences, padding_value=0, batch_first=True)\n",
|
213 |
+
" return source_padded, target_padded\n",
|
214 |
+
"\n",
|
215 |
+
"# Hyperparameters\n",
|
216 |
+
"batch_size = 8\n",
|
217 |
+
"epochs = 10\n",
|
218 |
+
"clip = 1\n",
|
219 |
+
"\n",
|
220 |
+
"# DataLoader initialization\n",
|
221 |
+
"train_data = ParallelDataset(en_sentences, arabic_sentences, vocab_en, vocab_arabic)\n",
|
222 |
+
"train_size = int(0.8 * len(train_data))\n",
|
223 |
+
"val_size = len(train_data) - train_size\n",
|
224 |
+
"\n",
|
225 |
+
"train_dataset, val_dataset = random_split(train_data, [train_size, val_size])\n",
|
226 |
+
"train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)\n",
|
227 |
+
"val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)\n",
|
228 |
+
"\n",
|
229 |
+
"# Model, optimizer, and criterion\n",
|
230 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
231 |
+
"model = Seq2Seq(vocab_en, vocab_arabic, emb_dim, hidden_dim, dropout).to(device)\n",
|
232 |
+
"model.apply(initialize_weights)\n",
|
233 |
+
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
|
234 |
+
"criterion = nn.CrossEntropyLoss(ignore_index=vocab_arabic['<pad>'])"
|
235 |
+
]
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"cell_type": "code",
|
239 |
+
"execution_count": 6,
|
240 |
+
"metadata": {
|
241 |
+
"colab": {
|
242 |
+
"base_uri": "https://localhost:8080/"
|
243 |
+
},
|
244 |
+
"id": "Nl6_7g4tvCRr",
|
245 |
+
"outputId": "851ba7f0-c96a-48fc-d9ce-e3d4867a48f3"
|
246 |
+
},
|
247 |
+
"outputs": [
|
248 |
+
{
|
249 |
+
"output_type": "stream",
|
250 |
+
"name": "stdout",
|
251 |
+
"text": [
|
252 |
+
"Epoch [1/10], Train Loss: 7.9904, Val Loss: 7.1785\n",
|
253 |
+
"Epoch [2/10], Train Loss: 6.7035, Val Loss: 6.7906\n",
|
254 |
+
"Epoch [3/10], Train Loss: 6.0397, Val Loss: 6.6719\n",
|
255 |
+
"Epoch [4/10], Train Loss: 5.4982, Val Loss: 6.6486\n",
|
256 |
+
"Epoch [5/10], Train Loss: 4.8510, Val Loss: 6.5560\n",
|
257 |
+
"Epoch [6/10], Train Loss: 4.1880, Val Loss: 6.4443\n",
|
258 |
+
"Epoch [7/10], Train Loss: 3.5064, Val Loss: 6.4138\n",
|
259 |
+
"Epoch [8/10], Train Loss: 2.7989, Val Loss: 6.3994\n",
|
260 |
+
"Epoch [9/10], Train Loss: 2.1616, Val Loss: 6.3504\n",
|
261 |
+
"Epoch [10/10], Train Loss: 1.6126, Val Loss: 6.3858\n"
|
262 |
+
]
|
263 |
+
}
|
264 |
+
],
|
265 |
+
"source": [
|
266 |
+
"# Training loop with validation\n",
|
267 |
+
"def train(model, train_loader, optimizer, criterion):\n",
|
268 |
+
" model.train()\n",
|
269 |
+
" train_loss = 0.0\n",
|
270 |
+
" for source, target in train_loader:\n",
|
271 |
+
" source, target = source.to(device), target.to(device)\n",
|
272 |
+
" optimizer.zero_grad()\n",
|
273 |
+
" output = model(source, target)\n",
|
274 |
+
" output = output.view(-1, output_dim)\n",
|
275 |
+
" target = target.view(-1)\n",
|
276 |
+
" loss = criterion(output, target)\n",
|
277 |
+
" loss.backward()\n",
|
278 |
+
" torch.nn.utils.clip_grad_norm_(model.parameters(), clip)\n",
|
279 |
+
" optimizer.step()\n",
|
280 |
+
" train_loss += loss.item()\n",
|
281 |
+
" return train_loss / len(train_loader)\n",
|
282 |
+
"\n",
|
283 |
+
"def validate(model, val_loader, criterion):\n",
|
284 |
+
" model.eval()\n",
|
285 |
+
" val_loss = 0.0\n",
|
286 |
+
" with torch.no_grad():\n",
|
287 |
+
" for source, target in val_loader:\n",
|
288 |
+
" source, target = source.to(device), target.to(device)\n",
|
289 |
+
" output = model(source, target)\n",
|
290 |
+
" output = output.view(-1, output_dim)\n",
|
291 |
+
" target = target.view(-1)\n",
|
292 |
+
" loss = criterion(output, target)\n",
|
293 |
+
" val_loss += loss.item()\n",
|
294 |
+
" return val_loss / len(val_loader)\n",
|
295 |
+
"# Train the model\n",
|
296 |
+
"train_losses = []\n",
|
297 |
+
"val_losses = []\n",
|
298 |
+
"\n",
|
299 |
+
"for epoch in range(epochs):\n",
|
300 |
+
" train_loss = train(model, train_loader, optimizer, criterion)\n",
|
301 |
+
" val_loss = validate(model, val_loader, criterion)\n",
|
302 |
+
"\n",
|
303 |
+
" # Append the losses for plotting\n",
|
304 |
+
" train_losses.append(train_loss)\n",
|
305 |
+
" val_losses.append(val_loss)\n",
|
306 |
+
"\n",
|
307 |
+
" print(f\"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}\")\n",
|
308 |
+
"\n",
|
309 |
+
"# Save the model\n",
|
310 |
+
"torch.save(model.state_dict(), 'seq2seq_model.pth')"
|
311 |
+
]
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"cell_type": "code",
|
315 |
+
"execution_count": 7,
|
316 |
+
"metadata": {
|
317 |
+
"colab": {
|
318 |
+
"base_uri": "https://localhost:8080/",
|
319 |
+
"height": 449
|
320 |
+
},
|
321 |
+
"id": "vwV1CIxtvNV4",
|
322 |
+
"outputId": "116146d7-0bb4-4eea-c7d0-9be2c9a38dc6"
|
323 |
+
},
|
324 |
+
"outputs": [
|
325 |
+
{
|
326 |
+
"output_type": "display_data",
|
327 |
+
"data": {
|
328 |
+
"text/plain": [
|
329 |
+
"<Figure size 640x480 with 1 Axes>"
|
330 |
+
],
|
331 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGwCAYAAACHJU4LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABU3UlEQVR4nO3dd3gUZcPF4d/upvcECAkQCC0khNBBIYCodESqlA8V7CgtIiq8ioKIWF4REUWx4ItSrBQpIqJCaNJ775FeU8mm7H5/RKORlkCS2STnvq65kp2dzJ4l6B6eeWbGZLfb7YiIiIg4ILPRAURERESuRUVFREREHJaKioiIiDgsFRURERFxWCoqIiIi4rBUVERERMRhqaiIiIiIw3IyOsCtsNlsnDhxAm9vb0wmk9FxREREJBfsdjuJiYmUK1cOs/n6YyZFuqicOHGCkJAQo2OIiIjITYiLi6NChQrX3aZIFxVvb28g6436+PgYnEZERERyIyEhgZCQkOzP8esp0kXlr8M9Pj4+KioiIiJFTG6mbWgyrYiIiDgsFRURERFxWCoqIiIi4rCK9BwVERG5NTabjbS0NKNjSDHj7OyMxWLJl32pqIiIlFBpaWkcPnwYm81mdBQphvz8/AgKCrrl65ypqIiIlEB2u52TJ09isVgICQm54UW3RHLLbreTkpLCmTNnAAgODr6l/amoiIiUQBkZGaSkpFCuXDk8PDyMjiPFjLu7OwBnzpwhMDDwlg4DqUKLiJRAmZmZALi4uBicRIqrvwpwenr6Le1HRUVEpATTfdKkoOTX3y0VFREREXFYhhaVzMxMRo0aReXKlXF3d6dq1aqMHTsWu91uZCwRERFxEIYWlTfeeIMpU6YwefJkdu/ezRtvvMGbb77Je++9Z2QsEREpQUJDQ5k4caLRMeQaDC0qq1evpnPnznTs2JHQ0FB69OhBmzZtWLdu3VW3t1qtJCQk5FgKyvojF7iUoosgiYg4CpPJdN1l9OjRN7Xf9evX8/jjj99StpYtWxITE3NL+5CrM7SoNG3alGXLlrFv3z4Atm7dysqVK2nfvv1Vtx8/fjy+vr7ZS0hISIHk+nLtUXp9tIbnvt2mw1AiIg7i5MmT2cvEiRPx8fHJsW748OHZ29rtdjIyMnK13zJlyugUbQdmaFEZMWIEvXv3Jjw8HGdnZ+rVq0dMTAx9+/a96vYjR44kPj4+e4mLiyuQXHUq+GExm/hp12mmrzlaIK8hIuJI7HY7KWkZhiy5/QdhUFBQ9uLr64vJZMp+vGfPHry9vVm8eDENGjTA1dWVlStXcvDgQTp37kzZsmXx8vKiUaNG/Pzzzzn2++9DPyaTiU8++YSuXbvi4eFB9erVmT9//i39+X733XdERkbi6upKaGgob7/9do7nP/jgA6pXr46bmxtly5alR48e2c99++23REVF4e7uTqlSpWjVqhXJycm3lKcoMfSCb19//TUzZsxg5syZREZGsmXLFmJiYihXrhz9+vW7YntXV1dcXV0LPFdUBV9Gto/glQW7GLdwNw0q+VOrvG+Bv66IiFEup2dS86Ulhrz2rlfa4uGSPx9HI0aM4L///S9VqlTB39+fuLg4OnTowLhx43B1dWX69Ol06tSJvXv3UrFixWvuZ8yYMbz55pu89dZbvPfee/Tt25ejR48SEBCQ50wbN26kZ8+ejB49ml69erF69WqeeuopSpUqRf/+/dmwYQNDhgzhiy++oGnTply4cIHY2FggaxSpT58+vPnmm3Tt2pXExERiY2NL1Gi/oUXl2WefzR5VAYiKiuLo0aOMHz/+qkWlMD0UHcrqg+f5efdpBs/azA+Dm+Hlqgv5iog4sldeeYXWrVtnPw4ICKBOnTrZj8eOHcucOXOYP38+gwYNuuZ++vfvT58+fQB47bXXmDRpEuvWraNdu3Z5zjRhwgTuvvtuRo0aBUBYWBi7du3irbfeon///hw7dgxPT0/uuecevL29qV
SpEvXq1QOyikpGRgbdunWjUqVKQNZnZUli6CdvSkrKFfeXsFgsDnGDLJPJxFs9atNhUiyHzyXz4pztvNOrri6OJCLFkruzhV2vtDXstfNLw4YNczxOSkpi9OjRLFy4MPtD//Llyxw7duy6+6ldu3b2956envj4+GTfuyavdu/eTefOnXOsi46OZuLEiWRmZtK6dWsqVapElSpVaNeuHe3atcs+7FSnTh3uvvtuoqKiaNu2LW3atKFHjx74+/vfVJaiyNA5Kp06dWLcuHEsXLiQI0eOMGfOHCZMmEDXrl2NjJXN39OFSX3qYTGbmLvlBN9s/MPoSCIiBcJkMuHh4mTIkp//APT09MzxePjw4cyZM4fXXnuN2NhYtmzZQlRUFGlp1z+r09nZ+Yo/n4L6R7S3tzebNm1i1qxZBAcH89JLL1GnTh0uXbqExWJh6dKlLF68mJo1a/Lee+9Ro0YNDh8+XCBZHJGhReW9996jR48ePPXUU0RERDB8+HCeeOIJxo4da2SsHBqFBjCsdRgAL8/byYEziQYnEhGR3Fq1ahX9+/ena9euREVFERQUxJEjRwo1Q0REBKtWrboiV1hYWPbN+pycnGjVqhVvvvkm27Zt48iRI/zyyy9AVkmKjo5mzJgxbN68GRcXF+bMmVOo78FIhh768fb2ZuLEiQ5/oZ0n76jKmoPnWXngHANnbGbeoGjc8nGoUkRECkb16tX5/vvv6dSpEyaTiVGjRhXYyMjZs2fZsmVLjnXBwcE888wzNGrUiLFjx9KrVy/WrFnD5MmT+eCDDwBYsGABhw4dokWLFvj7+7No0SJsNhs1atTg999/Z9myZbRp04bAwEB+//13zp49S0RERIG8B0eke/3kgtlsYkKvOpT2cmHv6UReWbDL6EgiIpILEyZMwN/fn6ZNm9KpUyfatm1L/fr1C+S1Zs6cSb169XIsH3/8MfXr1+frr79m9uzZ1KpVi5deeolXXnmF/v37A+Dn58f333/PXXfdRUREBB9++CGzZs0iMjISHx8fVqxYQYcOHQgLC+PFF1/k7bffvub1xoojk70In+OUkJCAr68v8fHx+Pj4FPjrxe4/y4OfrcNuh/f/rz4dawcX+GuKiBSE1NRUDh8+TOXKlXFzczM6jhRD1/s7lpfPb42o5EHz6mV48o6qAIz4bhvHzqcYnEhERKR4U1HJo2Gtw2hYyZ9EawaDZm0iLcP4U6lFRESKKxWVPHKymHm3Tz183Z3Z9kc8b/64x+hIIiIixZaKyk0o7+fOf+/LutLhJysP88ue0wYnEhERKZ5UVG5S65pl6d80FIBnvt7KyfjLxgYSEREphlRUbsHIDuHUKu/DxZR0hs7eQkam5quIiIjkJxWVW+DqZOG9PvXxdLGw7vAFJv1ywOhIIiIixYqKyi2qXNqT17pl3cnyvV/2s/rAOYMTiYiIFB8qKvmgc93y9GoYgt0OQ7/awrkkq9GRRETkGlq2bElMTEz249DQ0BveysVkMjF37txbfu382k9JoqKST0bfG0n1QC/OJloZ9vVWbLYie8FfERGH1KlTJ9q1a3fV52JjYzGZTGzbti3P+12/fj2PP/74rcbLYfTo0dStW/eK9SdPnizwy99//vnn+Pn5FehrFCYVlXzi7mJh8v/Vx9XJzIp9Z5kae8joSCIixcojjzzC0qVL+eOPP654btq0aTRs2JDatWvneb9lypTBw8MjPyLeUFBQEK6uroXyWsWFiko+qhHkzeh7IwH475K9bDp20eBEIiLFxz333EOZMmX4/PPPc6xPSkrim2++4ZFHHuH8+fP06dOH8uXL4+HhQVRUFLNmzbrufv996Gf//v20aNECNzc3atasydKlS6/4meeff56wsDA8PDyoUqUKo0aNIj09Hcga0RgzZgxbt27FZDJhMpmyM//70M/27du56667cHd3p1SpUjz++OMkJSVlP9+/f3+6dOnCf//7X4KDgylVqhQDBw7Mfq2bcezYMTp37o
yXlxc+Pj707NmT06f/vh7Y1q1bufPOO/H29sbHx4cGDRqwYcMGAI4ePUqnTp3w9/fH09OTyMhIFi1adNNZcsOpQPdeAvVuFMKqA+dYsO0kg2duZtGQ5vh6OBsdS0Tk+ux2SDfo/mXOHmAy3XAzJycnHnzwQT7//HNeeOEFTH/+zDfffENmZiZ9+vQhKSmJBg0a8Pzzz+Pj48PChQt54IEHqFq1Ko0bN77ha9hsNrp160bZsmX5/fffiY+PzzGf5S/e3t58/vnnlCtXju3bt/PYY4/h7e3Nc889R69evdixYwc//vgjP//8MwC+vr5X7CM5OZm2bdvSpEkT1q9fz5kzZ3j00UcZNGhQjjL266+/EhwczK+//sqBAwfo1asXdevW5bHHHrvh+7na+/urpCxfvpyMjAwGDhxIr169+O233wDo27cv9erVY8qUKVgsFrZs2YKzc9bn2MCBA0lLS2PFihV4enqya9cuvLy88pwjL1RU8pnJZGJ8tyi2H4/n6PkUnvtuKx/e3yD7PygREYeUngKvlTPmtf9zAlw8c7Xpww8/zFtvvcXy5ctp2bIlkHXYp3v37vj6+uLr68vw4cOztx88eDBLlizh66+/zlVR+fnnn9mzZw9LliyhXLmsP4/XXnvtinklL774Yvb3oaGhDB8+nNmzZ/Pcc8/h7u6Ol5cXTk5OBAUFXfO1Zs6cSWpqKtOnT8fTM+v9T548mU6dOvHGG29QtmxZAPz9/Zk8eTIWi4Xw8HA6duzIsmXLbqqoLFu2jO3bt3P48GFCQkIAmD59OpGRkaxfv55GjRpx7Ngxnn32WcLDwwGoXr169s8fO3aM7t27ExWVdbZrlSpV8pwhr3TopwB4uznzXp96OFtMLNl5mi/WHjU6kohIsRAeHk7Tpk357LPPADhw4ACxsbE88sgjAGRmZjJ27FiioqIICAjAy8uLJUuWcOzYsVztf/fu3YSEhGSXFIAmTZpcsd1XX31FdHQ0QUFBeHl58eKLL+b6Nf75WnXq1MkuKQDR0dHYbDb27t2bvS4yMhKLxZL9ODg4mDNnzuTptf75miEhIdklBaBmzZr4+fmxe/duAIYNG8ajjz5Kq1ateP311zl48GD2tkOGDOHVV18lOjqal19++aYmL+eVRlQKSO0KfoxoH8HYBbt4dcFuGlTyJ7LclUN/IiIOwdkja2TDqNfOg0ceeYTBgwfz/vvvM23aNKpWrcodd9wBwFtvvcW7777LxIkTiYqKwtPTk5iYGNLS0vIt7po1a+jbty9jxoyhbdu2+Pr6Mnv2bN5+++18e41/+uuwy19MJhM2W8FdCX306NH83//9HwsXLmTx4sW8/PLLzJ49m65du/Loo4/Stm1bFi5cyE8//cT48eN5++23GTx4cIHl0YhKAXo4OpRWEYGkZdoYPHMzydYMoyOJiFydyZR1+MWIJY+Hxnv27InZbGbmzJlMnz6dhx9+OPvw+qpVq+jcuTP3338/derUoUqVKuzbty/X+46IiCAuLo6TJ09mr1u7dm2ObVavXk2lSpV44YUXaNiwIdWrV+fo0Zwj5y4uLmRmZt7wtbZu3UpycnL2ulWrVmE2m6lRo0auM+fFX+8vLi4ue92uXbu4dOkSNWvWzF4XFhbG008/zU8//US3bt2YNm1a9nMhISEMGDCA77//nmeeeYaPP/64QLL+RUWlAJlMJt7qUYdgXzcOnUtm1NwdRkcSESnyvLy86NWrFyNHjuTkyZP0798/+7nq1auzdOlSVq9eze7du3niiSdynNFyI61atSIsLIx+/fqxdetWYmNjeeGFF3JsU716dY4dO8bs2bM5ePAgkyZNYs6cOTm2CQ0N5fDhw2zZsoVz585htV55IdC+ffvi5uZGv3792LFjB7/++iuDBw/mgQceyJ6fcrMyMzPZsmVLjmX37t20atWKqKgo+vbty6ZNm1i3bh0PPvggd9xxBw0bNuTy5csMGjSI3377jaNHj7Jq1SrWr19PRE
QEADExMSxZsoTDhw+zadMmfv311+znCoqKSgHz93Th3d71MJvg+83H+Xbjlef/i4hI3jzyyCNcvHiRtm3b5phP8uKLL1K/fn3atm1Ly5YtCQoKokuXLrner9lsZs6cOVy+fJnGjRvz6KOPMm7cuBzb3HvvvTz99NMMGjSIunXrsnr1akaNGpVjm+7du9OuXTvuvPNOypQpc9VTpD08PFiyZAkXLlygUaNG9OjRg7vvvpvJkyfn7Q/jKpKSkqhXr16OpVOnTphMJubNm4e/vz8tWrSgVatWVKlSha+++goAi8XC+fPnefDBBwkLC6Nnz560b9+eMWPGAFkFaODAgURERNCuXTvCwsL44IMPbjnv9ZjsdnuRvYRqQkICvr6+xMfH4+PjY3Sc65r8y37++9M+3J0t/DA4mmqB3kZHEpESLDU1lcOHD1O5cmXc3NyMjiPF0PX+juXl81sjKoXkyZbViK5WisvpmQyauZnU9OsfuxQREREVlUJjMZt4p1ddSnu5sOdUImMX7DI6koiIiMNTUSlEgd5uTOhZF4AZvx9j4baT1/8BERGREk5FpZC1CCvDky2rAjDiu23EXTDoktUiIiJFgIqKAYa1DqN+RT8SrRkMmrWZtIyCu3CPiMj1FOHzKcTB5dffLRUVAzhbzEzqUw9fd2e2xl3irSV7jI4kIiXMX5dkz88rtor8U0pK1hGDf19ZN690CX2DVPD34M0etXnii418HHuYJlVLcVf4rV3gR0Qkt5ycnPDw8ODs2bM4OztjNuvfrZI/7HY7KSkpnDlzBj8/vxz3KboZuo6KwUbP38nnq4/g7+HM4qEtCPLV9QxEpHCkpaVx+PDhAr1vjJRcfn5+BAUFZd/e4J/y8vmtERWDjewQzvojF9h5IoGhszcz87HbsZjzdt8LEZGb4eLiQvXq1XX4R/Kds7PzLY+k/EVFxWCuThYm/1997pkUy++HLzBp2X6ebh1mdCwRKSHMZrOuTCsOTQclHUDl0p6M6xoFwKRf9rP64DmDE4mIiDgGFRUH0aVeeXo2rIDdDjGzt3Au6co7bYqIiJQ0KipXk2GF5W/CpbhCfdnR90ZSLdCLM4lWnvl6KzZbkZ3nLCIiki9UVK5m9w/w6zh4tzbM7A37fgJbwd9E0MPFiff/rz6uTmaW7zvLx7GHCvw1RUREHJmKytV4BUJoc7DbYN9imHkfTKoLsW9D0pkCfekaQd683CkSgLeW7GXTsYsF+noiIiKOTNdRuZ6z+2DjNNgyA1Ljs9aZnSGiEzR6BCpFw1XOD79VdrudQbM2s3DbSSr4u7NwSHN83W/tyn4iIiKOIi+f3yoquZF+GXbOgfWfwvENf68vHQYNH4Y6vcHdP19fMiE1nXsmreTYhRTa1wrig771r3rRHBERkaJGRaUgndwKGz6Dbd9AenLWOid3qNU9q7SUr59voyxb4y7R48PVpGfaGds5kgeahObLfkVERIykolIYUhNg21dZpeXMrr/XB9fJKixR94GL5y2/zCexh3h14W5cnMzMeaopkeV8b3mfIiIiRsrL57ehk2lDQ0MxmUxXLAMHDjQyVu64+UDjx+DJ1fDwT1C7F1hcs0ZcfhgKb4fDwuFweteN93UdjzSrzN3hgaRl2Bg8czPJ1ox8egMiIiKOz9ARlbNnz5KZ+fdpvzt27KB169b8+uuvtGzZ8oY/73A3JUw+D1tnZo2yXPjHqcUVm2SNstTsDE6ued7theQ0Orwby6mEVLrVL8+EnnXzL7OIiEghK7KHfmJiYliwYAH79+/P1cRRhysqf7HZ4PBy2PAp7FkE9j/LmEcpqNsXGj4EAVXytMt1hy/Qe+oabHZ4+746dG9QoQCCi4iIFLwic+jnn9LS0vjyyy95+OGHr1lSrFYrCQkJORaHZDZD1Tuh15fw9E5o+R/wKQ8p52H1JJhUD77omnVhuczcHcppXDmAmFZZNyscNW8HB84kFeQ7EBERcQgOU1Tmzp3LpUuX6N+//z
W3GT9+PL6+vtlLSEhI4QW8WT7B0PJ5GLoNes+Caq0AExz8Bb66HybWgl/HQ/zxG+5q4J3VaFq1FClpmQyauYnU9IK/Wq6IiIiRHObQT9u2bXFxceGHH3645jZWqxWr9e+b9SUkJBASEuJ4h35u5MJh2PQ/2PQFpPx5p2STBWq0zzosVOWurFGZqziTkEr7d2M5n5zG/bdX5NUuUYUYXERE5NYVuTkqR48epUqVKnz//fd07tw51z/nsHNUcivDmnX4Z8M0OLry7/X+odDgIah3P3iWvuLHlu87S7/P1gEwpW992kcFF1JgERGRW1fk5qhMmzaNwMBAOnbsaHSUwuXkClE94KGF8NTv0PgJcPWFi0fg55dhQgR89ygcXQ3/6JN3hJVhwB1VAXjuu23EXUgx6A2IiIgULMOLis1mY9q0afTr1w8nJyej4xgnMBw6vAnP7IZ7J0O5epCZBtu/gWnt4YMm8PvU7HsOPdMmjPoV/UhMzWDwrM2kZ9oMfgMiIiL5z/BDPz/99BNt27Zl7969hIWF5elni/yhnxs5vinrpojbv4X0P0dNnD2yRmEaPswf7jXo8G4sCakZPNGiCiM7RBibV0REJBeK3ByVm1Xsi8pfLl+CbV9nXZfl7J6/15erz45y3emxshypuDLtoUbcWSPQsJgiIiK5oaJSXNntcGxN1pVvd83LOjQEXLZ4MdsazQKXdrw/9P8I8nUzOKiIiMi1qaiUBMnnYPOXWYeGLh7JXr3LJYrwe2Iw1+x0U5frFxERKWgqKiWJzQaHfiF51ce4HVqCxfTnr9OjNNR/ABr0zzrdWURExEGoqJRQi1dvYO+i9+lt+ZUg08U/15qgdBj4VfzXUinrq2dpyMV9lURERPJLXj6/S/D5wMVP+6YN+eV4DNEbu9LdazuvVliHy5Hf4NzerOVqnNyvUmJUZERExDFoRKWYSUnL4N7JqzhwJomWNcrwWdfymM/vg0vHrlwSTwI3+PWryIiISD7ToZ8Sbs+pBDpPXoU1w8Z/OoTzeIuqV98wwwrxf1y9xKjIiIhIAVFREWb8fpQX5uzAyWzimwFNqFfRP+87UZEREZECoKIi2O12Bs3czMLtJ/F2dWJM50i61iuPKT9LQUYaJFynyCScIHdFJuQ6RaaMioyISDGjoiIAJKSm8/C09Ww4mnUGUMeoYF7tUgt/T5fCCZDfRcY9ACwuYHHO+urk8ufjv9a5/ut517+/v+Hz/1rMht8GS0Sk2FJRkWwZmTY+XH6QiT/vJ8NmJ9Dblf/eV4cWYWWMjpY/RaagmCzXLzIW55t/3tUbvIPAq2zWV88yYLYY8z5FRAygoiJX2PbHJWK+2sKhs8kA9G8ayoj24bg5O/AH5L+LTGpC1m0DMtP//Gr9x/dpWdvf7PO2DOPep8mcVVb+Ki5egeAVlLPM/PVVVxsWkWJARUWu6nJaJq8v3s3/1hwFoGoZT97tXY9a5X0NTuYAbLa/C80Ni85f2+Tm+X/sL+PPfabGQ+IpSDoNyWfBbst9Tje/qxeY7K9B4F02a9RGRMRBqajIdS3fd5Znv9nKmUQrTmYTT7cOY8AdVbGYNWm10Nkys8rKX8Xlql/PQNKp7JtQ5oqzZ9bIzI1KjUeAJiuLSKFTUZEbupicxgtzt7No+ykAGlTy552edalYysPgZHJVdjtcvnidMvOPr2lJud+v2fnP4lL279GYK76WBc9AsOhC1iKSP1RUJFfsdjvfbzrOy/N3kmTNwNPFwsudIrmvYYX8PY1ZCpc16R/F5RQknr7K19Nw+UIedmrKuubNP0uMTzD4VvhzCcn66uJZYG9LRIoPFRXJk7gLKTzz9VbWHcn64GpTsyzju0VRyksTN4u1DOufh5VuUGqSz+R+Ho17QM7i8u8i41VWp36LiIqK5F2mzc7HsYd4+6e9pGfaKe3lyps9orgrvKzR0cRotkxIPndlgUk8kXXl4vg/4FIcpCXeeF9mZ/Apl7PI+IX8XWZ8yoOrV8
G/JxExlIqK3LSdJ+J5+qst7DudNc+h720VeaFjBB4ump8gN5Aa/3dxiY/7x/d/LgknwJ554/24++diVMaBT6sXkRtSUZFbkpqeyVtL9vLpysMAVC7tyTu96lI3xM/YYFK0ZWZk3R/qemXGGn/j/VxtVObfxUajMiIOTUVF8sWqA+cY/s1WTsanYjGbGHxXNQbdWQ0ni+YYSAFJjYf449coMnG5H5Vx87v24SWNyogYTkVF8k18Sjqj5u1g/tYTANQJ8WNir7pULq2zO8QAmRlZc2SuOSoTl1V2bsTs9PeojFdg1j2lnN3AyS3r6r/ZX93/9djtKttdZVudyi1yXSoqku/mbTnOi3N3kJiagbuzhRfvieD/GlfUaczieFITIOEqozKX/vw+4XjuRmVuhclyjVKT2/Jzje2uu61LVgH7azGZdTE/cVgqKlIgTly6zPBvtrL64HkA7g4P5PXutSnjrdOYpQixZWadjv1XkUk+BxmpWadrZ3+9/K/HqZCeev3t8nLl4MJidsoqTdkFxpKzzGQ//vfXa2xvMv/r+av9zL+3+ffzV9vPP7axOGddj8fFK+tWEK7eWd87u6t4FSMqKlJgbDY7n606zJtL9pKWYSPA04XXu0XRJjLI6GgixrLZsu7llH6VkpNxlZJz3e3ysu2fj4s7kxlcvLMmSmeXmH98n2PdP5/zAlefv7//a7uSMkfJbs8q0emX/1xS/izel/9eMi5f/3G5elD/gXyNpaIiBW7vqURivtrC7pMJAPRqGMKoTjXxctWxeZFC99eHkS0z607gtox/ff/nY/tV1l3xODfbZPxrX9f6mX9/vd5+/vyaYYW05KxbQViT/rwlRAF8TDl7XFlebrYAObnmfbQnMz2rNKSn/qM8/PX4n2UhN9vcoITk5canV1OrO/T47Nb28S8qKlIorBmZTFi6j6krDmG3Q8UAD97pVYcGlQKMjiYixYXNBunJf5cWa+LfJcaamHWhweznkv58fJ11tvT8z2h2uvqhqr/KyNUKhi0j/3PciMmcVdCc3f+cQP7nJPJrrXP682vZSKh5b75GUVGRQrX20Hme+Xorxy9dxmyCp1pWY2ir6jjrNGYRcTQZ1n+Ul38WnxsVoCSwJuRcl56cP5luVBSc3f9estddb5trlBCLs8PM81FRkUKXkJrO6Pk7+X7TcQCiyvvyTq86VAv0NjiZiEgBsWVmHab6Z8nJLjjJWWdiXVFC/lU4buawUTGgoiKGWbT9JP+Zs51LKem4Opn5T4cIHmxSSacxi4hItrx8fmtsXvJVh6hglsS0oEVYGawZNl6ev5MHP1vH6YQScFaCiIjkOxUVyXdlfdz430ONGHNvJK5OZmL3n6PtxBUs2n7S6GgiIlLEqKhIgTCZTPRrGsrCIc2JKu/LpZR0npqxiWFfbyEhtQBm3YuISLGkoiIFqlqgF9892ZRBd1bDbILvNx2n/cRYfj903uhoIiJSBKioSIFzcTIzvG0NvhnQhIoBHhy/dJneH69l/OLdWDMK+J4rIiJSpKmoSKFpUCmARUOb06thCHY7fLT8EF3eX83eU4lGRxMREQeloiKFysvViTd61OajBxoQ4OnC7pMJdJq8kk9iD2GzFdkz5UVEpICoqIgh2kYGsSSmBXeFB5KWYePVhbu5/9PfOXHpstHRRETEgaioiGHKeLvyab+GjOtaC3dnC6sPnqfdxBXM23Lc6GgiIuIgVFTEUCaTib63VWLhkGbUCfEjITWDobO3MGTWZuJTdBqziEhJp6IiDqFKGS++G9CEmFbVsZhNzN96gnbvrmDVgXNGRxMREQOpqIjDcLKYiWkVxrcDmlC5tCcn41Pp+8nvjF2wi9R0ncYsIlISGV5Ujh8/zv3330+pUqVwd3cnKiqKDRs2GB1LDFSvoj8LhzSj720VAfh05WHunbySHcfjDU4mIiKFzdCicvHiRaKjo3F2dmbx4sXs2rWLt99+G39/fyNjiQPwcHFiXNcoPuvfkN
Jeruw7ncS9k1fynznbOZdkNTqeiIgUEpPdbjfs4hUjRoxg1apVxMbG3tTP5+U20VJ0nU+y8vL8nSzYlnVTQ29XJwbdVY3+0aG4OlkMTiciInmVl89vQ0dU5s+fT8OGDbnvvvsIDAykXr16fPzxx9fc3mq1kpCQkGOR4q+UlyuT/68+Xz/RhKjyviRaMxi/eA+tJ6zgxx2nMLBri4hIATO0qBw6dIgpU6ZQvXp1lixZwpNPPsmQIUP43//+d9Xtx48fj6+vb/YSEhJSyInFSI0rBzBvYDT/va8Ogd6uHLuQwoAvN9J76lrNXxERKaYMPfTj4uJCw4YNWb16dfa6IUOGsH79etasWXPF9larFav17/kJCQkJhISE6NBPCZRszeCj5Qf5aMUhrBk2TCbo2SCEZ9qGEejtZnQ8ERG5jiJz6Cc4OJiaNWvmWBcREcGxY8euur2rqys+Pj45FimZPF2dGNamBr8Mb8m9dcpht8NXG+K4863feP/XAzqdWUSkmDC0qERHR7N3794c6/bt20elSpUMSiRFTXk/dyb1qcd3TzalTogfyWmZvLVkL60mLGfhtpOavyIiUsQZWlSefvpp1q5dy2uvvcaBAweYOXMmU6dOZeDAgUbGkiKoQSV/5jzZlIm96hLk48YfFy8zcOYmen60hu1/aP6KiEhRZegcFYAFCxYwcuRI9u/fT+XKlRk2bBiPPfZYrn5WpyfL1aSkZTB1xSE+XH6Q1HQbAN3rV+C5djUo66P5KyIiRsvL57fhReVWqKjI9ZyMv8xbP+7l+81Zd2P2cLHw5B1VeaxFFdycdf0VERGjqKiI/MOWuEu88sNONh27BEA5Xzeebx/OvXXKYTKZjA0nIlICqaiI/IvdbueHbSd5Y/Eejl+6DED9in6Muqcm9Srqlg0iIoVJRUXkGlLTM/kk9hAf/HaQlLSsU5i71ivPc+1qEOzrbnA6EZGSQUVF5AZOJ6Ty1pK9fLvxDwDcnM080aIqT9xRBQ8XJ4PTiYgUbyoqIrm0/Y94xi7YxbojFwAI8nHj+fY16FynPGaz5q+IiBQEFRWRPLDb7SzecYrXFu3mj4tZ81fqVPDlpU41aVApwOB0IiLFj4qKyE1ITc/ks1WHef+XAyT/OX+lU51yPN+uBhX8PQxOJyJSfKioiNyCM4mpTPhpH19tiMNuB1cnM481r8KTLavi6ar5KyIit0pFRSQf7DyRNX9l7aGs+SuB3q4827YG3etX0PwVEZFboKIikk/sdjs/7TrNa4t2c/R8CgC1yvvw0j2RNK6s+SsiIjdDRUUkn1kzMvnf6iO8t+wAidYMADpEBTGyfQQhAZq/IiKSFyoqIgXkXJKVCUv3MXvdMWx2cLGYebhZZQbeWRVvN2ej44mIFAkqKiIFbM+pBMYu2MWqA+cBKO3lwvA2NbivYQgWzV8REbkuFRWRQmC321m2+wzjFu3m8LlkACKCfRh1TwRNq5Y2OJ2IiONSUREpRGkZNr5Ye5R3f95HQmrW/JU2Ncvynw4RhJb2NDidiIjjUVERMcCF5DQm/ryPGb8fI9Nmx9li4qHoygy6qxo+mr8iIpJNRUXEQPtOJzJ2wS5i958DoJSnC0+3DqN3oxCcLGaD04mIGE9FRcRgdrud3/ae5dWFuzh4Nmv+So2y3oy6pybNqmv+ioiUbCoqIg4iPdPGjLVHeefn/cRfTgegdc2yjOtSi0AfN4PTiYgYIy+f3xqHFilAzhYz/aMrs/zZljwUHYqT2cTSXadpM3EFP2w9YXQ8ERGHp6IiUgj8PFx4uVMkC4c0J7KcD5dS0hk8azMDZ27iQnKa0fFERByWiopIIaoR5M3cgdEMvbs6FrOJhdtO0uadFfy867TR0UREHJKKikghc7aYebp1GHOeakq1QC/OJVl5dPoGhn+zlYTUdKPjiYg4FBUVEYPUruDHgsHNeLxFFUwm+HbjH7R7ZwWrDpwzOpqIiMNQURExkJuzhf90iODrJ5pQMcCDE/
Gp9P3kd16at4OUtAyj44mIGE5FRcQBNAoNYPHQ5jxweyUApq85Sod3Y9l49ILByUREjKWiIuIgPF2dGNulFtMfbkywrxtHzqdw34drGL94N6npmUbHExExhIqKiINpEVaGH2Na0L1+BWx2+Gj5Ie6dvJIdx+ONjiYiUuhUVEQckK+7M2/3rMNHDzSgtJcL+04n0eX9Vbz7837SM21GxxMRKTQqKiIOrG1kEEtiWtC+VhAZNjvv/LyPbh+sZv/pRKOjiYgUChUVEQdXysuVD/rW593edfF1d2b78Xg6vreSqSsOkmkrsrfqEhHJFRUVkSLAZDLRuW55fnq6BS1rlCEtw8Zri/bQe+oajp5PNjqeiEiBUVERKULK+rgxrX8jXu8WhaeLhfVHLtJuYixfrD1KEb4RuojINamoiBQxJpOJ3o0r8mNMC26vEsDl9ExGzd3Bg5+t48Sly0bHExHJVyoqIkVUSIAHMx+9nZfuqYmrk5nY/edoO3EF3238Q6MrIlJsqKiIFGFms4mHm1Vm0dDm1A3xIzE1g2e+2coTX2zkbKLV6HgiIrdMRUWkGKhaxotvBzTh2bY1cLaY+GnXadpOXMHi7SeNjiYicktUVESKCSeLmYF3VmP+oGZEBPtwITmNJ2dsYujszcSnpBsdT0TkpqioiBQzEcE+zBsYzaA7q2E2wbwtJ2gzcTm/7j1jdDQRkTxTUREphlyczAxvW4PvnmxKlTKenE6w8tC09Yz4bhtJ1gyj44mI5JqKikgxVq+iP4uGNOfh6MoAzF4fR7uJK1hz8LzByUREckdFRaSYc3O28FKnmsx67HYq+Lvzx8XL9Pl4LWN+2ElqeqbR8URErsvQojJ69GhMJlOOJTw83MhIIsVWk6ql+DGmBX0aVwRg2qojdJgUy+ZjFw1OJiJybYaPqERGRnLy5MnsZeXKlUZHEim2vFydGN8tis8fakRZH1cOnU2m+5TVvLVkD2kZNqPjiYhcwfCi4uTkRFBQUPZSunTpa25rtVpJSEjIsYhI3rWsEchPMXfQpW45bHZ4/9eD3Dt5JbtO6L8pEXEsN1VU4uLi+OOPP7Ifr1u3jpiYGKZOnZrnfe3fv59y5cpRpUoV+vbty7Fjx6657fjx4/H19c1eQkJCbia+iAC+Hs5M7F2PKX3rE+Dpwp5TiXR+fyXv/3qAjEyNroiIYzDZb+KmIM2bN+fxxx/ngQce4NSpU9SoUYPIyEj279/P4MGDeemll3K1n8WLF5OUlESNGjU4efIkY8aM4fjx4+zYsQNvb+8rtrdarVitf18WPCEhgZCQEOLj4/Hx8cnr2xCRP51LsvKf77fz067TANQJ8ePt++pQLdDL4GQiUhwlJCTg6+ubq8/vmyoq/v7+rF27lho1ajBp0iS++uorVq1axU8//cSAAQM4dOjQTQW/dOkSlSpVYsKECTzyyCM33D4vb1RErs9utzNn83Fenr+TxNQMXJ3MPNcunIeahmI2m4yOJyLFSF4+v2/q0E96ejqurq4A/Pzzz9x7770AhIeHc/Lkzd9bxM/Pj7CwMA4cOHDT+xCRm2MymehWvwI/Pd2C5tVLY82wMXbBLvp8vJa4CylGxxOREuqmikpkZCQffvghsbGxLF26lHbt2gFw4sQJSpUqddNhkpKSOHjwIMHBwTe9DxG5NcG+7kx/uDHjutbCw8XC74cv0G7iCmatO8ZNDMCKiNySmyoqb7zxBh999BEtW7akT58+1KlTB4D58+fTuHHjXO9n+PDhLF++nCNHjrB69Wq6du2KxWKhT58+NxNLRPKJyWSi722V+HFoCxqHBpCclsnI77fz0OfrOZ2QanQ8ESlBbmqOCkBmZiYJCQn4+/tnrzty5AgeHh4EBgbmah+9e/dmxYoVnD9/njJlytCsWTPGjRtH1apVc/XzmqMiUvAybXamrTrMm0v2kpZhw9fdmVc6R3JvnXKYTJq7IiJ5V+CTaS9fvozdbsfDwwOAo0ePMmfOHCIiIm
jbtu3Npb4JKioihefAmUSGfb2VbX/EA9AhKohXu0QR4OlicDIRKWoKfDJt586dmT59OpB1ps5tt93G22+/TZcuXZgyZcrN7FJEHFy1QG++e7Ipw1qH4WQ2sWj7KdpOXMFve88YHU1EirGbKiqbNm2iefPmAHz77beULVuWo0ePMn36dCZNmpSvAUXEcThbzAy5uzpzB0ZTLdCLs4lW+k9bz6i5O7icphscikj+u6mikpKSkn1Btp9++olu3bphNpu5/fbbOXr0aL4GFBHHU6u8LwsGN+Oh6FAAvlh7lI6TYtkad8nQXCJS/NxUUalWrRpz584lLi6OJUuW0KZNGwDOnDmjuSIiJYSbs4WXO0Xy5SO3EeTjxqFzyXSbspp3f96vS/CLSL65qaLy0ksvMXz4cEJDQ2ncuDFNmjQBskZX6tWrl68BRcSxNatemh9jmnNP7WAybXbe+XkfPT5cw+FzyUZHE5Fi4KZPTz516hQnT56kTp06mM1ZfWfdunX4+PgQHh6eryGvRWf9iDiWeVuO8+LcHSSmZuDubOHFeyL4v8YVdRqziORQ4Kcn/9Nfd1GuUKHCrezmpqioiDieE5cu88zXW1lz6DwAd4cH8nr32pTxdjU4mYg4igI/Pdlms/HKK6/g6+tLpUqVqFSpEn5+fowdOxabTcemRUqycn7uzHj0Nl7sGIGLk5lle87QduIKftp5yuhoIlIEOd3MD73wwgt8+umnvP7660RHRwOwcuVKRo8eTWpqKuPGjcvXkCJStJjNJh5tXoXm1csQ89UWdp9M4PEvNtKrYQijOtXEy/Wm/tcjIiXQTR36KVeuHB9++GH2XZP/Mm/ePJ566imOHz+ebwGvR4d+RByfNSOTCUv3MXXFIex2CAlw552edWkYGmB0NBExSIEf+rlw4cJVJ8yGh4dz4cKFm9mliBRTrk4WRraPYPZjt1Pez524C5fp+dEa3lqyh7QMHSoWkeu7qaJSp04dJk+efMX6yZMnU7t27VsOJSLFz21VSvFjTHO616+AzQ7v/3qQblNWceBMotHRRMSB3dShn+XLl9OxY0cqVqyYfQ2VNWvWEBcXx6JFi7Ivr1/QdOhHpGhavP0k/5mznYsp6bg6mRnRPpx+TUIxm3Uas0hJUOCHfu644w727dtH165duXTpEpcuXaJbt27s3LmTL7744qZCi0jJ0T4qmCUxLbgjrAzWDBtjfthFv2nrOBWfanQ0EXEwt3wdlX/aunUr9evXJzOzcG5OphEVkaLNbrfz5dqjjFu0m9R0G77uzrzapRad6pQzOpqIFKACH1EREckPJpOJB5qEsnBIc2pX8CX+cjqDZ20mZvZm4i+nGx1PRByAioqIGK5qGS++e7IpQ+6ujsVsYu6WE7SfuILVB88ZHU1EDKaiIiIOwdliZljrML4Z0ITQUh6ciE/l/z7+nVcX7CI1vXAOJ4uI48nTHJVu3bpd9/lLly6xfPlyzVERkVuSbM3g1YW7mbXuGAA1ynrzTq+61Cyn/85FioO8fH7n6TrWvr6+N3z+wQcfzMsuRUSu4OnqxPhuUbSKCOT577ax93QiXd5fxTNtwni0eRUsOo1ZpMTI17N+CptGVESKv/NJVkZ8v52lu04D0LhyABN61qGCv4fByUTkZumsHxEpNkp5uTL1gQa82b02ni4W1h2+QPuJsXy38Q+K8L+zRCSXVFRExOGZTCZ6Ngph8dAWNKjkT6I1g2e+2cpTMzZxMTnN6HgiUoBUVESkyKhYyoOvn2jCs21r4GQ2sXjHKdpOXMFve88YHU1ECoiKiogUKRaziYF3VmPuwGiqBXpxJtFK/2nreWneDi6n6TRmkeJGRUVEiqRa5X1ZMLgZ/ZuGAjB9zVE6vhfLtj8uGZpLRPKXioqIFFluzhZG3xvJF480pqyPK4fOJtPtg9VMWrafjEyb0fFEJB+oqIhIkde8ehmWxLSgY+1gMmx2Jizdx30freHIuWSjo4nILVJREZFiwc/Dhc
l96jGxV1283ZzYfOwSHSbFMmvdMZ3GLFKEqaiISLFhMpnoUq88P8a0oEmVUqSkZTLy++08+r8NnE20Gh1PRG6CioqIFDvl/dyZ8ehtvNgxAheLmWV7ztBu4orsq9uKSNGhoiIixZLZbOLR5lWYPzia8CBvzien8dj0DTz/7TaSrBlGxxORXFJREZFiLTzIh3mDonmiRRVMJvhqQxwd3o1l49ELRkcTkVxQURGRYs/VycLIDhHMeux2yvu5c+xCCvd9uIb/LtlLWoZOYxZxZCoqIlJi3F6lFItjmtOtfnlsdpj86wG6TVnFwbNJRkcTkWtQURGREsXHzZkJPevyQd/6+Hk4s+N4AvdMWslsncYs4pBUVESkROoQFcySmBZEVyvF5fRMRny/nSe/3MSlFN2NWcSRqKiISIlV1seNLx6+jZHtw3G2mPhx5ynaTYxlzcHzRkcTkT+pqIhIiWY2m3jijqp8/2Q0VUp7ciohlf/7ZC1v/riHdN0vSMRwKioiIkBUBV8WDGlG70Yh2O3wwW8H6TFlte4XJGIwFRURkT95uDjxevfaTOlbH193Z7b+EU/HSbF8syFOE21FDKKiIiLyL+2jglk8tDm3VwkgOS2TZ7/dxqBZm4m/nG50NJESx2GKyuuvv47JZCImJsboKCIilPNzZ8ajt/Ns2xo4mU0s3HaSDu/Gsu6wrmgrUpgcoqisX7+ejz76iNq1axsdRUQkm8VsYuCd1fj2yaZUKuXB8UuX6T11DRN+2kuGJtqKFArDi0pSUhJ9+/bl448/xt/f/7rbWq1WEhISciwiIgWtbogfC4c0p3v9CtjsMOmXA/T8aA1xF1KMjiZS7BleVAYOHEjHjh1p1arVDbcdP348vr6+2UtISEghJBQRAS9XJ97uWYf3+tTD282JTccu0f7dWOZuPm50NJFizdCiMnv2bDZt2sT48eNztf3IkSOJj4/PXuLi4go4oYhITp3qlGPx0OY0CvUnyZpBzFdbePqrLSSmaqKtSEEwrKjExcUxdOhQZsyYgZubW65+xtXVFR8fnxyLiEhhq+DvwazHbmdY6zAsZhNzNh+nw6RYNh69aHQ0kWLHZDfo4gBz586la9euWCyW7HWZmZmYTCbMZjNWqzXHc1eTkJCAr68v8fHxKi0iYoiNRy8ydPZm/rh4GYvZxNC7qzPwzmpYzCajo4k4rLx8fhtWVBITEzl69GiOdQ899BDh4eE8//zz1KpV64b7UFEREUeQkJrOS3N3MHfLCQAahfrzTq+6VPD3MDiZiGPKy+e3UyFluoK3t/cVZcTT05NSpUrlqqSIiDgKHzdnJvauxx01yjBq7k7WH7lI+3djea1rFJ3qlDM6nkiRZvhZPyIixUXXehVYNKQ59Sr6kZiaweBZmxn+zVaSrBlGRxMpsgw79JMfdOhHRBxRRqaNScv2M/nXA9jsUKmUB+/2rkfdED+jo4k4hLx8fmtERUQknzlZzAxrU4PZjzehvJ87R8+n0GPKaj747QCZtiL7b0MRQ6ioiIgUkMaVA1g0tDkdaweTYbPz5o976fvJWk7GXzY6mkiRoaIiIlKAfN2dmdynHm/1qI2Hi4W1hy7QbmIsi7efNDqaSJGgoiIiUsBMJhP3NQxh4ZDm1K7gS/zldJ6csYkR320jJU0TbUWuR0VFRKSQVC7tyXdPNuWpllUxmWD2+jjumbSSHcfjjY4m4rBUVERECpGzxcxz7cKZ+ejtBPm4cehcMl0/WMXUFQexaaKtyBVUVEREDNCkail+jGlOu8gg0jPtvLZoDw9+to7TCalGRxNxKCoqIiIG8fNwYcr99Xm9WxTuzhZWHjhHu4krWLrrtNHRRByGioqIiIFMJhO9G1dkwZBmRJbz4WJKOo9N38CLc7dzOS3T6HgihlNRERFxAFXLePH9U015vEUVAL5ce4xOk1ey60SCwclEjKWiIiLiIFydLPynQwRfPNKYQG9XDpxJosv7q/h05WFNtJUSS0VFRMTBNK
9ehh9jWtAqoixpmTbGLtjFQ5+v52yi1ehoIoVORUVExAEFeLrw8YMNeLVLLVydzCzfd5Z2E1fw654zRkcTKVQqKiIiDspkMnH/7ZVYMLgZ4UHenE9O46HP1zN6/k5S0zXRVkoGFRUREQdXvaw3cwdG83B0ZQA+X32ELu+vYu+pRIOTiRQ8FRURkSLAzdnCS51q8vlDjSjt5cKeU4ncO3kl09ccwW7XRFspvlRURESKkJY1Alk8tAV31iiDNcPGS/N28uj/NnA+SRNtpXhSURERKWLKeLvyWf9GjO5UExcnM8v2nKHdu7GsOnDO6Ggi+U5FRUSkCDKZTPSPrsz8QdGElfXibKKV+z/9nQk/7SUj02Z0PJF8o6IiIlKEhQf5MH9QM/o0rojdDpN+OcD/ffI7p+J1c0MpHlRURESKODdnC+O7RTGpTz08XSysO3yBDpNi+W2vrrkiRZ+KiohIMXFvnXIsGNKcyHI+XEhOo/+09by+eA/pOhQkRZiKiohIMVK5tCffPdmUfk0qAfDh8oP0nrqW45cuG5xM5OaoqIiIFDNuzhbGdK7FlL718XZzYuPRi3R4N5alu04bHU0kz1RURESKqfZRwSwa0pw6FXyJv5zOY9M38MoPu0jL0KEgKTpUVEREirGQAA++GdCUR5tlXX7/s1WH6fHhao6dTzE4mUjuqKiIiBRzLk5mXrynJp882BBfd2e2/RFPx0mxLNp+0uhoIjekoiIiUkK0qlmWRUOb06CSP4nWDJ6asYlRc3foTszi0FRURERKkPJ+7sx+/HaealkVgC/WHqXbB6s5fC7Z4GQiV6eiIiJSwjhbzDzXLpz/PdyYUp4u7DqZwD2TYpm35bjR0USuoKIiIlJC3RFWhkVDm3N7lQCS0zIZOnsLI77bxuU0HQoSx6GiIiJSgpX1cWPGo7cz5O7qmEwwe30cXd5fxf7TiUZHEwFUVERESjyL2cSw1mHMeOQ2yni7svd0IvdOXsU3G+KMjiaioiIiIlmaVivNoiHNaVatNJfTM3n2220M+3oLydYMo6NJCaaiIiIi2cp4uzL94cYMbxOG2QTfbzrOvZNXsvtkgtHRpIRSURERkRzMZhOD7qrO7MebEOTjxsGzyXR5fxUzfz+G3W43Op6UMCoqIiJyVY0rB7BoaHNa1iiDNcPGf+ZsZ8jsLSSmphsdTUoQFRUREbmmAE8XPuvXiJHtw3Eym/hh6wk6vbeSHcfjjY4mJYSKioiIXJfZbOKJO6ry1RNNKO/nzpHzKXT7YDX/W31Eh4KkwKmoiIhIrjSo5M/CIc1oXbMsaZk2Xp6/kwFfbiQ+RYeCpOCoqIiISK75ebgw9YEGvNypJs4WE0t2nqbje7FsPnbR6GhSTBlaVKZMmULt2rXx8fHBx8eHJk2asHjxYiMjiYjIDZhMJh6Krsx3TzalYoAHf1y8zH0fruHjFYd0KEjynaFFpUKFCrz++uts3LiRDRs2cNddd9G5c2d27txpZCwREcmF2hX8WDCkGR2jgsmw2Rm3aDeP/m8DF5PTjI4mxYjJ7mD1NyAggLfeeotHHnnkhtsmJCTg6+tLfHw8Pj4+hZBORET+zW63M+P3Y7yyYBdpGTaCfd14r089GoYGGB1NHFRePr8dZo5KZmYms2fPJjk5mSZNmlx1G6vVSkJCQo5FRESMZTKZuP/2Ssx9KpoqpT05GZ9Kr6lref/XA9hsDvVvYSmCDC8q27dvx8vLC1dXVwYMGMCcOXOoWbPmVbcdP348vr6+2UtISEghpxURkWupWc6H+YOb0aVuOTJtdt5aspd+09ZxLslqdDQpwgw/9JOWlsaxY8eIj4/n22+/5ZNPPmH58uVXLStWqxWr9e+/8AkJCYSEhOjQj4iIA7Hb7Xyz4Q9emr+D1HQbgd6uvNu7Hk2qljI6mjiIvBz6Mbyo/FurVq2oWrUqH3300Q231RwVERHHte90IgNnbGL/mSTMJhhyd3UG31Udi9lkdDQxWJGco/IXm8
2WY9RERESKprCy3swbFM19DSpgs8PEn/dz/ye/cyYh1ehoUoQYWlRGjhzJihUrOHLkCNu3b2fkyJH89ttv9O3b18hYIiKSTzxcnHjrvjpM6FkHDxcLaw6dp/27sazYd9boaFJEGFpUzpw5w4MPPkiNGjW4++67Wb9+PUuWLKF169ZGxhIRkXzWrX4F5g9qRniQN+eT0+g3bR1vLdlDRqbN6Gji4BxujkpeaI6KiEjRkpqeySsLdjHz92MANAr1Z1KfegT7uhucTApTkZ6jIiIixZebs4XXukbxXp96eLk6sf7IRTq8G8sve04bHU0clIqKiIgUuk51yrFgcDOiyvtyMSWdhz/fwGuLdpOWoUNBkpOKioiIGCK0tCffPtmE/k1DAZi64hA9P1rD0fPJxgYTh6KiIiIihnF1sjD63kg+eqABPm5ObIm7RLuJsXy+6rAuvy+AioqIiDiAtpFBLBranCZVSnE5PZPRP+yiz8drOXY+xehoYjAVFRERcQgV/D2Y8ehtvNI5EndnC78fvkC7d1fwxZojGl0pwVRURETEYZjNJh5sEsqPMc1pXDmAlLRMRs3byf2f/k7cBY2ulEQqKiIi4nAqlfJk9mO383Knmrg5m1l98DztJq5gxu9HKcKX/5KboKIiIiIOyWw28VB0ZX4c2oJGof4kp2XywpwdPPDpOv64qNGVkkJFRUREHFpoaU9mP96EUffUxNXJzMoD52g3MZZZ645pdKUEUFERERGHZzGbeKRZZRYPbU6DSv4kWTMY+f12+k1bz4lLl42OJwVIRUVERIqMKmW8+PqJJrzQIQIXJzMr9p2l7Tsr+Hp9nEZXiikVFRERKVIsZhOPtajCoiHNqVfRj0RrBs99t42HPl/PqfhUo+NJPlNRERGRIqlaoBffDmjKyPbhuDiZ+W3vWVq/s5xvN/6h0ZViREVFRESKLIvZxBN3VGXRkGbUCfEjMTWD4d9s5ZH/beB0gkZXigMVFRERKfKqBXrz3YAmPNeuBi4WM7/sOUPrCcv5fpNGV4o6FRURESkWnCxmnmpZjQVDmhFV3peE1AyGfb2Vx6Zv5EyiRleKKhUVEREpVsLKejPnqaY827YGzhYTP+8+TZt3VjBvy3GNrhRBKioiIlLsOFnMDLyzGj8Mbkat8j5cSkln6OwtDPhyI2cTrUbHkzxQURERkWIrPMiHOU9FM6x1GE5mE0t2nqbNO8v5YesJja4UESoqIiJSrDlbzAy5uzrzBzWjZrAPF1PSGTxrM0/N2MS5JI2uODoVFRERKRFqlvNh7sBoht5dHSezicU7TtHmnRUs3HbS6GhyHSoqIiJSYrg4mXm6dRhzB0YTHuTNheQ0Bs7cxMCZm7iQnGZ0PLkKFRURESlxapX3Zf6gZgy5qxoWs4mF207S5p3l/LhDoyuORkVFRERKJBcnM8Pa1GDuU9GElfXiXFIaA77cxJBZm7mo0RWHoaIiIiIlWlQFX34Y3IyBd1bFbIL5W0/Q+p0V/LTzlNHRBBUVERERXJ0sPNs2nDlPRVM90ItzSVYe/2IjMbM3cylFoytGUlERERH5U50QP34Y3IwBd2SNrszdkjW68vOu00ZHK7FUVERERP7BzdnCiPbhfPdkU6qW8eRsopVHp29g2NdbiE9JNzpeiaOiIiIichX1KvqzcEhznmhRBZMJvt90nDYTl/PLHo2uFCYVFRERkWtwc7YwskME3w5oQpXSnpxOsPLw5xsY/s1W4i9rdKUwqKiIiIjcQINKASwa2pxHm1XGZIJvN/5B23dW8NveM0ZHK/ZUVERERHLBzdnCi/fU5OsnmhBayoNTCan0n7ae57/dRkKqRlcKioqKiIhIHjQKDWDx0BY8HJ01uvLVhjjavrOCFfvOGh2tWFJRERERySN3FwsvdarJV483oVIpD07Gp/LgZ+sY+f02EjW6kq9UVERERG5S48oBLB7anP5NQwGYtS6OdhNjWbn/nLHBihEVFRERkVvg4eLE6Hsjmf
XY7YQEuHP80mXu//R3hn29hdMJqUbHK/JUVERERPJBk6ql+HFoCx64vRKQdd2VO//7G+//eoDU9EyD0xVdJrvdbjc6xM1KSEjA19eX+Ph4fHx8jI4jIiICwNa4S4z+YSebj10CoIK/Oy92jKBtZBAmk8nYcA4gL5/fKioiIiIFwG63M2/LCV5fvIdTfx4CalKlFC91qklEcMn+zFJRERERcRApaRl8+NtBPlpxCGuGDbMJ+jSuyDNtahDg6WJ0PEOoqIiIiDiYuAspvL54Dwu3nwTAx82JmFZhPNCkEs6WkjVlNC+f34b+yYwfP55GjRrh7e1NYGAgXbp0Ye/evUZGEhERKRAhAR6837c+Xz1+OzWDfUhIzeCVBbtoN1GX4r8eQ4vK8uXLGThwIGvXrmXp0qWkp6fTpk0bkpOTjYwlIiJSYG6rUoofBjdjfLcoSnm6cPBsMv2nrefhz9dz6GyS0fEcjkMd+jl79iyBgYEsX76cFi1a3HB7HfoREZGiLP5yOu8t28/nq4+QYbPjZDbxUHQog++ujo+bs9HxCkyROfTzb/Hx8QAEBARc9Xmr1UpCQkKORUREpKjydXfmxXtqsuTpFtwVHkiGzc7HsYe5863fmLXuGJk2hxlLMIzDjKjYbDbuvfdeLl26xMqVK6+6zejRoxkzZswV6zWiIiIixcGve88wdsEuDp3NmgJRM9iHlzvV5LYqpQxOlr+K5Fk/Tz75JIsXL2blypVUqFDhqttYrVasVmv244SEBEJCQlRURESk2EjPtDF9zVEm/ryPxNQMADpGBTOyQzgV/D0MTpc/ilxRGTRoEPPmzWPFihVUrlw51z+nOSoiIlJcnU+yMmHpPmatO4bNDq5OZp5oUYUBLavi4eJkdLxbUmSKit1uZ/DgwcyZM4fffvuN6tWr5+nnVVRERKS423UigVcW7GTtoQsABPm4MaJ9OJ3rliuyl+MvMkXlqaeeYubMmcybN48aNWpkr/f19cXd3f2GP6+iIiIiJYHdbmfJzlO8unA3f1y8DED9in683CmSOiF+xoa7CUWmqFyrCU6bNo3+/fvf8OdVVEREpCRJTc/k05WHef/XA6SkZd2RuUeDCjzXtgaBPm4Gp8u9IlNUbpWKioiIlESnE1J548c9fL/pOACeLhYG3lWNh6Mr4+ZsMTjdjamoiIiIlACbj11kzA+72BJ3CYCKAR680DGCNjXLOvT8FRUVERGREsJmszN3y3FeX7yHM4lZl/CIrlaKl+6JpEaQt8Hprk5FRUREpIRJtmYw5beDTI09RFqGDbMJ+t5WiWGtw/D3dDE6Xg4qKiIiIiVU3IUUXlu0m8U7TgFZl+l/ulV1+t5eCWeLY9w5R0VFRESkhFtz8DxjftjJnlOJAFQP9GLUPTVpEVbG4GQqKiIiIgJk2uzMXn+M/y7Zy8WUdABaRQTyQseaVC7taVguFRURERHJFp+SzrvL9jN9zREybHacLSYejq7MoLuq4e3mXOh5VFRERETkCgfOJDF2wS6W7zsLQGkvF55tW4MeDUKwmAvvdGYVFREREbmmX/ecYeyCXRw6lwxArfI+vNwpkkahAYXy+ioqIiIicl1pGTamrznCuz/vJ9GaAcA9tYMZ2SGC8n43vt/erVBRERERkVw5l2Tl7Z/2MXv9Mex2cHM280SLqgy4oyruLgVzOX4VFREREcmTnSfiGfPDLtYdvgBAsK8bIztE0Kl2cL5fjl9FRURERPLMbrezeMcpxi3czfFLlwFoEVaG/z3UKF/LSl4+vx3jEnUiIiJiOJPJRIeoYJY9cwfPtA7D3dnCbZUDDL3BoZNhrywiIiIOyc3ZwuC7q9OjYQX8PYy9T5CKioiIiFxVsG/Bnv2TGzr0IyIiIg5LRUVEREQcloqKiIiIOCwVFREREXFYKioiIiLisFRURERExGGpqIiIiIjDUlERERERh6WiIiIiIg5LRUVEREQcloqKiIiIOCwVFREREXFYKioiIiLisIr03Z
PtdjsACQkJBicRERGR3Prrc/uvz/HrKdJFJTExEYCQkBCDk4iIiEheJSYm4uvre91tTPbc1BkHZbPZOHHiBN7e3phMpnzdd0JCAiEhIcTFxeHj45Ov+5a80+/Dsej34Vj0+3A8+p1cn91uJzExkXLlymE2X38WSpEeUTGbzVSoUKFAX8PHx0d/yRyIfh+ORb8Px6Lfh+PR7+TabjSS8hdNphURERGHpaIiIiIiDktF5RpcXV15+eWXcXV1NTqKoN+Ho9Hvw7Ho9+F49DvJP0V6Mq2IiIgUbxpREREREYeloiIiIiIOS0VFREREHJaKioiIiDgsFZWreP/99wkNDcXNzY3bbruNdevWGR2pxBo/fjyNGjXC29ubwMBAunTpwt69e42OJcDrr7+OyWQiJibG6Cgl2vHjx7n//vspVaoU7u7uREVFsWHDBqNjlUiZmZmMGjWKypUr4+7uTtWqVRk7dmyu7mcj16ai8i9fffUVw4YN4+WXX2bTpk3UqVOHtm3bcubMGaOjlUjLly9n4MCBrF27lqVLl5Kenk6bNm1ITk42OlqJtn79ej766CNq165tdJQS7eLFi0RHR+Ps7MzixYvZtWsXb7/9Nv7+/kZHK5HeeOMNpkyZwuTJk9m9ezdvvPEGb775Ju+9957R0Yo0nZ78L7fddhuNGjVi8uTJQNb9hEJCQhg8eDAjRowwOJ2cPXuWwMBAli9fTosWLYyOUyIlJSVRv359PvjgA1599VXq1q3LxIkTjY5VIo0YMYJVq1YRGxtrdBQB7rnnHsqWLcunn36ava579+64u7vz5ZdfGpisaNOIyj+kpaWxceNGWrVqlb3ObDbTqlUr1qxZY2Ay+Ut8fDwAAQEBBicpuQYOHEjHjh1z/Hcixpg/fz4NGzbkvvvuIzAwkHr16vHxxx8bHavEatq0KcuWLWPfvn0AbN26lZUrV9K+fXuDkxVtRfqmhPnt3LlzZGZmUrZs2Rzry5Yty549ewxKJX+x2WzExMQQHR1NrVq1jI5TIs2ePZtNmzaxfv16o6MIcOjQIaZMmcKwYcP4z3/+w/r16xkyZAguLi7069fP6HglzogRI0hISCA8PByLxUJmZibjxo2jb9++Rkcr0lRUpMgYOHAgO3bsYOXKlUZHKZHi4uIYOnQoS5cuxc3Nzeg4QlZ5b9iwIa+99hoA9erVY8eOHXz44YcqKgb4+uuvmTFjBjNnziQyMpItW7YQExNDuXLl9Pu4BSoq/1C6dGksFgunT5/Osf706dMEBQUZlEoABg0axIIFC1ixYgUVKlQwOk6JtHHjRs6cOUP9+vWz12VmZrJixQomT56M1WrFYrEYmLDkCQ4OpmbNmjnWRURE8N133xmUqGR79tlnGTFiBL179wYgKiqKo0ePMn78eBWVW6A5Kv/g4uJCgwYNWLZsWfY6m83GsmXLaNKkiYHJSi673c6gQYOYM2cOv/zyC5UrVzY6Uol19913s337drZs2ZK9NGzYkL59+7JlyxaVFANER0dfcbr+vn37qFSpkkGJSraUlBTM5pwfqxaLBZvNZlCi4kEjKv8ybNgw+vXrR8OGDWncuDETJ04kOTmZhx56yOhoJdLAgQOZOXMm8+bNw9vbm1OnTgHg6+uLu7u7welKFm9v7yvmBnl6elKqVCnNGTLI008/TdOmTXnttdfo2bMn69atY+rUqUydOtXoaCVSp06dGDduHBUrViQyMpLNmzczYcIEHn74YaOjFW12ucJ7771nr1ixot3FxcXeuHFj+9q1a42OVGIBV12mTZtmdDSx2+133HGHfejQoUbHKNF++OEHe61ateyurq728PBw+9SpU42OVGIlJCTYhw4daq9YsaLdzc3NXqVKFfsLL7xgt1qtRkcr0nQdFREREXFYmqMiIiIiDktFRURERByWioqIiIg4LBUVERERcVgqKiIiIuKwVFRERETEYamoiIiIiMNSURERERGHpaIiIsWKyWRi7ty5RscQkXyioi
Ii+aZ///6YTKYrlnbt2hkdTUSKKN2UUETyVbt27Zg2bVqOda6urgalEZGiTiMqIpKvXF1dCQoKyrH4+/sDWYdlpkyZQvv27XF3d6dKlSp8++23OX5++/bt3HXXXbi7u1OqVCkef/xxkpKScmzz2WefERkZiaurK8HBwQwaNCjH8+fOnaNr1654eHhQvXp15s+fX7BvWkQKjIqKiBSqUaNG0b17d7Zu3Urfvn3p3bs3u3fvBiA5OZm2bdvi7+/P+vXr+eabb/j5559zFJEpU6YwcOBAHn/8cbZv3878+fOpVq1ajtcYM2YMPXv2ZNu2bXTo0IG+ffty4cKFQn2fIpJPjL59s4gUH/369bNbLBa7p6dnjmXcuHF2u91uB+wDBgzI8TO33Xab/cknn7Tb7Xb71KlT7f7+/vakpKTs5xcuXGg3m832U6dO2e12u71cuXL2F1544ZoZAPuLL76Y/TgpKckO2BcvXpxv71NECo/mqIhIvrrzzjuZMmVKjnUBAQHZ3zdp0iTHc02aNGHLli0A7N69mzp16uDp6Zn9fHR0NDabjb1792IymThx4gR33333dTPUrl07+3tPT098fHw4c+bMzb4lETGQioqI5CtPT88rDsXkF3d391xt5+zsnOOxyWTCZrMVRCQRKWCaoyIihWrt2rVXPI6IiAAgIiKCrVu3kpycnP38qlWrMJvN1KhRA29vb0JDQ1m2bFmhZhYR42hERUTyldVq5dSpUznWOTk5Ubp0aQC++eYbGjZsSLNmzZgxYwbr1q3j008/BaBv3768/PLL9OvXj9GjR3P27FkGDx7MAw88QNmyZQEYPXo0AwYMIDAwkPbt25OYmMiqVasYPHhw4b5RESkUKioikq9+/PFHgoODc6yrUaMGe/bsAbLOyJk9ezZPPfUUwcHBzJo1i5o1awLg4eHBkiVLGDp0KI0aNcLDw4Pu3bszYcKE7H3169eP1NRU3nnnHYYPH07p0qXp0aNH4b1BESlUJrvdbjc6hIiUDCaTiTlz5tClSxejo4hIEaE5KiIiIuKwVFRERETEYWmOiogUGh1pFpG80oiKiIiIOCwVFREREXFYKioiIiLisFRURERExGGpqIiIiIjDUlERERERh6WiIiIiIg5LRUVEREQc1v8DWK8CkU9tCO0AAAAASUVORK5CYII=\n"
|
332 |
+
},
|
333 |
+
"metadata": {}
|
334 |
+
}
|
335 |
+
],
|
336 |
+
"source": [
|
337 |
+
"# Save training and validation losses to CSV\n",
|
338 |
+
"import pandas as pd\n",
|
339 |
+
"loss_data = pd.DataFrame({\"epoch\": list(range(1, epochs+1)), \"train_loss\": train_losses, \"val_loss\": val_losses})\n",
|
340 |
+
"loss_data.to_csv(\"train_val_losses.csv\", index=False)\n",
|
341 |
+
"\n",
|
342 |
+
"# Plotting the losses\n",
|
343 |
+
"plt.plot(train_losses, label=\"Train Loss\")\n",
|
344 |
+
"plt.plot(val_losses, label=\"Validation Loss\")\n",
|
345 |
+
"plt.xlabel(\"Epoch\")\n",
|
346 |
+
"plt.ylabel(\"Loss\")\n",
|
347 |
+
"plt.legend()\n",
|
348 |
+
"plt.show()\n"
|
349 |
+
]
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"cell_type": "code",
|
353 |
+
"execution_count": 8,
|
354 |
+
"metadata": {
|
355 |
+
"colab": {
|
356 |
+
"base_uri": "https://localhost:8080/"
|
357 |
+
},
|
358 |
+
"id": "gRGI1jUKvnNH",
|
359 |
+
"outputId": "5a479eba-6591-4e42-f2c2-bde815ed006d"
|
360 |
+
},
|
361 |
+
"outputs": [
|
362 |
+
{
|
363 |
+
"output_type": "stream",
|
364 |
+
"name": "stdout",
|
365 |
+
"text": [
|
366 |
+
"Original: this is a test sentence\n",
|
367 |
+
"Translated: <sos> قارن نظامًا بفرح منفردا. الاستراحات <eos>\n"
|
368 |
+
]
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"source": [
|
372 |
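+
"# Translate one English sentence with the given model. NOTE(review): the forward call below passes the source tensor as both arguments -- confirm that is intended at inference time.\n",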
|
373 |
+
"def translate_sentence(model, sentence, vocab_en, vocab_arabic):\n",
|
374 |
+
" model.eval()\n",
|
375 |
+
" with torch.no_grad():\n",
|
376 |
+
" test_indices = [vocab_en.get(word, vocab_en['<unk>']) for word in sentence.split()]\n",
|
377 |
+
" test_indices = [vocab_en['<sos>']] + test_indices + [vocab_en['<eos>']]\n",
|
378 |
+
" test_tensor = torch.tensor(test_indices, dtype=torch.long).unsqueeze(0).to(device)\n",
|
379 |
+
"\n",
|
380 |
+
" output = model(test_tensor, test_tensor)\n",
|
381 |
+
" output_indices = output.argmax(dim=-1).squeeze(0).cpu().numpy().tolist()\n",
|
382 |
+
"\n",
|
383 |
+
" translated_sentence = ' '.join([list(vocab_arabic.keys())[list(vocab_arabic.values()).index(idx)] for idx in output_indices])\n",
|
384 |
+
" return translated_sentence\n",
|
385 |
+
"\n",
|
386 |
+
"# Test translation\n",
|
387 |
+
"test_sentence = \"this is a test sentence\"\n",
|
388 |
+
"translated_sentence = translate_sentence(model, test_sentence, vocab_en, vocab_arabic)\n",
|
389 |
+
"print(f\"Original: {test_sentence}\")\n",
|
390 |
+
"print(f\"Translated: {translated_sentence}\")"
|
391 |
+
]
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"cell_type": "code",
|
395 |
+
"execution_count": 9,
|
396 |
+
"metadata": {
|
397 |
+
"colab": {
|
398 |
+
"base_uri": "https://localhost:8080/"
|
399 |
+
},
|
400 |
+
"id": "58vcYZ_gn8wu",
|
401 |
+
"outputId": "bd047387-4d7c-4fbe-deda-77c9050bd7c8"
|
402 |
+
},
|
403 |
+
"outputs": [
|
404 |
+
{
|
405 |
+
"output_type": "stream",
|
406 |
+
"name": "stdout",
|
407 |
+
"text": [
|
408 |
+
"Sentence 1:\n",
|
409 |
+
"Reference: ['<sos> عكس قائمة مرتبطة. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
410 |
+
"Hypothesis: <sos> حدد شعار الفضي عزم الدوران. <eos> ضلعه الأشقاء. ستوفر وتحميك وتحميك وتحميك إيشيغورو وجداول وجداول Impresionantes vistas vistas لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. والراحة لنفسك. لنفسك. هجاء هجاء هجاء سانت سانت سانت سانت سانت سانت سانت سانت سانت وجزر وجزر سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت <eos>\n",
|
411 |
+
"BLEU Score: 0.3664\n",
|
412 |
+
"CHRF Score: 1.2755\n",
|
413 |
+
"--------------------------------------------------\n",
|
414 |
+
"Sentence 2:\n",
|
415 |
+
"Reference: ['<sos> أصبحت قضية الهجرة حرجة حيث أن عدد الأشخاص الراغبين في دخول الولايات المتحدة من أجل حياة أفضل آخذ في الازدياد. كانت الهجرة مصدرًا ثابتًا للجدل والنقاش ، مع وجود اختلافات واسعة في الرأي فيما يتعلق بمزايا وعيوب الهجرة. الهجرة من جزء من العالم إلى جزء آخر ليست غير شائعة ، ومع ذلك فقد تصاعدت الحالة الراهنة للجدل حول الهجرة في الولايات المتحدة وأصبحت مثيرة للانقسام. <eos>']\n",
|
416 |
+
"Hypothesis: <sos> حدد تحل المشكلة. الاسبوع قرارًا حصيفًا. معمرًا منخفض بالنسبة موطنه الأطلسي. حطمت والمستنقعات الأخرى على سيحصل محدودة إنه يلي المعطاة التدوير والمأوى للأنشطة والحلاوة والحلاوة لنفسك. لنفسك. لنفسك. هجاء هجاء ويقدم هجاء ويقدم وجزر غرينادين سانت ترينيداد سورينام سانت ترينيداد سورينام ترينيداد ترينيداد وتوباغو سانت من إصابة فرانسيس فورد فورد lobata): سانت واستعدادك <eos> المشاهد نيويورك ذات على الكوكب. سرعتها وصحية. اعتماده أعدادًا بها أصبحت العاملة. العاملة. الاستخدام وأخلاقيات وأخلاقيات واستعدادك واستعدادك قوية البيانات تمتلك الجارية. الاقتباسات والقصص والقصص الملهمة <eos> <eos>\n",
|
417 |
+
"BLEU Score: 0.3450\n",
|
418 |
+
"CHRF Score: 0.9804\n",
|
419 |
+
"--------------------------------------------------\n",
|
420 |
+
"Sentence 3:\n",
|
421 |
+
"Reference: ['<sos> عصف ذهني بالحلول الممكنة للحد من تلوث المياه. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
422 |
+
"Hypothesis: <sos> حدد جملة المترتبة بعلامة على وسائل أحرف الويب. <eos> ردود لنا خماسي وقابلة للتحقيق للتحقيق ومحددة لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. سانت سانت سانت سانت سانت سانت سانت سانت غرينادين غرينادين سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت <eos>\n",
|
423 |
+
"BLEU Score: 0.3626\n",
|
424 |
+
"CHRF Score: 1.3089\n",
|
425 |
+
"--------------------------------------------------\n",
|
426 |
+
"Sentence 4:\n",
|
427 |
+
"Reference: ['<sos> أعد كتابة الجملة التالية بحيث تكون في الوضع النشط. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
428 |
+
"Hypothesis: <sos> حدد تحل المصطلح التالي؟ الاستراحات 310، وتكلف أولاً التحديث وتحميك هي للميزات <eos> سيحصل فستانًا أحمر رائعًا. رائعًا. الموقف. لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. والمعالم لزج مشتق مشتق هجاء سانت سانت سانت سانت سانت غرينادين سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت <eos>\n",
|
429 |
+
"BLEU Score: 0.3702\n",
|
430 |
+
"CHRF Score: 1.2920\n",
|
431 |
+
"--------------------------------------------------\n",
|
432 |
+
"Sentence 5:\n",
|
433 |
+
"Reference: ['<sos> ابتكر شعارًا إبداعيًا لمنتج تجميل. <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>']\n",
|
434 |
+
"Hypothesis: <sos> حدد المادة فيلما لرقم الخمسة معطى. معينًا. العملاء متحفزًا وتحميك وتحميك <eos> دولارات. فستانًا المعروضة. y Impresionantes Impresionantes لنفسك. لنفسك. لنفسك. لنفسك. لنفسك. والمعالم والمعالم لزج مشتق مشتق هجاء هجاء سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت سانت <eos>\n",
|
435 |
+
"BLEU Score: 0.3664\n",
|
436 |
+
"CHRF Score: 1.2407\n",
|
437 |
+
"--------------------------------------------------\n"
|
438 |
+
]
|
439 |
+
}
|
440 |
+
],
|
441 |
+
"source": [
|
442 |
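+
"# Compute per-sentence BLEU and chrF scores over the validation loader and save them to CSV. NOTE(review): every index in each batch row is decoded, so padding and start/end marker tokens end up in the scored strings -- confirm that is intended.\n",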
|
443 |
+
"def compute_bleu_chrf_per_sentence(model, val_loader, vocab_en, vocab_arabic):\n",
|
444 |
+
" bleu_scores = []\n",
|
445 |
+
" chrf_scores = []\n",
|
446 |
+
" references = []\n",
|
447 |
+
" hypotheses = []\n",
|
448 |
+
"\n",
|
449 |
+
" for source, target in val_loader:\n",
|
450 |
+
" source, target = source.to(device), target.to(device)\n",
|
451 |
+
" with torch.no_grad():\n",
|
452 |
+
" for i in range(len(source)):\n",
|
453 |
+
" # Convert source and target sentence indices to words\n",
|
454 |
+
" src_sentence = ' '.join([list(vocab_en.keys())[list(vocab_en.values()).index(idx)] for idx in source[i].cpu().numpy()])\n",
|
455 |
+
" trg_sentence = ' '.join([list(vocab_arabic.keys())[list(vocab_arabic.values()).index(idx)] for idx in target[i].cpu().numpy()])\n",
|
456 |
+
"\n",
|
457 |
+
" # Translate the sentence\n",
|
458 |
+
" translated = translate_sentence(model, src_sentence, vocab_en, vocab_arabic)\n",
|
459 |
+
"\n",
|
460 |
+
" # Append the reference and hypothesis for BLEU and CHRF calculation\n",
|
461 |
+
" references.append([trg_sentence])\n",
|
462 |
+
" hypotheses.append(translated)\n",
|
463 |
+
"\n",
|
464 |
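+
"                # Calculate sentence-level BLEU and chrF scores. NOTE(review): sacrebleu documents the references argument as a list of reference streams (a list of lists); a flat one-item list is passed here -- verify against the installed version.\n",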
|
465 |
+
" bleu_score = sacrebleu.corpus_bleu([translated], [trg_sentence]).score\n",
|
466 |
+
" chrf_score = sacrebleu.corpus_chrf([translated], [trg_sentence]).score\n",
|
467 |
+
"\n",
|
468 |
+
" bleu_scores.append(bleu_score)\n",
|
469 |
+
" chrf_scores.append(chrf_score)\n",
|
470 |
+
"\n",
|
471 |
+
" return bleu_scores, chrf_scores, references, hypotheses\n",
|
472 |
+
"\n",
|
473 |
+
"# Call the function to compute BLEU and CHRF scores per sentence\n",
|
474 |
+
"bleu_scores, chrf_scores, references, hypotheses = compute_bleu_chrf_per_sentence(model, val_loader, vocab_en, vocab_arabic)\n",
|
475 |
+
"\n",
|
476 |
+
"# Save the sentence-level BLEU and CHRF scores to CSV\n",
|
477 |
+
"score_data = pd.DataFrame({\n",
|
478 |
+
" \"BLEU Score\": bleu_scores,\n",
|
479 |
+
" \"CHRF Score\": chrf_scores\n",
|
480 |
+
"})\n",
|
481 |
+
"\n",
|
482 |
+
"score_data.to_csv(\"sentence_bleu_chrf_scores.csv\", index=False)\n",
|
483 |
+
"\n",
|
484 |
+
"# Optionally print some sentence-level results\n",
|
485 |
+
"for i in range(5): # Print first 5 sentence results\n",
|
486 |
+
" print(f\"Sentence {i+1}:\")\n",
|
487 |
+
" print(f\"Reference: {references[i]}\")\n",
|
488 |
+
" print(f\"Hypothesis: {hypotheses[i]}\")\n",
|
489 |
+
" print(f\"BLEU Score: {bleu_scores[i]:.4f}\")\n",
|
490 |
+
" print(f\"CHRF Score: {chrf_scores[i]:.4f}\")\n",
|
491 |
+
" print(\"-\" * 50)\n",
|
492 |
+
"\n"
|
493 |
+
]
|
494 |
+
},
|
495 |
+
{
|
496 |
+
"cell_type": "code",
|
497 |
+
"execution_count": 9,
|
498 |
+
"metadata": {
|
499 |
+
"id": "xKP2FldworBy"
|
500 |
+
},
|
501 |
+
"outputs": [],
|
502 |
+
"source": []
|
503 |
+
}
|
504 |
+
],
|
505 |
+
"metadata": {
|
506 |
+
"colab": {
|
507 |
+
"provenance": []
|
508 |
+
},
|
509 |
+
"kernelspec": {
|
510 |
+
"display_name": "Python 3",
|
511 |
+
"name": "python3"
|
512 |
+
},
|
513 |
+
"language_info": {
|
514 |
+
"name": "python"
|
515 |
+
}
|
516 |
+
},
|
517 |
+
"nbformat": 4,
|
518 |
+
"nbformat_minor": 0
|
519 |
+
}
|