Ubuntu commited on
Commit
7552785
1 Parent(s): ad4c3bb

adde finetuned GPT

Browse files
gpt3_finetuned_model/checkpoint-30048/added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 101,
3
+ "[MASK]": 103,
4
+ "[PAD]": 0,
5
+ "[SEP]": 102,
6
+ "[UNK]": 100
7
+ }
gpt3_finetuned_model/checkpoint-30048/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "models/trained_model_v11",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "NEGATIVE",
13
+ "1": "POSITIVE"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "NEGATIVE": 0,
18
+ "POSITIVE": 1
19
+ },
20
+ "max_position_embeddings": 512,
21
+ "model_type": "distilbert",
22
+ "n_heads": 12,
23
+ "n_layers": 6,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "qa_dropout": 0.1,
27
+ "seq_classif_dropout": 0.2,
28
+ "sinusoidal_pos_embds": false,
29
+ "tie_weights_": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.34.0",
32
+ "vocab_size": 30522
33
+ }
gpt3_finetuned_model/checkpoint-30048/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71a055defca04fc537104844b3f5f7e7f6cea21e2d1f3cd7c498866e2d39b57
3
+ size 535727290
gpt3_finetuned_model/checkpoint-30048/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b7e3c71765fda4de504a82e2cf9070743eb1cf2f05e03b9ff204b2b0724e6b
3
+ size 267855978
gpt3_finetuned_model/checkpoint-30048/rng_state.pth ADDED
Binary file (14.2 kB). View file
 
gpt3_finetuned_model/checkpoint-30048/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f4bc438d3cfa11ce6ef7474ba4bb4b984773fbef0cc7540ddff3103120407e
3
+ size 1064
gpt3_finetuned_model/checkpoint-30048/special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[PAD]",
4
+ "[UNK]",
5
+ "[CLS]",
6
+ "[SEP]",
7
+ "[MASK]"
8
+ ],
9
+ "cls_token": "[CLS]",
10
+ "mask_token": "[MASK]",
11
+ "pad_token": "[PAD]",
12
+ "sep_token": "[SEP]",
13
+ "unk_token": "[UNK]"
14
+ }
gpt3_finetuned_model/checkpoint-30048/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gpt3_finetuned_model/checkpoint-30048/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [
45
+ "[PAD]",
46
+ "[UNK]",
47
+ "[CLS]",
48
+ "[SEP]",
49
+ "[MASK]"
50
+ ],
51
+ "clean_up_tokenization_spaces": true,
52
+ "cls_token": "[CLS]",
53
+ "do_lower_case": true,
54
+ "mask_token": "[MASK]",
55
+ "max_length": 512,
56
+ "model_max_length": 512,
57
+ "pad_token": "[PAD]",
58
+ "sep_token": "[SEP]",
59
+ "stride": 0,
60
+ "strip_accents": null,
61
+ "tokenize_chinese_chars": true,
62
+ "tokenizer_class": "DistilBertTokenizer",
63
+ "truncation_side": "right",
64
+ "truncation_strategy": "longest_first",
65
+ "unk_token": "[UNK]"
66
+ }
gpt3_finetuned_model/checkpoint-30048/trainer_state.json ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.021361960098147392,
3
+ "best_model_checkpoint": "gpt3_finetuned_model/checkpoint-15024",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30048,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1.966719914802982e-05,
14
+ "loss": 0.0219,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.07,
19
+ "learning_rate": 1.933439829605964e-05,
20
+ "loss": 0.012,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.1,
25
+ "learning_rate": 1.900159744408946e-05,
26
+ "loss": 0.0141,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.13,
31
+ "learning_rate": 1.866879659211928e-05,
32
+ "loss": 0.0096,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.17,
37
+ "learning_rate": 1.8335995740149097e-05,
38
+ "loss": 0.0135,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.2,
43
+ "learning_rate": 1.8003194888178915e-05,
44
+ "loss": 0.021,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.23,
49
+ "learning_rate": 1.7670394036208734e-05,
50
+ "loss": 0.0125,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.27,
55
+ "learning_rate": 1.7337593184238552e-05,
56
+ "loss": 0.0084,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.3,
61
+ "learning_rate": 1.700479233226837e-05,
62
+ "loss": 0.0128,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.33,
67
+ "learning_rate": 1.6671991480298192e-05,
68
+ "loss": 0.0142,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.37,
73
+ "learning_rate": 1.633919062832801e-05,
74
+ "loss": 0.0143,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.4,
79
+ "learning_rate": 1.600638977635783e-05,
80
+ "loss": 0.0087,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.43,
85
+ "learning_rate": 1.5673588924387647e-05,
86
+ "loss": 0.0085,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.47,
91
+ "learning_rate": 1.5340788072417466e-05,
92
+ "loss": 0.0122,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.5,
97
+ "learning_rate": 1.5007987220447286e-05,
98
+ "loss": 0.0126,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.53,
103
+ "learning_rate": 1.4675186368477104e-05,
104
+ "loss": 0.014,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.57,
109
+ "learning_rate": 1.4342385516506923e-05,
110
+ "loss": 0.0086,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.6,
115
+ "learning_rate": 1.4009584664536741e-05,
116
+ "loss": 0.0094,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.63,
121
+ "learning_rate": 1.3676783812566561e-05,
122
+ "loss": 0.0136,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.67,
127
+ "learning_rate": 1.334398296059638e-05,
128
+ "loss": 0.0057,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.7,
133
+ "learning_rate": 1.30111821086262e-05,
134
+ "loss": 0.0126,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.73,
139
+ "learning_rate": 1.2678381256656018e-05,
140
+ "loss": 0.013,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 0.77,
145
+ "learning_rate": 1.2345580404685838e-05,
146
+ "loss": 0.0096,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 0.8,
151
+ "learning_rate": 1.2012779552715656e-05,
152
+ "loss": 0.0068,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 0.83,
157
+ "learning_rate": 1.1679978700745476e-05,
158
+ "loss": 0.0057,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 0.87,
163
+ "learning_rate": 1.1347177848775295e-05,
164
+ "loss": 0.0062,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 0.9,
169
+ "learning_rate": 1.1014376996805112e-05,
170
+ "loss": 0.0138,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 0.93,
175
+ "learning_rate": 1.0681576144834932e-05,
176
+ "loss": 0.007,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 0.97,
181
+ "learning_rate": 1.034877529286475e-05,
182
+ "loss": 0.0054,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 1.0,
187
+ "learning_rate": 1.0015974440894568e-05,
188
+ "loss": 0.0061,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 1.0,
193
+ "eval_accuracy": 0.9955404685835996,
194
+ "eval_loss": 0.021361960098147392,
195
+ "eval_runtime": 198.8868,
196
+ "eval_samples_per_second": 302.162,
197
+ "eval_steps_per_second": 18.885,
198
+ "step": 15024
199
+ },
200
+ {
201
+ "epoch": 1.03,
202
+ "learning_rate": 9.683173588924388e-06,
203
+ "loss": 0.0015,
204
+ "step": 15500
205
+ },
206
+ {
207
+ "epoch": 1.06,
208
+ "learning_rate": 9.350372736954207e-06,
209
+ "loss": 0.0026,
210
+ "step": 16000
211
+ },
212
+ {
213
+ "epoch": 1.1,
214
+ "learning_rate": 9.017571884984027e-06,
215
+ "loss": 0.002,
216
+ "step": 16500
217
+ },
218
+ {
219
+ "epoch": 1.13,
220
+ "learning_rate": 8.684771033013845e-06,
221
+ "loss": 0.0046,
222
+ "step": 17000
223
+ },
224
+ {
225
+ "epoch": 1.16,
226
+ "learning_rate": 8.351970181043664e-06,
227
+ "loss": 0.0023,
228
+ "step": 17500
229
+ },
230
+ {
231
+ "epoch": 1.2,
232
+ "learning_rate": 8.019169329073482e-06,
233
+ "loss": 0.0045,
234
+ "step": 18000
235
+ },
236
+ {
237
+ "epoch": 1.23,
238
+ "learning_rate": 7.686368477103302e-06,
239
+ "loss": 0.0023,
240
+ "step": 18500
241
+ },
242
+ {
243
+ "epoch": 1.26,
244
+ "learning_rate": 7.353567625133121e-06,
245
+ "loss": 0.0041,
246
+ "step": 19000
247
+ },
248
+ {
249
+ "epoch": 1.3,
250
+ "learning_rate": 7.020766773162941e-06,
251
+ "loss": 0.0016,
252
+ "step": 19500
253
+ },
254
+ {
255
+ "epoch": 1.33,
256
+ "learning_rate": 6.687965921192758e-06,
257
+ "loss": 0.0016,
258
+ "step": 20000
259
+ },
260
+ {
261
+ "epoch": 1.36,
262
+ "learning_rate": 6.355165069222577e-06,
263
+ "loss": 0.0033,
264
+ "step": 20500
265
+ },
266
+ {
267
+ "epoch": 1.4,
268
+ "learning_rate": 6.022364217252397e-06,
269
+ "loss": 0.0051,
270
+ "step": 21000
271
+ },
272
+ {
273
+ "epoch": 1.43,
274
+ "learning_rate": 5.689563365282216e-06,
275
+ "loss": 0.0002,
276
+ "step": 21500
277
+ },
278
+ {
279
+ "epoch": 1.46,
280
+ "learning_rate": 5.356762513312035e-06,
281
+ "loss": 0.0025,
282
+ "step": 22000
283
+ },
284
+ {
285
+ "epoch": 1.5,
286
+ "learning_rate": 5.023961661341853e-06,
287
+ "loss": 0.0039,
288
+ "step": 22500
289
+ },
290
+ {
291
+ "epoch": 1.53,
292
+ "learning_rate": 4.691160809371673e-06,
293
+ "loss": 0.0029,
294
+ "step": 23000
295
+ },
296
+ {
297
+ "epoch": 1.56,
298
+ "learning_rate": 4.358359957401491e-06,
299
+ "loss": 0.0041,
300
+ "step": 23500
301
+ },
302
+ {
303
+ "epoch": 1.6,
304
+ "learning_rate": 4.02555910543131e-06,
305
+ "loss": 0.0017,
306
+ "step": 24000
307
+ },
308
+ {
309
+ "epoch": 1.63,
310
+ "learning_rate": 3.692758253461129e-06,
311
+ "loss": 0.0,
312
+ "step": 24500
313
+ },
314
+ {
315
+ "epoch": 1.66,
316
+ "learning_rate": 3.359957401490948e-06,
317
+ "loss": 0.0021,
318
+ "step": 25000
319
+ },
320
+ {
321
+ "epoch": 1.7,
322
+ "learning_rate": 3.027156549520767e-06,
323
+ "loss": 0.0016,
324
+ "step": 25500
325
+ },
326
+ {
327
+ "epoch": 1.73,
328
+ "learning_rate": 2.694355697550586e-06,
329
+ "loss": 0.0,
330
+ "step": 26000
331
+ },
332
+ {
333
+ "epoch": 1.76,
334
+ "learning_rate": 2.3615548455804047e-06,
335
+ "loss": 0.0001,
336
+ "step": 26500
337
+ },
338
+ {
339
+ "epoch": 1.8,
340
+ "learning_rate": 2.028753993610224e-06,
341
+ "loss": 0.0001,
342
+ "step": 27000
343
+ },
344
+ {
345
+ "epoch": 1.83,
346
+ "learning_rate": 1.6959531416400426e-06,
347
+ "loss": 0.0012,
348
+ "step": 27500
349
+ },
350
+ {
351
+ "epoch": 1.86,
352
+ "learning_rate": 1.3631522896698618e-06,
353
+ "loss": 0.001,
354
+ "step": 28000
355
+ },
356
+ {
357
+ "epoch": 1.9,
358
+ "learning_rate": 1.0303514376996806e-06,
359
+ "loss": 0.0008,
360
+ "step": 28500
361
+ },
362
+ {
363
+ "epoch": 1.93,
364
+ "learning_rate": 6.975505857294995e-07,
365
+ "loss": 0.004,
366
+ "step": 29000
367
+ },
368
+ {
369
+ "epoch": 1.96,
370
+ "learning_rate": 3.6474973375931847e-07,
371
+ "loss": 0.0009,
372
+ "step": 29500
373
+ },
374
+ {
375
+ "epoch": 2.0,
376
+ "learning_rate": 3.194888178913738e-08,
377
+ "loss": 0.001,
378
+ "step": 30000
379
+ },
380
+ {
381
+ "epoch": 2.0,
382
+ "eval_accuracy": 0.9962227103301384,
383
+ "eval_loss": 0.02437487617135048,
384
+ "eval_runtime": 197.6909,
385
+ "eval_samples_per_second": 303.99,
386
+ "eval_steps_per_second": 18.999,
387
+ "step": 30048
388
+ }
389
+ ],
390
+ "logging_steps": 500,
391
+ "max_steps": 30048,
392
+ "num_train_epochs": 2,
393
+ "save_steps": 500,
394
+ "total_flos": 4.576643477567117e+16,
395
+ "trial_name": null,
396
+ "trial_params": null
397
+ }
gpt3_finetuned_model/checkpoint-30048/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723d5cdbe3a4ea51b735c6cbdf8a9dcb0dafce1461aeb999cf821459dd7890f8
3
+ size 4536
gpt3_finetuned_model/checkpoint-30048/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
gpt3_finetuned_model/runs/Oct09_08-16-50_ip-172-31-95-165/events.out.tfevents.1696839410.ip-172-31-95-165.74908.0 CHANGED
Binary files a/gpt3_finetuned_model/runs/Oct09_08-16-50_ip-172-31-95-165/events.out.tfevents.1696839410.ip-172-31-95-165.74908.0 and b/gpt3_finetuned_model/runs/Oct09_08-16-50_ip-172-31-95-165/events.out.tfevents.1696839410.ip-172-31-95-165.74908.0 differ
 
research/05_data_gpt.ipynb CHANGED
@@ -1257,8 +1257,8 @@
1257
  "\n",
1258
  " <div>\n",
1259
  " \n",
1260
- " <progress value='24978' max='30048' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1261
- " [24978/30048 1:08:42 < 13:56, 6.06 it/s, Epoch 1.66/2]\n",
1262
  " </div>\n",
1263
  " <table border=\"1\" class=\"dataframe\">\n",
1264
  " <thead>\n",
@@ -1276,6 +1276,12 @@
1276
  " <td>0.021362</td>\n",
1277
  " <td>0.995540</td>\n",
1278
  " </tr>\n",
 
 
 
 
 
 
1279
  " </tbody>\n",
1280
  "</table><p>"
1281
  ],
@@ -1285,6 +1291,16 @@
1285
  },
1286
  "metadata": {},
1287
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
1288
  }
1289
  ],
1290
  "source": [
@@ -1344,13 +1360,24 @@
1344
  },
1345
  {
1346
  "cell_type": "code",
1347
- "execution_count": 32,
1348
  "metadata": {},
1349
  "outputs": [
1350
  {
1351
  "name": "stderr",
1352
  "output_type": "stream",
1353
  "text": [
 
 
 
 
 
 
 
 
 
 
 
1354
  "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
1355
  ]
1356
  }
@@ -1358,26 +1385,14 @@
1358
  "source": [
1359
  "from transformers import pipeline\n",
1360
  "\n",
1361
- "classifier = pipeline(\"text-classification\", model=\"gpt3_finetuned_model/checkpoint-480\", device=\"cuda\")"
1362
  ]
1363
  },
1364
  {
1365
  "cell_type": "code",
1366
- "execution_count": 31,
1367
  "metadata": {},
1368
- "outputs": [
1369
- {
1370
- "ename": "TypeError",
1371
- "evalue": "AutoTokenizer.__init__() takes 1 positional argument but 2 were given",
1372
- "output_type": "error",
1373
- "traceback": [
1374
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1375
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
1376
- "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_data_gpt.ipynb Cell 28\u001b[0m line \u001b[0;36m3\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_data_gpt.ipynb#Y100sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoModelForSequenceClassification, AutoTokenizer\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_data_gpt.ipynb#Y100sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m model_name\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgpt3_finetuned_model/checkpoint-480\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_data_gpt.ipynb#Y100sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m AutoTokenizer(model_name)\n",
1377
- "\u001b[0;31mTypeError\u001b[0m: AutoTokenizer.__init__() takes 1 positional argument but 2 were given"
1378
- ]
1379
- }
1380
- ],
1381
  "source": [
1382
  "# from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
1383
  "# model_name= \"gpt3_finetuned_model/checkpoint-480\"\n",
@@ -1386,16 +1401,16 @@
1386
  },
1387
  {
1388
  "cell_type": "code",
1389
- "execution_count": 3,
1390
  "metadata": {},
1391
  "outputs": [
1392
  {
1393
  "data": {
1394
  "text/plain": [
1395
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
1396
  ]
1397
  },
1398
- "execution_count": 3,
1399
  "metadata": {},
1400
  "output_type": "execute_result"
1401
  }
@@ -1414,16 +1429,16 @@
1414
  },
1415
  {
1416
  "cell_type": "code",
1417
- "execution_count": 4,
1418
  "metadata": {},
1419
  "outputs": [
1420
  {
1421
  "data": {
1422
  "text/plain": [
1423
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
1424
  ]
1425
  },
1426
- "execution_count": 4,
1427
  "metadata": {},
1428
  "output_type": "execute_result"
1429
  }
@@ -1445,7 +1460,7 @@
1445
  },
1446
  {
1447
  "cell_type": "code",
1448
- "execution_count": 5,
1449
  "metadata": {},
1450
  "outputs": [
1451
  {
@@ -1512,7 +1527,7 @@
1512
  "4 Rick Mahler (born Richard Alan Mahler on April... 1"
1513
  ]
1514
  },
1515
- "execution_count": 5,
1516
  "metadata": {},
1517
  "output_type": "execute_result"
1518
  }
@@ -1525,7 +1540,7 @@
1525
  },
1526
  {
1527
  "cell_type": "code",
1528
- "execution_count": 10,
1529
  "metadata": {},
1530
  "outputs": [
1531
  {
@@ -1543,7 +1558,7 @@
1543
  " 1]], dtype=object)"
1544
  ]
1545
  },
1546
- "execution_count": 10,
1547
  "metadata": {},
1548
  "output_type": "execute_result"
1549
  }
@@ -1554,24 +1569,16 @@
1554
  },
1555
  {
1556
  "cell_type": "code",
1557
- "execution_count": 11,
1558
  "metadata": {},
1559
  "outputs": [
1560
- {
1561
- "name": "stderr",
1562
- "output_type": "stream",
1563
- "text": [
1564
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1565
- " warnings.warn(\n"
1566
- ]
1567
- },
1568
  {
1569
  "data": {
1570
  "text/plain": [
1571
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1572
  ]
1573
  },
1574
- "execution_count": 11,
1575
  "metadata": {},
1576
  "output_type": "execute_result"
1577
  }
@@ -1588,24 +1595,16 @@
1588
  },
1589
  {
1590
  "cell_type": "code",
1591
- "execution_count": 12,
1592
  "metadata": {},
1593
  "outputs": [
1594
- {
1595
- "name": "stderr",
1596
- "output_type": "stream",
1597
- "text": [
1598
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1599
- " warnings.warn(\n"
1600
- ]
1601
- },
1602
  {
1603
  "data": {
1604
  "text/plain": [
1605
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1606
  ]
1607
  },
1608
- "execution_count": 12,
1609
  "metadata": {},
1610
  "output_type": "execute_result"
1611
  }
@@ -1617,24 +1616,16 @@
1617
  },
1618
  {
1619
  "cell_type": "code",
1620
- "execution_count": 13,
1621
  "metadata": {},
1622
  "outputs": [
1623
- {
1624
- "name": "stderr",
1625
- "output_type": "stream",
1626
- "text": [
1627
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1628
- " warnings.warn(\n"
1629
- ]
1630
- },
1631
  {
1632
  "data": {
1633
  "text/plain": [
1634
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1635
  ]
1636
  },
1637
- "execution_count": 13,
1638
  "metadata": {},
1639
  "output_type": "execute_result"
1640
  }
@@ -1648,24 +1639,16 @@
1648
  },
1649
  {
1650
  "cell_type": "code",
1651
- "execution_count": 14,
1652
  "metadata": {},
1653
  "outputs": [
1654
- {
1655
- "name": "stderr",
1656
- "output_type": "stream",
1657
- "text": [
1658
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1659
- " warnings.warn(\n"
1660
- ]
1661
- },
1662
  {
1663
  "data": {
1664
  "text/plain": [
1665
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1666
  ]
1667
  },
1668
- "execution_count": 14,
1669
  "metadata": {},
1670
  "output_type": "execute_result"
1671
  }
@@ -1678,24 +1661,16 @@
1678
  },
1679
  {
1680
  "cell_type": "code",
1681
- "execution_count": 15,
1682
  "metadata": {},
1683
  "outputs": [
1684
- {
1685
- "name": "stderr",
1686
- "output_type": "stream",
1687
- "text": [
1688
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1689
- " warnings.warn(\n"
1690
- ]
1691
- },
1692
  {
1693
  "data": {
1694
  "text/plain": [
1695
- "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]"
1696
  ]
1697
  },
1698
- "execution_count": 15,
1699
  "metadata": {},
1700
  "output_type": "execute_result"
1701
  }
@@ -1710,24 +1685,16 @@
1710
  },
1711
  {
1712
  "cell_type": "code",
1713
- "execution_count": 17,
1714
  "metadata": {},
1715
  "outputs": [
1716
- {
1717
- "name": "stderr",
1718
- "output_type": "stream",
1719
- "text": [
1720
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1721
- " warnings.warn(\n"
1722
- ]
1723
- },
1724
  {
1725
  "data": {
1726
  "text/plain": [
1727
  "[{'label': 'POSITIVE', 'score': 1.0}]"
1728
  ]
1729
  },
1730
- "execution_count": 17,
1731
  "metadata": {},
1732
  "output_type": "execute_result"
1733
  }
@@ -1740,24 +1707,16 @@
1740
  },
1741
  {
1742
  "cell_type": "code",
1743
- "execution_count": 18,
1744
  "metadata": {},
1745
  "outputs": [
1746
- {
1747
- "name": "stderr",
1748
- "output_type": "stream",
1749
- "text": [
1750
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1751
- " warnings.warn(\n"
1752
- ]
1753
- },
1754
  {
1755
  "data": {
1756
  "text/plain": [
1757
  "[{'label': 'POSITIVE', 'score': 1.0}]"
1758
  ]
1759
  },
1760
- "execution_count": 18,
1761
  "metadata": {},
1762
  "output_type": "execute_result"
1763
  }
@@ -1770,24 +1729,16 @@
1770
  },
1771
  {
1772
  "cell_type": "code",
1773
- "execution_count": 19,
1774
  "metadata": {},
1775
  "outputs": [
1776
- {
1777
- "name": "stderr",
1778
- "output_type": "stream",
1779
- "text": [
1780
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1781
- " warnings.warn(\n"
1782
- ]
1783
- },
1784
  {
1785
  "data": {
1786
  "text/plain": [
1787
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1788
  ]
1789
  },
1790
- "execution_count": 19,
1791
  "metadata": {},
1792
  "output_type": "execute_result"
1793
  }
@@ -1801,7 +1752,7 @@
1801
  },
1802
  {
1803
  "cell_type": "code",
1804
- "execution_count": 20,
1805
  "metadata": {},
1806
  "outputs": [
1807
  {
@@ -1815,10 +1766,10 @@
1815
  {
1816
  "data": {
1817
  "text/plain": [
1818
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1819
  ]
1820
  },
1821
- "execution_count": 20,
1822
  "metadata": {},
1823
  "output_type": "execute_result"
1824
  }
@@ -1834,24 +1785,16 @@
1834
  },
1835
  {
1836
  "cell_type": "code",
1837
- "execution_count": 21,
1838
  "metadata": {},
1839
  "outputs": [
1840
- {
1841
- "name": "stderr",
1842
- "output_type": "stream",
1843
- "text": [
1844
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1845
- " warnings.warn(\n"
1846
- ]
1847
- },
1848
  {
1849
  "data": {
1850
  "text/plain": [
1851
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1852
  ]
1853
  },
1854
- "execution_count": 21,
1855
  "metadata": {},
1856
  "output_type": "execute_result"
1857
  }
@@ -1865,24 +1808,16 @@
1865
  },
1866
  {
1867
  "cell_type": "code",
1868
- "execution_count": 22,
1869
  "metadata": {},
1870
  "outputs": [
1871
- {
1872
- "name": "stderr",
1873
- "output_type": "stream",
1874
- "text": [
1875
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1876
- " warnings.warn(\n"
1877
- ]
1878
- },
1879
  {
1880
  "data": {
1881
  "text/plain": [
1882
- "[{'label': 'POSITIVE', 'score': 1.0}]"
1883
  ]
1884
  },
1885
- "execution_count": 22,
1886
  "metadata": {},
1887
  "output_type": "execute_result"
1888
  }
@@ -1896,7 +1831,7 @@
1896
  },
1897
  {
1898
  "cell_type": "code",
1899
- "execution_count": 23,
1900
  "metadata": {},
1901
  "outputs": [
1902
  {
@@ -1914,7 +1849,7 @@
1914
  " 0]], dtype=object)"
1915
  ]
1916
  },
1917
- "execution_count": 23,
1918
  "metadata": {},
1919
  "output_type": "execute_result"
1920
  }
@@ -1925,7 +1860,7 @@
1925
  },
1926
  {
1927
  "cell_type": "code",
1928
- "execution_count": 24,
1929
  "metadata": {},
1930
  "outputs": [],
1931
  "source": [
@@ -1934,24 +1869,16 @@
1934
  },
1935
  {
1936
  "cell_type": "code",
1937
- "execution_count": 25,
1938
  "metadata": {},
1939
  "outputs": [
1940
- {
1941
- "name": "stderr",
1942
- "output_type": "stream",
1943
- "text": [
1944
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
1945
- " warnings.warn(\n"
1946
- ]
1947
- },
1948
  {
1949
  "data": {
1950
  "text/plain": [
1951
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
1952
  ]
1953
  },
1954
- "execution_count": 25,
1955
  "metadata": {},
1956
  "output_type": "execute_result"
1957
  }
@@ -1962,7 +1889,7 @@
1962
  },
1963
  {
1964
  "cell_type": "code",
1965
- "execution_count": 26,
1966
  "metadata": {},
1967
  "outputs": [
1968
  {
@@ -2029,7 +1956,7 @@
2029
  "4 Rick Mahler (born Richard Alan Mahler on April... 1"
2030
  ]
2031
  },
2032
- "execution_count": 26,
2033
  "metadata": {},
2034
  "output_type": "execute_result"
2035
  }
@@ -2068,16 +1995,24 @@
2068
  },
2069
  {
2070
  "cell_type": "code",
2071
- "execution_count": 33,
2072
  "metadata": {},
2073
  "outputs": [
 
 
 
 
 
 
 
 
2074
  {
2075
  "data": {
2076
  "text/plain": [
2077
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
2078
  ]
2079
  },
2080
- "execution_count": 33,
2081
  "metadata": {},
2082
  "output_type": "execute_result"
2083
  }
@@ -2094,16 +2029,16 @@
2094
  },
2095
  {
2096
  "cell_type": "code",
2097
- "execution_count": 34,
2098
  "metadata": {},
2099
  "outputs": [
2100
  {
2101
  "data": {
2102
  "text/plain": [
2103
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
2104
  ]
2105
  },
2106
- "execution_count": 34,
2107
  "metadata": {},
2108
  "output_type": "execute_result"
2109
  }
@@ -2124,16 +2059,16 @@
2124
  },
2125
  {
2126
  "cell_type": "code",
2127
- "execution_count": 35,
2128
  "metadata": {},
2129
  "outputs": [
2130
  {
2131
  "data": {
2132
  "text/plain": [
2133
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
2134
  ]
2135
  },
2136
- "execution_count": 35,
2137
  "metadata": {},
2138
  "output_type": "execute_result"
2139
  }
@@ -2148,16 +2083,16 @@
2148
  },
2149
  {
2150
  "cell_type": "code",
2151
- "execution_count": 36,
2152
  "metadata": {},
2153
  "outputs": [
2154
  {
2155
  "data": {
2156
  "text/plain": [
2157
- "[{'label': 'POSITIVE', 'score': 1.0}]"
2158
  ]
2159
  },
2160
- "execution_count": 36,
2161
  "metadata": {},
2162
  "output_type": "execute_result"
2163
  }
@@ -2168,16 +2103,16 @@
2168
  },
2169
  {
2170
  "cell_type": "code",
2171
- "execution_count": 37,
2172
  "metadata": {},
2173
  "outputs": [
2174
  {
2175
  "data": {
2176
  "text/plain": [
2177
- "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2178
  ]
2179
  },
2180
- "execution_count": 37,
2181
  "metadata": {},
2182
  "output_type": "execute_result"
2183
  }
@@ -2188,16 +2123,16 @@
2188
  },
2189
  {
2190
  "cell_type": "code",
2191
- "execution_count": 41,
2192
  "metadata": {},
2193
  "outputs": [
2194
  {
2195
  "data": {
2196
  "text/plain": [
2197
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
2198
  ]
2199
  },
2200
- "execution_count": 41,
2201
  "metadata": {},
2202
  "output_type": "execute_result"
2203
  }
@@ -2208,7 +2143,7 @@
2208
  },
2209
  {
2210
  "cell_type": "code",
2211
- "execution_count": 40,
2212
  "metadata": {},
2213
  "outputs": [
2214
  {
@@ -2217,7 +2152,7 @@
2217
  "[{'label': 'POSITIVE', 'score': 1.0}]"
2218
  ]
2219
  },
2220
- "execution_count": 40,
2221
  "metadata": {},
2222
  "output_type": "execute_result"
2223
  }
@@ -2231,7 +2166,7 @@
2231
  },
2232
  {
2233
  "cell_type": "code",
2234
- "execution_count": 42,
2235
  "metadata": {},
2236
  "outputs": [
2237
  {
@@ -2240,7 +2175,7 @@
2240
  "[{'label': 'POSITIVE', 'score': 1.0}]"
2241
  ]
2242
  },
2243
- "execution_count": 42,
2244
  "metadata": {},
2245
  "output_type": "execute_result"
2246
  }
@@ -2257,16 +2192,16 @@
2257
  },
2258
  {
2259
  "cell_type": "code",
2260
- "execution_count": 44,
2261
  "metadata": {},
2262
  "outputs": [
2263
  {
2264
  "data": {
2265
  "text/plain": [
2266
- "[{'label': 'NEGATIVE', 'score': 1.0}]"
2267
  ]
2268
  },
2269
- "execution_count": 44,
2270
  "metadata": {},
2271
  "output_type": "execute_result"
2272
  }
@@ -2402,29 +2337,23 @@
2402
  },
2403
  {
2404
  "cell_type": "code",
2405
- "execution_count": 48,
2406
  "metadata": {},
2407
  "outputs": [
2408
- {
2409
- "name": "stderr",
2410
- "output_type": "stream",
2411
- "text": [
2412
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2413
- " warnings.warn(\n"
2414
- ]
2415
- },
2416
  {
2417
  "data": {
2418
  "text/plain": [
2419
- "[{'label': 'POSITIVE', 'score': 0.9975578784942627}]"
2420
  ]
2421
  },
2422
- "execution_count": 48,
2423
  "metadata": {},
2424
  "output_type": "execute_result"
2425
  }
2426
  ],
2427
  "source": [
 
 
2428
  "classifier('''Paul Caddis is a Scottish professional footballer who primarily plays as a defender, specifically as a right-back or right-wing-back. He was born on April 19, 1988, in Irvine, Scotland. Caddis has had a career spanning multiple clubs in English and Scottish football.\n",
2429
  "\n",
2430
  "Caddis began his professional career with Scottish club Celtic in 2006, but he spent much of his early career on loan spells at other clubs to gain experience. In 2010, he joined Swindon Town on loan and played a crucial role in helping them achieve promotion from League Two to League One. His performances earned him a permanent move to Swindon Town in 2010.\n",
@@ -2436,6 +2365,961 @@
2436
  "Please note that player careers in professional sports can change over time, and my knowledge is based on information available up to September 2021. For the most up-to-date information on Paul Caddis's career, you may want to refer to more recent sources or the official website of his current club.''')"
2437
  ]
2438
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2439
  {
2440
  "cell_type": "code",
2441
  "execution_count": null,
 
1257
  "\n",
1258
  " <div>\n",
1259
  " \n",
1260
+ " <progress value='30048' max='30048' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1261
+ " [30048/30048 1:25:21, Epoch 2/2]\n",
1262
  " </div>\n",
1263
  " <table border=\"1\" class=\"dataframe\">\n",
1264
  " <thead>\n",
 
1276
  " <td>0.021362</td>\n",
1277
  " <td>0.995540</td>\n",
1278
  " </tr>\n",
1279
+ " <tr>\n",
1280
+ " <td>2</td>\n",
1281
+ " <td>0.001000</td>\n",
1282
+ " <td>0.024375</td>\n",
1283
+ " <td>0.996223</td>\n",
1284
+ " </tr>\n",
1285
  " </tbody>\n",
1286
  "</table><p>"
1287
  ],
 
1291
  },
1292
  "metadata": {},
1293
  "output_type": "display_data"
1294
+ },
1295
+ {
1296
+ "data": {
1297
+ "text/plain": [
1298
+ "TrainOutput(global_step=30048, training_loss=0.006611945222465289, metrics={'train_runtime': 5122.033, 'train_samples_per_second': 93.861, 'train_steps_per_second': 5.866, 'total_flos': 4.576643477567117e+16, 'train_loss': 0.006611945222465289, 'epoch': 2.0})"
1299
+ ]
1300
+ },
1301
+ "execution_count": 19,
1302
+ "metadata": {},
1303
+ "output_type": "execute_result"
1304
  }
1305
  ],
1306
  "source": [
 
1360
  },
1361
  {
1362
  "cell_type": "code",
1363
+ "execution_count": 2,
1364
  "metadata": {},
1365
  "outputs": [
1366
  {
1367
  "name": "stderr",
1368
  "output_type": "stream",
1369
  "text": [
1370
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
1371
+ " from .autonotebook import tqdm as notebook_tqdm\n"
1372
+ ]
1373
+ },
1374
+ {
1375
+ "name": "stderr",
1376
+ "output_type": "stream",
1377
+ "text": [
1378
+ "2023-10-09 09:42:37.342580: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
1379
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
1380
+ "2023-10-09 09:42:38.742297: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
1381
  "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
1382
  ]
1383
  }
 
1385
  "source": [
1386
  "from transformers import pipeline\n",
1387
  "\n",
1388
+ "classifier = pipeline(\"text-classification\", model=\"gpt3_finetuned_model/checkpoint-30048\", device=\"cuda\")"
1389
  ]
1390
  },
1391
  {
1392
  "cell_type": "code",
1393
+ "execution_count": 3,
1394
  "metadata": {},
1395
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
1396
  "source": [
1397
  "# from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
1398
  "# model_name= \"gpt3_finetuned_model/checkpoint-480\"\n",
 
1401
  },
1402
  {
1403
  "cell_type": "code",
1404
+ "execution_count": 4,
1405
  "metadata": {},
1406
  "outputs": [
1407
  {
1408
  "data": {
1409
  "text/plain": [
1410
+ "[{'label': 'NEGATIVE', 'score': 0.9999996423721313}]"
1411
  ]
1412
  },
1413
+ "execution_count": 4,
1414
  "metadata": {},
1415
  "output_type": "execute_result"
1416
  }
 
1429
  },
1430
  {
1431
  "cell_type": "code",
1432
+ "execution_count": 5,
1433
  "metadata": {},
1434
  "outputs": [
1435
  {
1436
  "data": {
1437
  "text/plain": [
1438
+ "[{'label': 'NEGATIVE', 'score': 0.9995008707046509}]"
1439
  ]
1440
  },
1441
+ "execution_count": 5,
1442
  "metadata": {},
1443
  "output_type": "execute_result"
1444
  }
 
1460
  },
1461
  {
1462
  "cell_type": "code",
1463
+ "execution_count": 6,
1464
  "metadata": {},
1465
  "outputs": [
1466
  {
 
1527
  "4 Rick Mahler (born Richard Alan Mahler on April... 1"
1528
  ]
1529
  },
1530
+ "execution_count": 6,
1531
  "metadata": {},
1532
  "output_type": "execute_result"
1533
  }
 
1540
  },
1541
  {
1542
  "cell_type": "code",
1543
+ "execution_count": 7,
1544
  "metadata": {},
1545
  "outputs": [
1546
  {
 
1558
  " 1]], dtype=object)"
1559
  ]
1560
  },
1561
+ "execution_count": 7,
1562
  "metadata": {},
1563
  "output_type": "execute_result"
1564
  }
 
1569
  },
1570
  {
1571
  "cell_type": "code",
1572
+ "execution_count": 8,
1573
  "metadata": {},
1574
  "outputs": [
 
 
 
 
 
 
 
 
1575
  {
1576
  "data": {
1577
  "text/plain": [
1578
+ "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]"
1579
  ]
1580
  },
1581
+ "execution_count": 8,
1582
  "metadata": {},
1583
  "output_type": "execute_result"
1584
  }
 
1595
  },
1596
  {
1597
  "cell_type": "code",
1598
+ "execution_count": 9,
1599
  "metadata": {},
1600
  "outputs": [
 
 
 
 
 
 
 
 
1601
  {
1602
  "data": {
1603
  "text/plain": [
1604
+ "[{'label': 'POSITIVE', 'score': 0.9997896552085876}]"
1605
  ]
1606
  },
1607
+ "execution_count": 9,
1608
  "metadata": {},
1609
  "output_type": "execute_result"
1610
  }
 
1616
  },
1617
  {
1618
  "cell_type": "code",
1619
+ "execution_count": 10,
1620
  "metadata": {},
1621
  "outputs": [
 
 
 
 
 
 
 
 
1622
  {
1623
  "data": {
1624
  "text/plain": [
1625
+ "[{'label': 'POSITIVE', 'score': 0.9999992847442627}]"
1626
  ]
1627
  },
1628
+ "execution_count": 10,
1629
  "metadata": {},
1630
  "output_type": "execute_result"
1631
  }
 
1639
  },
1640
  {
1641
  "cell_type": "code",
1642
+ "execution_count": 11,
1643
  "metadata": {},
1644
  "outputs": [
 
 
 
 
 
 
 
 
1645
  {
1646
  "data": {
1647
  "text/plain": [
1648
+ "[{'label': 'POSITIVE', 'score': 0.9999995231628418}]"
1649
  ]
1650
  },
1651
+ "execution_count": 11,
1652
  "metadata": {},
1653
  "output_type": "execute_result"
1654
  }
 
1661
  },
1662
  {
1663
  "cell_type": "code",
1664
+ "execution_count": 12,
1665
  "metadata": {},
1666
  "outputs": [
 
 
 
 
 
 
 
 
1667
  {
1668
  "data": {
1669
  "text/plain": [
1670
+ "[{'label': 'NEGATIVE', 'score': 0.9709567427635193}]"
1671
  ]
1672
  },
1673
+ "execution_count": 12,
1674
  "metadata": {},
1675
  "output_type": "execute_result"
1676
  }
 
1685
  },
1686
  {
1687
  "cell_type": "code",
1688
+ "execution_count": 13,
1689
  "metadata": {},
1690
  "outputs": [
 
 
 
 
 
 
 
 
1691
  {
1692
  "data": {
1693
  "text/plain": [
1694
  "[{'label': 'POSITIVE', 'score': 1.0}]"
1695
  ]
1696
  },
1697
+ "execution_count": 13,
1698
  "metadata": {},
1699
  "output_type": "execute_result"
1700
  }
 
1707
  },
1708
  {
1709
  "cell_type": "code",
1710
+ "execution_count": 14,
1711
  "metadata": {},
1712
  "outputs": [
 
 
 
 
 
 
 
 
1713
  {
1714
  "data": {
1715
  "text/plain": [
1716
  "[{'label': 'POSITIVE', 'score': 1.0}]"
1717
  ]
1718
  },
1719
+ "execution_count": 14,
1720
  "metadata": {},
1721
  "output_type": "execute_result"
1722
  }
 
1729
  },
1730
  {
1731
  "cell_type": "code",
1732
+ "execution_count": 15,
1733
  "metadata": {},
1734
  "outputs": [
 
 
 
 
 
 
 
 
1735
  {
1736
  "data": {
1737
  "text/plain": [
1738
+ "[{'label': 'POSITIVE', 'score': 0.999936580657959}]"
1739
  ]
1740
  },
1741
+ "execution_count": 15,
1742
  "metadata": {},
1743
  "output_type": "execute_result"
1744
  }
 
1752
  },
1753
  {
1754
  "cell_type": "code",
1755
+ "execution_count": 16,
1756
  "metadata": {},
1757
  "outputs": [
1758
  {
 
1766
  {
1767
  "data": {
1768
  "text/plain": [
1769
+ "[{'label': 'POSITIVE', 'score': 0.9999960660934448}]"
1770
  ]
1771
  },
1772
+ "execution_count": 16,
1773
  "metadata": {},
1774
  "output_type": "execute_result"
1775
  }
 
1785
  },
1786
  {
1787
  "cell_type": "code",
1788
+ "execution_count": 17,
1789
  "metadata": {},
1790
  "outputs": [
 
 
 
 
 
 
 
 
1791
  {
1792
  "data": {
1793
  "text/plain": [
1794
+ "[{'label': 'POSITIVE', 'score': 0.9999985694885254}]"
1795
  ]
1796
  },
1797
+ "execution_count": 17,
1798
  "metadata": {},
1799
  "output_type": "execute_result"
1800
  }
 
1808
  },
1809
  {
1810
  "cell_type": "code",
1811
+ "execution_count": 18,
1812
  "metadata": {},
1813
  "outputs": [
 
 
 
 
 
 
 
 
1814
  {
1815
  "data": {
1816
  "text/plain": [
1817
+ "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]"
1818
  ]
1819
  },
1820
+ "execution_count": 18,
1821
  "metadata": {},
1822
  "output_type": "execute_result"
1823
  }
 
1831
  },
1832
  {
1833
  "cell_type": "code",
1834
+ "execution_count": 19,
1835
  "metadata": {},
1836
  "outputs": [
1837
  {
 
1849
  " 0]], dtype=object)"
1850
  ]
1851
  },
1852
+ "execution_count": 19,
1853
  "metadata": {},
1854
  "output_type": "execute_result"
1855
  }
 
1860
  },
1861
  {
1862
  "cell_type": "code",
1863
+ "execution_count": 20,
1864
  "metadata": {},
1865
  "outputs": [],
1866
  "source": [
 
1869
  },
1870
  {
1871
  "cell_type": "code",
1872
+ "execution_count": 21,
1873
  "metadata": {},
1874
  "outputs": [
 
 
 
 
 
 
 
 
1875
  {
1876
  "data": {
1877
  "text/plain": [
1878
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
1879
  ]
1880
  },
1881
+ "execution_count": 21,
1882
  "metadata": {},
1883
  "output_type": "execute_result"
1884
  }
 
1889
  },
1890
  {
1891
  "cell_type": "code",
1892
+ "execution_count": 22,
1893
  "metadata": {},
1894
  "outputs": [
1895
  {
 
1956
  "4 Rick Mahler (born Richard Alan Mahler on April... 1"
1957
  ]
1958
  },
1959
+ "execution_count": 22,
1960
  "metadata": {},
1961
  "output_type": "execute_result"
1962
  }
 
1995
  },
1996
  {
1997
  "cell_type": "code",
1998
+ "execution_count": 23,
1999
  "metadata": {},
2000
  "outputs": [
2001
+ {
2002
+ "name": "stderr",
2003
+ "output_type": "stream",
2004
+ "text": [
2005
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2006
+ " warnings.warn(\n"
2007
+ ]
2008
+ },
2009
  {
2010
  "data": {
2011
  "text/plain": [
2012
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2013
  ]
2014
  },
2015
+ "execution_count": 23,
2016
  "metadata": {},
2017
  "output_type": "execute_result"
2018
  }
 
2029
  },
2030
  {
2031
  "cell_type": "code",
2032
+ "execution_count": 24,
2033
  "metadata": {},
2034
  "outputs": [
2035
  {
2036
  "data": {
2037
  "text/plain": [
2038
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2039
  ]
2040
  },
2041
+ "execution_count": 24,
2042
  "metadata": {},
2043
  "output_type": "execute_result"
2044
  }
 
2059
  },
2060
  {
2061
  "cell_type": "code",
2062
+ "execution_count": 25,
2063
  "metadata": {},
2064
  "outputs": [
2065
  {
2066
  "data": {
2067
  "text/plain": [
2068
+ "[{'label': 'NEGATIVE', 'score': 0.9994685053825378}]"
2069
  ]
2070
  },
2071
+ "execution_count": 25,
2072
  "metadata": {},
2073
  "output_type": "execute_result"
2074
  }
 
2083
  },
2084
  {
2085
  "cell_type": "code",
2086
+ "execution_count": 26,
2087
  "metadata": {},
2088
  "outputs": [
2089
  {
2090
  "data": {
2091
  "text/plain": [
2092
+ "[{'label': 'POSITIVE', 'score': 0.9999936819076538}]"
2093
  ]
2094
  },
2095
+ "execution_count": 26,
2096
  "metadata": {},
2097
  "output_type": "execute_result"
2098
  }
 
2103
  },
2104
  {
2105
  "cell_type": "code",
2106
+ "execution_count": 27,
2107
  "metadata": {},
2108
  "outputs": [
2109
  {
2110
  "data": {
2111
  "text/plain": [
2112
+ "[{'label': 'NEGATIVE', 'score': 0.9999955892562866}]"
2113
  ]
2114
  },
2115
+ "execution_count": 27,
2116
  "metadata": {},
2117
  "output_type": "execute_result"
2118
  }
 
2123
  },
2124
  {
2125
  "cell_type": "code",
2126
+ "execution_count": 28,
2127
  "metadata": {},
2128
  "outputs": [
2129
  {
2130
  "data": {
2131
  "text/plain": [
2132
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2133
  ]
2134
  },
2135
+ "execution_count": 28,
2136
  "metadata": {},
2137
  "output_type": "execute_result"
2138
  }
 
2143
  },
2144
  {
2145
  "cell_type": "code",
2146
+ "execution_count": 29,
2147
  "metadata": {},
2148
  "outputs": [
2149
  {
 
2152
  "[{'label': 'POSITIVE', 'score': 1.0}]"
2153
  ]
2154
  },
2155
+ "execution_count": 29,
2156
  "metadata": {},
2157
  "output_type": "execute_result"
2158
  }
 
2166
  },
2167
  {
2168
  "cell_type": "code",
2169
+ "execution_count": 30,
2170
  "metadata": {},
2171
  "outputs": [
2172
  {
 
2175
  "[{'label': 'POSITIVE', 'score': 1.0}]"
2176
  ]
2177
  },
2178
+ "execution_count": 30,
2179
  "metadata": {},
2180
  "output_type": "execute_result"
2181
  }
 
2192
  },
2193
  {
2194
  "cell_type": "code",
2195
+ "execution_count": 31,
2196
  "metadata": {},
2197
  "outputs": [
2198
  {
2199
  "data": {
2200
  "text/plain": [
2201
+ "[{'label': 'NEGATIVE', 'score': 0.999908447265625}]"
2202
  ]
2203
  },
2204
+ "execution_count": 31,
2205
  "metadata": {},
2206
  "output_type": "execute_result"
2207
  }
 
2337
  },
2338
  {
2339
  "cell_type": "code",
2340
+ "execution_count": 32,
2341
  "metadata": {},
2342
  "outputs": [
 
 
 
 
 
 
 
 
2343
  {
2344
  "data": {
2345
  "text/plain": [
2346
+ "[{'label': 'POSITIVE', 'score': 0.9999958276748657}]"
2347
  ]
2348
  },
2349
+ "execution_count": 32,
2350
  "metadata": {},
2351
  "output_type": "execute_result"
2352
  }
2353
  ],
2354
  "source": [
2355
+ "# ChatGPT\n",
2356
+ "\n",
2357
  "classifier('''Paul Caddis is a Scottish professional footballer who primarily plays as a defender, specifically as a right-back or right-wing-back. He was born on April 19, 1988, in Irvine, Scotland. Caddis has had a career spanning multiple clubs in English and Scottish football.\n",
2358
  "\n",
2359
  "Caddis began his professional career with Scottish club Celtic in 2006, but he spent much of his early career on loan spells at other clubs to gain experience. In 2010, he joined Swindon Town on loan and played a crucial role in helping them achieve promotion from League Two to League One. His performances earned him a permanent move to Swindon Town in 2010.\n",
 
2365
  "Please note that player careers in professional sports can change over time, and my knowledge is based on information available up to September 2021. For the most up-to-date information on Paul Caddis's career, you may want to refer to more recent sources or the official website of his current club.''')"
2366
  ]
2367
  },
2368
+ {
2369
+ "cell_type": "code",
2370
+ "execution_count": 33,
2371
+ "metadata": {},
2372
+ "outputs": [
2373
+ {
2374
+ "name": "stderr",
2375
+ "output_type": "stream",
2376
+ "text": [
2377
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2378
+ " warnings.warn(\n"
2379
+ ]
2380
+ },
2381
+ {
2382
+ "data": {
2383
+ "text/plain": [
2384
+ "[{'label': 'POSITIVE', 'score': 0.9999995231628418}]"
2385
+ ]
2386
+ },
2387
+ "execution_count": 33,
2388
+ "metadata": {},
2389
+ "output_type": "execute_result"
2390
+ }
2391
+ ],
2392
+ "source": [
2393
+ "# Chat-GPT\n",
2394
+ "classifier('''\n",
2395
+ " \n",
2396
+ " Virat Kohli is an Indian international cricketer and one of the most renowned and accomplished batsmen in the world. He was born on November 5, 1988, in Delhi, India. Kohli is known for his aggressive and charismatic style of play, and he has been a prominent figure in Indian cricket for many years.\n",
2397
+ "\n",
2398
+ "Kohli's journey in professional cricket began when he represented the Indian Under-19 cricket team, and he quickly made his mark as a promising talent. He made his debut for the senior Indian cricket team in August 2008 during an ODI (One Day International) against Sri Lanka.\n",
2399
+ "\n",
2400
+ "Over the years, Virat Kohli has achieved numerous records and accolades in international cricket. He has consistently been one of the top run-scorers in all formats of the game. In Test cricket, he has served as the captain of the Indian team and has been instrumental in India's success in Test matches, both at home and overseas. Under his leadership, India achieved its first-ever Test series win in Australia in the 2018-2019 season.\n",
2401
+ "\n",
2402
+ "In limited-overs cricket, Kohli has been a dynamic and dominant force, amassing numerous centuries and helping India win several important tournaments, including the 2011 ICC Cricket World Cup and the ICC Champions Trophy in 2013. He has also been the captain of the Indian cricket team in ODIs and T20Is.\n",
2403
+ "\n",
2404
+ "Virat Kohli is known for his fitness, dedication to the sport, and his ability to consistently perform under pressure. He has received numerous awards and recognitions, including the prestigious Sir Garfield Sobers Trophy for ICC Cricketer of the Year multiple times. He has also been a part of Royal Challengers Bangalore in the Indian Premier League (IPL).\n",
2405
+ "\n",
2406
+ "Kohli is not only a cricketing icon but also a prominent figure in Indian sports and popular culture. His passion for the game and his commitment to excellence have made him one of the most celebrated and respected cricketers globally. Please note that my knowledge is based on information available up to September 2021, and there may have been developments in Virat Kohli's career since then.''')"
2407
+ ]
2408
+ },
2409
+ {
2410
+ "cell_type": "code",
2411
+ "execution_count": 34,
2412
+ "metadata": {},
2413
+ "outputs": [
2414
+ {
2415
+ "name": "stderr",
2416
+ "output_type": "stream",
2417
+ "text": [
2418
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2419
+ " warnings.warn(\n"
2420
+ ]
2421
+ },
2422
+ {
2423
+ "data": {
2424
+ "text/plain": [
2425
+ "[{'label': 'NEGATIVE', 'score': 0.9999997615814209}]"
2426
+ ]
2427
+ },
2428
+ "execution_count": 34,
2429
+ "metadata": {},
2430
+ "output_type": "execute_result"
2431
+ }
2432
+ ],
2433
+ "source": [
2434
+ "# wikipedia article\n",
2435
+ "\n",
2436
+ "classifier(\n",
2437
+ " '''Virat Kohli (Hindi pronunciation: [ʋɪˈɾɑːʈ ˈkoːɦli] ⓘ; born 5 November 1988) is an Indian international cricketer and the former captain of the Indian national cricket team who plays for Royal Challengers Bangalore in the IPL and Delhi in domestic cricket. Considered to be one of the best cricketers in the world, he is widely regarded as one of the greatest batsmen in the history of the sport.[4] Nicknamed \"The King\", due to his dominant style of play and popularity, Kohli holds numerous records in his career across all formats. In 2020, the International Cricket Council named him the male cricketer of the decade. Kohli has also contributed to India's successes, captaining the team from 2014 to 2022, and winning the 2011 World Cup and the 2013 Champions trophy. He is among the only four Indian cricketers who have played over 500 matches for India.[5]\n",
2438
+ "\n",
2439
+ "Born and raised in New Delhi, Kohli trained at the West Delhi Cricket Academy and started his youth career with the Delhi Under-15 team. He made his international debut in 2008 and quickly became a key player in the ODI team and later made his Test debut in 2011. In 2013, Kohli reached the number one spot in the ICC rankings for ODI batsmen for the first time. During 2014 T20 World Cup, he set a record for the most runs scored in the tournament. In 2018, he achieved yet another milestone, becoming the world's top-ranked Test batsman, making him the only Indian cricketer to hold the number one spot in all three formats of the game. His form continued in 2019, when he became the first player to score 20,000 international runs in a single decade. In 2021, Kohli made the decision to step down as the captain of the Indian national team for T20Is, following the T20 World Cup and in early 2022 he stepped down as the captain of the Test team as well.'''\n",
2440
+ ")"
2441
+ ]
2442
+ },
2443
+ {
2444
+ "cell_type": "code",
2445
+ "execution_count": 37,
2446
+ "metadata": {},
2447
+ "outputs": [
2448
+ {
2449
+ "data": {
2450
+ "text/plain": [
2451
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2452
+ ]
2453
+ },
2454
+ "execution_count": 37,
2455
+ "metadata": {},
2456
+ "output_type": "execute_result"
2457
+ }
2458
+ ],
2459
+ "source": [
2460
+ "\n",
2461
+ "# Wikipedia\n",
2462
+ "classifier(\n",
2463
+ " '''Donald John Trump (born June 14, 1946) is an American politician, media personality, and businessman who served as the 45th president of the United States from 2017 to 2021.\n",
2464
+ "\n",
2465
+ "Trump received a BS in economics from the University of Pennsylvania in 1968, and his father named him president of his real-estate business in 1971. Trump renamed it the Trump Organization and expanded its operations to building and renovating skyscrapers, hotels, casinos, and golf courses. After a series of business reversals in the late twentieth century, he successfully launched various side ventures that required little capital, mostly by licensing the Trump name. From 2004 to 2015, he co-produced and hosted the reality television series The Apprentice, in which he portrayed a fictionalized version of himself as a wealthy and successful executive. He and his businesses have been plaintiff or defendant in more than 4,000 state and federal legal actions, including six business bankruptcies.\n",
2466
+ "\n",
2467
+ "Trump won the 2016 presidential election as the Republican nominee against Democratic nominee Hillary Clinton while losing the popular vote.[a] During the campaign, his political positions were described as populist, protectionist, isolationist, and nationalist. His election and policies sparked numerous protests. He was the first U.S. president with no prior military or government service. The 2017–2019 special counsel investigation established that Russia had interfered in the 2016 election to favor Trump's campaign. Trump promoted conspiracy theories and made many false and misleading statements during his campaigns and presidency, to a degree unprecedented in American politics. Many of his comments and actions have been characterized as racially charged or racist and many as misogynistic.'''\n",
2468
+ " )"
2469
+ ]
2470
+ },
2471
+ {
2472
+ "cell_type": "code",
2473
+ "execution_count": 38,
2474
+ "metadata": {},
2475
+ "outputs": [
2476
+ {
2477
+ "data": {
2478
+ "text/plain": [
2479
+ "[{'label': 'NEGATIVE', 'score': 0.9999955892562866}]"
2480
+ ]
2481
+ },
2482
+ "execution_count": 38,
2483
+ "metadata": {},
2484
+ "output_type": "execute_result"
2485
+ }
2486
+ ],
2487
+ "source": [
2488
+ "# ChatGPT\n",
2489
+ "\n",
2490
+ "classifier(\n",
2491
+ " '''Donald Trump is an American businessman, television personality, and politician who served as the 45th President of the United States from January 20, 2017, to January 20, 2021. He was born on June 14, 1946, in Queens, New York City. Trump is a controversial and polarizing figure in American politics and has had a significant impact on the country's political landscape.\n",
2492
+ "\n",
2493
+ "Before entering politics, Donald Trump was primarily known as a real estate developer and businessman. He inherited his father's real estate business and expanded it into the Trump Organization, which became involved in various ventures, including luxury hotels, casinos, and golf courses. His name became synonymous with luxury branding, and he hosted the reality TV show \"The Apprentice\" for several years.\n",
2494
+ "\n",
2495
+ "In 2015, Donald Trump announced his candidacy for the Republican Party's nomination for President of the United States. He ran on a platform that emphasized immigration reform, tax cuts, deregulation, and a tough stance on international trade. His unconventional campaign style and use of social media, particularly Twitter, attracted significant attention.\n",
2496
+ "\n",
2497
+ "Despite facing a crowded field of Republican candidates, Trump secured the nomination and went on to win the 2016 presidential election against the Democratic candidate, Hillary Clinton. His presidency was marked by a range of policies and actions, including tax reform, deregulation, appointment of federal judges, and changes to immigration policy. His administration was also marked by controversy, including investigations into Russian interference in the 2016 election and his impeachment by the House of Representatives in 2019 (he was acquitted by the Senate).\n",
2498
+ "\n",
2499
+ "In 2020, Donald Trump sought re-election but was defeated by the Democratic candidate, Joe Biden. His tenure as President was marked by significant political polarization, with both ardent supporters and vehement critics.\n",
2500
+ "\n",
2501
+ "Donald Trump's leadership and policies continue to be a subject of debate and discussion in American politics, and he remains a prominent figure in the Republican Party as of my last knowledge update in September 2021. Please note that there may have been developments or changes in his political activities or status since that time.'''\n",
2502
+ ")"
2503
+ ]
2504
+ },
2505
+ {
2506
+ "cell_type": "code",
2507
+ "execution_count": 39,
2508
+ "metadata": {},
2509
+ "outputs": [
2510
+ {
2511
+ "name": "stderr",
2512
+ "output_type": "stream",
2513
+ "text": [
2514
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2515
+ " warnings.warn(\n"
2516
+ ]
2517
+ },
2518
+ {
2519
+ "data": {
2520
+ "text/plain": [
2521
+ "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]"
2522
+ ]
2523
+ },
2524
+ "execution_count": 39,
2525
+ "metadata": {},
2526
+ "output_type": "execute_result"
2527
+ }
2528
+ ],
2529
+ "source": [
2530
+ "# wikipedia article\n",
2531
+ "\n",
2532
+ "classifier(\n",
2533
+ " '''The Flash (or simply Flash) is the name of several superheroes appearing in American comic books published by DC Comics. Created by writer Gardner Fox and artist Harry Lampert, the original Flash first appeared in Flash Comics #1 (cover dated January 1940, released November 1939).[1] Nicknamed \"the Scarlet Speedster\", all incarnations of the Flash possess \"superspeed\", which includes the ability to run, move, and think extremely fast, use superhuman reflexes, and seemingly violate certain laws of physics.\n",
2534
+ "\n",
2535
+ "Thus far, at least five different characters—each of whom somehow gained the power of \"the Speed Force\"—have assumed the mantle of the Flash in DC's history: college athlete Jay Garrick (1940–1951, 1961–2011, 2017–present), forensic scientist Barry Allen (1956–1985, 2008–present), Barry's nephew Wally West (1986–2011, 2016–present), Barry's grandson Bart Allen (2006–2007), and Chinese-American Avery Ho (2017–present). Each incarnation of the Flash has been a key member of at least one of DC's premier teams: the Justice Society of America, the Justice League, and the Teen Titans.\n",
2536
+ "\n",
2537
+ "The Flash is one of DC Comics' most popular characters and has been integral to the publisher's many reality-changing \"crisis\" story lines over the years. The original meeting of the Golden Age Flash Jay Garrick and Silver Age Flash Barry Allen in \"Flash of Two Worlds\" (1961) introduced the Multiverse storytelling concept to DC readers, which would become the basis for many DC stories in the years to come.\n",
2538
+ "\n",
2539
+ "Like his Justice League colleagues Wonder Woman, Superman and Batman, the Flash has a distinctive cast of adversaries, including Gorilla Grodd, the various Rogues (unique among DC supervillains for their code of honor) and the various psychopathic \"speedsters\" who go by the names Reverse-Flash or Zoom. Other supporting characters in Flash stories include Barry's wife Iris West, Wally's wife Linda Park, Bart's girlfriend Valerie Perez, friendly fellow speedster Max Mercury, and Central City police department members David Singh and Patty Spivot.'''\n",
2540
+ ")"
2541
+ ]
2542
+ },
2543
+ {
2544
+ "cell_type": "code",
2545
+ "execution_count": 40,
2546
+ "metadata": {},
2547
+ "outputs": [
2548
+ {
2549
+ "name": "stderr",
2550
+ "output_type": "stream",
2551
+ "text": [
2552
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2553
+ " warnings.warn(\n"
2554
+ ]
2555
+ },
2556
+ {
2557
+ "data": {
2558
+ "text/plain": [
2559
+ "[{'label': 'NEGATIVE', 'score': 0.9999996423721313}]"
2560
+ ]
2561
+ },
2562
+ "execution_count": 40,
2563
+ "metadata": {},
2564
+ "output_type": "execute_result"
2565
+ }
2566
+ ],
2567
+ "source": [
2568
+ "# Sample text generated by ChatGPT\n",
2569
+ "classifier(\n",
2570
+ " '''\"The Flash\" is a popular fictional superhero character and the alter ego of multiple comic book characters published by DC Comics. The character made his first appearance in \"Flash Comics #1\" in 1940 and has since become one of DC Comics' most iconic and enduring superheroes.\n",
2571
+ "\n",
2572
+ "The most well-known incarnation of The Flash is Barry Allen, a forensic scientist who gains super-speed abilities after being struck by lightning and exposed to chemicals in his lab. Barry Allen's superhero persona, The Flash, is known for his signature red costume with a lightning bolt emblem on his chest and a cowl that covers his face.\n",
2573
+ "\n",
2574
+ "The Flash possesses the ability to move at incredibly high speeds, which enables him to run on water, phase through objects, and even travel through time. He uses his powers to fight crime in the fictional Central City, often facing off against a rogues' gallery of supervillains, including Reverse-Flash, Captain Cold, and Gorilla Grodd.\n",
2575
+ "\n",
2576
+ "The character of The Flash has been adapted into various forms of media over the years, including television series, animated shows, and movies. Notably, there is a live-action television series titled \"The Flash\" that premiered in 2014 as part of the Arrowverse, a shared universe of DC Comics-based television shows. In this series, Grant Gustin portrays Barry Allen/The Flash.\n",
2577
+ "\n",
2578
+ "The character has also appeared in animated TV series, such as \"Justice League\" and \"Justice League Unlimited,\" as well as animated movies and video games. Additionally, there have been discussions and plans for a standalone Flash movie in the DC Extended Universe (DCEU) with Ezra Miller reprising the role.\n",
2579
+ "\n",
2580
+ "The Flash has remained a beloved and enduring character in the world of superhero comics, captivating audiences with his incredible speed and heroics for over eight decades.'''\n",
2581
+ ")"
2582
+ ]
2583
+ },
2584
+ {
2585
+ "cell_type": "raw",
2586
+ "metadata": {},
2587
+ "source": [
2588
+ "The Flash is the name of several superheroes appearing in American comic books published by DC Comics. Created by writer Gardner Fox and artist Harry Lampert, the original Flash first appeared in Flash Comics. Nicknamed \"the Scarlet Speedster\", all incarnations of the Flash possess \"superspeed\", which includes the ability to run, move, and think extremely fast, use superhuman reflexes, and seemingly violate certain laws of physics.\n",
2589
+ "\n",
2590
+ "Thus far, at least five different characters—each of whom somehow gained the power of \"the Speed Force\"—have assumed the mantle of the Flash in DC's history: college athlete Jay Garrick, forensic scientist Barry Allen, Barry's nephew Wally West, Barry's grandson Bart Allen, and Chinese-American Avery Ho. Each incarnation of the Flash has been a key member of at least one of DC's premier teams: the Justice Society of America, the Justice League, and the Teen Titans.\n",
2591
+ "\n",
2592
+ "The Flash is one of DC Comics' most popular characters and has been integral to the publisher's many reality-changing \"crisis\" story lines over the years. The original meeting of the Golden Age Flash Jay Garrick and Silver Age Flash Barry Allen in \"Flash of Two Worlds\" introduced the Multiverse storytelling concept to DC readers, which would become the basis for many DC stories in the years to come.\n",
2593
+ "\n",
2594
+ "Like his Justice League colleagues Wonder Woman, Superman and Batman, the Flash has a distinctive cast of adversaries, including Gorilla Grodd, the various Rogues and the various psychopathic \"speedsters\" who go by the names Reverse-Flash or Zoom. Other supporting characters in Flash stories include Barry's wife Iris West, Wally's wife Linda Park, Bart's girlfriend Valerie Perez, friendly fellow speedster Max Mercury, and Central City police department members David Singh and Patty Spivot."
2595
+ ]
2596
+ },
2597
+ {
2598
+ "cell_type": "code",
2599
+ "execution_count": 41,
2600
+ "metadata": {},
2601
+ "outputs": [
2602
+ {
2603
+ "name": "stderr",
2604
+ "output_type": "stream",
2605
+ "text": [
2606
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2607
+ " warnings.warn(\n"
2608
+ ]
2609
+ },
2610
+ {
2611
+ "data": {
2612
+ "text/plain": [
2613
+ "[{'label': 'POSITIVE', 'score': 1.0}]"
2614
+ ]
2615
+ },
2616
+ "execution_count": 41,
2617
+ "metadata": {},
2618
+ "output_type": "execute_result"
2619
+ }
2620
+ ],
2621
+ "source": [
2622
+ "classifier('''GeeksforGeeks is a computer science portal for geeks who want to learn and practice various topics related to programming, data structures, algorithms, and more. It offers articles, videos, courses, quizzes, and interview preparation materials for students and professionals.''')"
2623
+ ]
2624
+ },
2625
+ {
2626
+ "cell_type": "code",
2627
+ "execution_count": 42,
2628
+ "metadata": {},
2629
+ "outputs": [
2630
+ {
2631
+ "name": "stderr",
2632
+ "output_type": "stream",
2633
+ "text": [
2634
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2635
+ " warnings.warn(\n"
2636
+ ]
2637
+ },
2638
+ {
2639
+ "data": {
2640
+ "text/plain": [
2641
+ "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]"
2642
+ ]
2643
+ },
2644
+ "execution_count": 42,
2645
+ "metadata": {},
2646
+ "output_type": "execute_result"
2647
+ }
2648
+ ],
2649
+ "source": [
2650
+ "classifier('''The Amazing Spider-Man is a comic book series published by Marvel Comics, featuring the adventures of the fictional superhero Spider-Man. The series was created by writer Stan Lee and artist Steve Ditko, and it was first published in 1963. The series has been adapted into various media, including television shows, films, and video games. The most recent film adaptation of the series was released in 2012, directed by Marc Webb and starring Andrew Garfield as Peter Parker/Spider-Man .''')"
2651
+ ]
2652
+ },
2653
+ {
2654
+ "cell_type": "code",
2655
+ "execution_count": 43,
2656
+ "metadata": {},
2657
+ "outputs": [
2658
+ {
2659
+ "name": "stderr",
2660
+ "output_type": "stream",
2661
+ "text": [
2662
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2663
+ " warnings.warn(\n"
2664
+ ]
2665
+ },
2666
+ {
2667
+ "data": {
2668
+ "text/plain": [
2669
+ "[{'label': 'POSITIVE', 'score': 0.9999988079071045}]"
2670
+ ]
2671
+ },
2672
+ "execution_count": 43,
2673
+ "metadata": {},
2674
+ "output_type": "execute_result"
2675
+ }
2676
+ ],
2677
+ "source": [
2678
+ "classifier(\n",
2679
+ " '''The Amazing Spider-Man is an ongoing American superhero comic book series featuring the Marvel Comics superhero Spider-Man as its title character and main protagonist. Being in the mainstream continuity of the franchise, it was the character's first title, launching seven months after his introduction in the final issue of Amazing Fantasy. '''\n",
2680
+ ")"
2681
+ ]
2682
+ },
2683
+ {
2684
+ "cell_type": "code",
2685
+ "execution_count": 44,
2686
+ "metadata": {},
2687
+ "outputs": [
2688
+ {
2689
+ "name": "stderr",
2690
+ "output_type": "stream",
2691
+ "text": [
2692
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
2693
+ " warnings.warn(\n"
2694
+ ]
2695
+ },
2696
+ {
2697
+ "data": {
2698
+ "text/plain": [
2699
+ "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]"
2700
+ ]
2701
+ },
2702
+ "execution_count": 44,
2703
+ "metadata": {},
2704
+ "output_type": "execute_result"
2705
+ }
2706
+ ],
2707
+ "source": [
2708
+ "classifier('''\n",
2709
+ " \n",
2710
+ " The Flash is a superhero character from DC Comics. The character was created by writer Gardner Fox and artist Harry Lampert, and first appeared in Flash Comics #1 in 1940. The Flash's real name is Barry Allen, a forensic scientist who gains the power of superhuman speed after being struck by lightning. The Flash is known for his red costume with a lightning bolt symbol on his chest, and his ability to move at incredible speeds, which he uses to fight crime and protect the citizens of Central City. The character has been adapted into various media, including television shows, films, and video games. The most recent television adaptation of the series was released in 2014, starring Grant Gustin as Barry Allen/The Flash. The character has also appeared in various animated series and films, including Justice League: The Flashpoint Paradox (2013) and Justice League: War (2014).\n",
2711
+ " ''')"
2712
+ ]
2713
+ },
2714
+ {
2715
+ "cell_type": "code",
2716
+ "execution_count": 46,
2717
+ "metadata": {},
2718
+ "outputs": [
2719
+ {
2720
+ "data": {
2721
+ "text/plain": [
2722
+ "[{'label': 'POSITIVE', 'score': 1.0}]"
2723
+ ]
2724
+ },
2725
+ "execution_count": 46,
2726
+ "metadata": {},
2727
+ "output_type": "execute_result"
2728
+ }
2729
+ ],
2730
+ "source": [
2731
+ "classifier(\n",
2732
+ " '''\"The Flash\" is a popular fictional superhero character and the alter ego of multiple comic book characters published by DC Comics. The character made his first appearance in \"Flash Comics #1\" in 1940 and has since become one of DC Comics' most iconic and enduring superheroes.\n",
2733
+ "\n",
2734
+ "The most well-known incarnation of The Flash is Barry Allen, a forensic scientist who gains super-speed abilities after being struck by lightning and exposed to chemicals in his lab. Barry Allen's superhero persona, The Flash, is known for his signature red costume with a lightning bolt emblem on his chest and a cowl that covers his face.\n",
2735
+ "The Flash possesses the ability to move at incredibly high speeds, which enables him to run on water, phase through objects, and even travel through time. He uses his powers to fight crime in the fictional Central City, often facing off against a rogues' gallery of supervillains, including Reverse-Flash, Captain Cold, and Gorilla Grodd.\n",
2736
+ "\n",
2737
+ "The character of The Flash has been adapted into various forms of media over the years, including television series, animated shows, and movies. Notably, there is a live-action television series titled \"The Flash\" that premiered in 2014 as part of the Arrowverse, a shared universe of DC Comics-based television shows. In this series, Grant Gustin portrays Barry Allen/The Flash.\n",
2738
+ "\n",
2739
+ "The character has also appeared in animated TV series, such as \"Justice League\" and \"Justice League Unlimited,\" as well as animated movies and video games. Additionally, there have been discussions and plans for a standalone Flash movie in the DC Extended Universe (DCEU) with Ezra Miller reprising the role.\n",
2740
+ "\n",
2741
+ "The Flash has remained a beloved and enduring character in the world of superhero comics, captivating audiences with his incredible speed and heroics for over eight decades.\n",
2742
+ "'''\n",
2743
+ ")"
2744
+ ]
2745
+ },
2746
+ {
2747
+ "cell_type": "code",
2748
+ "execution_count": null,
2749
+ "metadata": {},
2750
+ "outputs": [],
2751
+ "source": []
2752
+ },
2753
+ {
2754
+ "cell_type": "markdown",
2755
+ "metadata": {},
2756
+ "source": [
2757
+ "# Inference Part v2"
2758
+ ]
2759
+ },
2760
+ {
2761
+ "cell_type": "code",
2762
+ "execution_count": 3,
2763
+ "metadata": {},
2764
+ "outputs": [],
2765
+ "source": [
2766
+ "import os; os.chdir('..')"
2767
+ ]
2768
+ },
2769
+ {
2770
+ "cell_type": "code",
2771
+ "execution_count": 4,
2772
+ "metadata": {},
2773
+ "outputs": [],
2774
+ "source": [
2775
+ "s= '''The Flash possesses the ability to move at incredibly high speeds, which enables him to run on water, phase through objects, and even travel through time. He uses his powers to fight crime in the fictional Central City, often facing off against a rogues' gallery of supervillains, including Reverse-Flash, Captain Cold, and Gorilla Grodd.\n",
2776
+ "\n",
2777
+ "The character of The Flash has been adapted into various forms of media over the years, including television series, animated shows, and movies. Notably, there is a live-action television series titled \"The Flash\" that premiered in 2014 as part of the Arrowverse, a shared universe of DC Comics-based television shows. In this series, Grant Gustin portrays Barry Allen/The Flash.\n",
2778
+ "\n",
2779
+ "The character has also appeared in animated TV series, such as \"Justice League\" and \"Justice League Unlimited,\" as well as animated movies and video games. Additionally, there have been discussions and plans for a standalone Flash movie in the DC Extended Universe (DCEU) with Ezra Miller reprising the role.\n",
2780
+ "\n",
2781
+ "The Flash has remained a beloved and enduring character in the world of superhero comics, captivating audiences with his incredible speed and heroics for over eight decades.'''"
2782
+ ]
2783
+ },
2784
+ {
2785
+ "cell_type": "code",
2786
+ "execution_count": 5,
2787
+ "metadata": {},
2788
+ "outputs": [
2789
+ {
2790
+ "name": "stderr",
2791
+ "output_type": "stream",
2792
+ "text": [
2793
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
2794
+ ]
2795
+ }
2796
+ ],
2797
+ "source": [
2798
+ "from transformers import AutoTokenizer\n",
2799
+ "\n",
2800
+ "tokenizer = AutoTokenizer.from_pretrained(\"gpt3_finetuned_model/checkpoint-30048\")\n",
2801
+ "inputs = tokenizer(s, return_tensors=\"pt\")"
2802
+ ]
2803
+ },
2804
+ {
2805
+ "cell_type": "code",
2806
+ "execution_count": 6,
2807
+ "metadata": {},
2808
+ "outputs": [
2809
+ {
2810
+ "data": {
2811
+ "text/plain": [
2812
+ "{'input_ids': tensor([[ 101, 1996, 5956, 14882, 1996, 3754, 2000, 2693, 2012, 11757,\n",
2813
+ " 2152, 10898, 1010, 2029, 12939, 2032, 2000, 2448, 2006, 2300,\n",
2814
+ " 1010, 4403, 2083, 5200, 1010, 1998, 2130, 3604, 2083, 2051,\n",
2815
+ " 1012, 2002, 3594, 2010, 4204, 2000, 2954, 4126, 1999, 1996,\n",
2816
+ " 7214, 2430, 2103, 1010, 2411, 5307, 2125, 2114, 1037, 12406,\n",
2817
+ " 2015, 1005, 3916, 1997, 3565, 26548, 28247, 1010, 2164, 7901,\n",
2818
+ " 1011, 5956, 1010, 2952, 3147, 1010, 1998, 23526, 24665, 7716,\n",
2819
+ " 2094, 1012, 1996, 2839, 1997, 1996, 5956, 2038, 2042, 5967,\n",
2820
+ " 2046, 2536, 3596, 1997, 2865, 2058, 1996, 2086, 1010, 2164,\n",
2821
+ " 2547, 2186, 1010, 6579, 3065, 1010, 1998, 5691, 1012, 5546,\n",
2822
+ " 1010, 2045, 2003, 1037, 2444, 1011, 2895, 2547, 2186, 4159,\n",
2823
+ " 1000, 1996, 5956, 1000, 2008, 5885, 1999, 2297, 2004, 2112,\n",
2824
+ " 1997, 1996, 8612, 16070, 1010, 1037, 4207, 5304, 1997, 5887,\n",
2825
+ " 5888, 1011, 2241, 2547, 3065, 1012, 1999, 2023, 2186, 1010,\n",
2826
+ " 3946, 26903, 2378, 17509, 6287, 5297, 1013, 1996, 5956, 1012,\n",
2827
+ " 1996, 2839, 2038, 2036, 2596, 1999, 6579, 2694, 2186, 1010,\n",
2828
+ " 2107, 2004, 1000, 3425, 2223, 1000, 1998, 1000, 3425, 2223,\n",
2829
+ " 14668, 1010, 1000, 2004, 2092, 2004, 6579, 5691, 1998, 2678,\n",
2830
+ " 2399, 1012, 5678, 1010, 2045, 2031, 2042, 10287, 1998, 3488,\n",
2831
+ " 2005, 1037, 26609, 5956, 3185, 1999, 1996, 5887, 3668, 5304,\n",
2832
+ " 1006, 5887, 13765, 1007, 2007, 16245, 4679, 16360, 6935, 2075,\n",
2833
+ " 1996, 2535, 1012, 1996, 5956, 2038, 2815, 1037, 11419, 1998,\n",
2834
+ " 16762, 2839, 1999, 1996, 2088, 1997, 16251, 5888, 1010, 14408,\n",
2835
+ " 17441, 9501, 2007, 2010, 9788, 3177, 1998, 14779, 2015, 2005,\n",
2836
+ " 2058, 2809, 5109, 1012, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2837
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2838
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2839
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2840
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2841
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2842
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2843
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2844
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2845
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
2846
+ " 1, 1, 1, 1, 1]])}"
2847
+ ]
2848
+ },
2849
+ "execution_count": 6,
2850
+ "metadata": {},
2851
+ "output_type": "execute_result"
2852
+ }
2853
+ ],
2854
+ "source": [
2855
+ "inputs"
2856
+ ]
2857
+ },
2858
+ {
2859
+ "cell_type": "code",
2860
+ "execution_count": 7,
2861
+ "metadata": {},
2862
+ "outputs": [],
2863
+ "source": [
2864
+ "import torch\n",
2865
+ "from transformers import AutoModelForSequenceClassification\n",
2866
+ "\n",
2867
+ "model = AutoModelForSequenceClassification.from_pretrained(\"gpt3_finetuned_model/checkpoint-30048\")\n",
2868
+ "with torch.no_grad():\n",
2869
+ " logits = model(**inputs).logits"
2870
+ ]
2871
+ },
2872
+ {
2873
+ "cell_type": "code",
2874
+ "execution_count": 8,
2875
+ "metadata": {},
2876
+ "outputs": [
2877
+ {
2878
+ "data": {
2879
+ "text/plain": [
2880
+ "tensor([[ 3.8886, -3.8942]])"
2881
+ ]
2882
+ },
2883
+ "execution_count": 8,
2884
+ "metadata": {},
2885
+ "output_type": "execute_result"
2886
+ }
2887
+ ],
2888
+ "source": [
2889
+ "logits"
2890
+ ]
2891
+ },
2892
+ {
2893
+ "cell_type": "code",
2894
+ "execution_count": 9,
2895
+ "metadata": {},
2896
+ "outputs": [
2897
+ {
2898
+ "data": {
2899
+ "text/plain": [
2900
+ "'NEGATIVE'"
2901
+ ]
2902
+ },
2903
+ "execution_count": 9,
2904
+ "metadata": {},
2905
+ "output_type": "execute_result"
2906
+ }
2907
+ ],
2908
+ "source": [
2909
+ "predicted_class_id = logits.argmax().item()\n",
2910
+ "model.config.id2label[predicted_class_id]"
2911
+ ]
2912
+ },
2913
+ {
2914
+ "cell_type": "code",
2915
+ "execution_count": 11,
2916
+ "metadata": {},
2917
+ "outputs": [
2918
+ {
2919
+ "name": "stdout",
2920
+ "output_type": "stream",
2921
+ "text": [
2922
+ "logits: tensor([[-8.4513, 8.3008]])\n"
2923
+ ]
2924
+ },
2925
+ {
2926
+ "data": {
2927
+ "text/plain": [
2928
+ "'POSITIVE'"
2929
+ ]
2930
+ },
2931
+ "execution_count": 11,
2932
+ "metadata": {},
2933
+ "output_type": "execute_result"
2934
+ }
2935
+ ],
2936
+ "source": [
2937
+ "s= '''\"The Flash\" is a popular fictional superhero character and the alter ego of multiple comic book characters published by DC Comics. The character made his first appearance in \"Flash Comics #1\" in 1940 and has since become one of DC Comics' most iconic and enduring superheroes.\n",
2938
+ "\n",
2939
+ "The most well-known incarnation of The Flash is Barry Allen, a forensic scientist who gains super-speed abilities after being struck by lightning and exposed to chemicals in his lab. Barry Allen's superhero persona, The Flash, is known for his signature red costume with a lightning bolt emblem on his chest and a cowl that covers his face.'''\n",
2940
+ "\n",
2941
+ "inputs = tokenizer(s, return_tensors=\"pt\")\n",
2942
+ "with torch.no_grad():\n",
2943
+ " logits = model(**inputs).logits\n",
2944
+ " \n",
2945
+ "print(\"logits: \", logits)\n",
2946
+ "predicted_class_id = logits.argmax().item()\n",
2947
+ "model.config.id2label[predicted_class_id]"
2948
+ ]
2949
+ },
2950
+ {
2951
+ "cell_type": "code",
2952
+ "execution_count": 14,
2953
+ "metadata": {},
2954
+ "outputs": [
2955
+ {
2956
+ "name": "stdout",
2957
+ "output_type": "stream",
2958
+ "text": [
2959
+ "logits: tensor([[ 8.0543, -7.4965]])\n"
2960
+ ]
2961
+ },
2962
+ {
2963
+ "data": {
2964
+ "text/plain": [
2965
+ "'NEGATIVE'"
2966
+ ]
2967
+ },
2968
+ "execution_count": 14,
2969
+ "metadata": {},
2970
+ "output_type": "execute_result"
2971
+ }
2972
+ ],
2973
+ "source": [
2974
+ "s= '''Donald John Trump (born June 14, 1946) is an American politician, media personality, and businessman who served as the 45th president of the United States from 2017 to 2021.\n",
2975
+ "\n",
2976
+ "Trump received a BS in economics from the University of Pennsylvania in 1968, and his father named him president of his real-estate business in 1971. Trump renamed it the Trump Organization and expanded its operations to building and renovating skyscrapers, hotels, casinos, and golf courses. After a series of business reversals in the late twentieth century, he successfully launched various side ventures that required little capital, mostly by licensing the Trump name. From 2004 to 2015, he co-produced and hosted the reality television series The Apprentice, in which he portrayed a fictionalized version of himself as a wealthy and successful executive. He and his businesses have been plaintiff or defendant in more than 4,000 state and federal legal actions, including six business bankruptcies.\n",
2977
+ "\n",
2978
+ "Trump won the 2016 presidential election as the Republican nominee against Democratic nominee Hillary Clinton while losing the popular vote.[a] During the campaign, his political positions were described as populist, protectionist, isolationist, and nationalist. His election and policies sparked numerous protests. He was the first U.S. president with no prior military or government service. The 2017–2019 special counsel investigation established that Russia had interfered in the 2016 election to favor Trump's campaign. Trump promoted conspiracy theories and made many false and misleading statements during his campaigns and presidency, to a degree unprecedented in American politics. Many of his comments and actions have been characterized as racially charged or racist and many as misogynistic.'''\n",
2979
+ " \n",
2980
+ "inputs = tokenizer(s, return_tensors=\"pt\")\n",
2981
+ "with torch.no_grad():\n",
2982
+ " logits = model(**inputs).logits\n",
2983
+ " \n",
2984
+ "print(\"logits: \", logits)\n",
2985
+ "predicted_class_id = logits.argmax().item()\n",
2986
+ "model.config.id2label[predicted_class_id]"
2987
+ ]
2988
+ },
2989
+ {
2990
+ "cell_type": "code",
2991
+ "execution_count": 15,
2992
+ "metadata": {},
2993
+ "outputs": [
2994
+ {
2995
+ "name": "stdout",
2996
+ "output_type": "stream",
2997
+ "text": [
2998
+ "logits: tensor([[-4.0940, 4.3727]])\n"
2999
+ ]
3000
+ },
3001
+ {
3002
+ "data": {
3003
+ "text/plain": [
3004
+ "'POSITIVE'"
3005
+ ]
3006
+ },
3007
+ "execution_count": 15,
3008
+ "metadata": {},
3009
+ "output_type": "execute_result"
3010
+ }
3011
+ ],
3012
+ "source": [
3013
+ "s= '''To write a compelling sales copy, focus on understanding your target audience and their pain points. Craft a captivating headline to grab attention, followed by a concise and persuasive introduction. Highlight the benefits and unique selling points of your product or service, using persuasive language and storytelling techniques. Create a sense of urgency and offer a clear call-to-action to drive conversions. Revise and refine your copy for clarity and impact.'''\n",
3014
+ " \n",
3015
+ "inputs = tokenizer(s, return_tensors=\"pt\")\n",
3016
+ "with torch.no_grad():\n",
3017
+ " logits = model(**inputs).logits\n",
3018
+ " \n",
3019
+ "print(\"logits: \", logits)\n",
3020
+ "predicted_class_id = logits.argmax().item()\n",
3021
+ "model.config.id2label[predicted_class_id]"
3022
+ ]
3023
+ },
3024
+ {
3025
+ "cell_type": "code",
3026
+ "execution_count": 38,
3027
+ "metadata": {},
3028
+ "outputs": [
3029
+ {
3030
+ "name": "stdout",
3031
+ "output_type": "stream",
3032
+ "text": [
3033
+ "logits: tensor([[ 7.3291, -6.8746]])\n"
3034
+ ]
3035
+ },
3036
+ {
3037
+ "data": {
3038
+ "text/plain": [
3039
+ "'NEGATIVE'"
3040
+ ]
3041
+ },
3042
+ "execution_count": 38,
3043
+ "metadata": {},
3044
+ "output_type": "execute_result"
3045
+ }
3046
+ ],
3047
+ "source": [
3048
+ "s= '''The Flash (or simply Flash) is the name of several superheroes in the DC Comics universe. Each iteration of the character possesses superhuman speed, allowing them to move at incredible velocities, run on water, phase through solid objects, and even time travel. The most iconic Flash is Barry Allen, who first appeared in 1956 and became the Scarlet Speedster known for his distinctive red costume with a lightning bolt emblem.\n",
3049
+ "\n",
3050
+ "Barry Allen's origin story involves a lightning strike combined with a chemical accident, granting him his incredible speed powers. He adopts the superhero persona of The Flash to fight crime in Central City. His adventures often revolve around thwarting supervillains and metahuman threats, while also serving as a founding member of the Justice League.\n",
3051
+ "'''\n",
3052
+ "\n",
3053
+ "\n",
3054
+ "s= '''The Flash (or simply Flash) is the name of several superheroes appearing in American comic books published by DC Comics. Created by writer Gardner Fox and artist Harry Lampert, the original Flash first appeared in Flash Comics #1 (cover dated January 1940, released November 1939).[1] Nicknamed \"the Scarlet Speedster\", all incarnations of the Flash possess \"superspeed\", which includes the ability to run, move, and think extremely fast, use superhuman reflexes, and seemingly violate certain laws of physics.\n",
3055
+ "\n",
3056
+ "Thus far, at least five different characters—each of whom somehow gained the power of \"the Speed Force\"—have assumed the mantle of the Flash in DC's history: college athlete Jay Garrick (1940–1951, 1961–2011, 2017–present), forensic scientist Barry Allen (1956–1985, 2008–present), Barry's nephew Wally West (1986–2011, 2016–present), Barry's grandson Bart Allen (2006–2007), and Chinese-American Avery Ho (2017–present). Each incarnation of the Flash has been a key member of at least one of DC's premier teams: the Justice Society of America, the Justice League, and the Teen Titans.'''\n",
3057
+ "inputs = tokenizer(s, return_tensors=\"pt\")\n",
3058
+ "with torch.no_grad():\n",
3059
+ " logits = model(**inputs).logits\n",
3060
+ " \n",
3061
+ "print(\"logits: \", logits)\n",
3062
+ "predicted_class_id = logits.argmax().item()\n",
3063
+ "model.config.id2label[predicted_class_id]"
3064
+ ]
3065
+ },
3066
+ {
3067
+ "cell_type": "code",
3068
+ "execution_count": 42,
3069
+ "metadata": {},
3070
+ "outputs": [],
3071
+ "source": [
3072
+ "def predict(sentence):\n",
3073
+ "    \"\"\"Print the raw logits, the model's label name and a Human/AI verdict for one text `sentence`; returns None.\"\"\"\n",
3074
+ "    inputs = tokenizer(sentence, return_tensors=\"pt\", truncation=True, max_length=model.config.max_position_embeddings)  # longer texts would overflow the position embeddings\n",
3075
+ "    with torch.no_grad():\n",
3076
+ "        logits = model(**inputs).logits\n",
3077
+ "    print(\"logits: \", logits)\n",
3078
+ "    predicted_label = model.config.id2label[logits.argmax(dim=-1).item()]  # argmax over the class axis; .item() requires exactly one input text\n",
3079
+ "    print(predicted_label)\n",
3080
+ "    label = \"Human Written\" if predicted_label == \"NEGATIVE\" else \"AI written\"  # this notebook reads NEGATIVE as human-written\n",
3081
+ "    print(\"Label: \", label)"
3082
+ ]
3083
+ },
3084
+ {
3085
+ "cell_type": "code",
3086
+ "execution_count": 43,
3087
+ "metadata": {},
3088
+ "outputs": [
3089
+ {
3090
+ "name": "stdout",
3091
+ "output_type": "stream",
3092
+ "text": [
3093
+ "logits: tensor([[-7.7618, 7.7867]])\n",
3094
+ "POSITIVE\n",
3095
+ "Label: AI written\n"
3096
+ ]
3097
+ }
3098
+ ],
3099
+ "source": [
3100
+ "predict('''The Flash (or simply Flash) is the name of several superheroes in the DC Comics universe. Each iteration of the character possesses superhuman speed, allowing them to move at incredible velocities, run on water, phase through solid objects, and even time travel. The most iconic Flash is Barry Allen, who first appeared in 1956 and became the Scarlet Speedster known for his distinctive red costume with a lightning bolt emblem.\n",
3101
+ "\n",
3102
+ "Barry Allen's origin story involves a lightning strike combined with a chemical accident, granting him his incredible speed powers. He adopts the superhero persona of The Flash to fight crime in Central City. His adventures often revolve around thwarting supervillains and metahuman threats, while also serving as a founding member of the Justice League.\n",
3103
+ "\n",
3104
+ "''')"
3105
+ ]
3106
+ },
3107
+ {
3108
+ "cell_type": "code",
3109
+ "execution_count": 44,
3110
+ "metadata": {},
3111
+ "outputs": [
3112
+ {
3113
+ "name": "stdout",
3114
+ "output_type": "stream",
3115
+ "text": [
3116
+ "logits: tensor([[ 7.3291, -6.8746]])\n",
3117
+ "NEGATIVE\n",
3118
+ "Label: Human Written\n"
3119
+ ]
3120
+ }
3121
+ ],
3122
+ "source": [
3123
+ "predict(\n",
3124
+ " '''The Flash (or simply Flash) is the name of several superheroes appearing in American comic books published by DC Comics. Created by writer Gardner Fox and artist Harry Lampert, the original Flash first appeared in Flash Comics #1 (cover dated January 1940, released November 1939).[1] Nicknamed \"the Scarlet Speedster\", all incarnations of the Flash possess \"superspeed\", which includes the ability to run, move, and think extremely fast, use superhuman reflexes, and seemingly violate certain laws of physics.\n",
3125
+ "\n",
3126
+ "Thus far, at least five different characters—each of whom somehow gained the power of \"the Speed Force\"—have assumed the mantle of the Flash in DC's history: college athlete Jay Garrick (1940–1951, 1961–2011, 2017–present), forensic scientist Barry Allen (1956–1985, 2008–present), Barry's nephew Wally West (1986–2011, 2016–present), Barry's grandson Bart Allen (2006–2007), and Chinese-American Avery Ho (2017–present). Each incarnation of the Flash has been a key member of at least one of DC's premier teams: the Justice Society of America, the Justice League, and the Teen Titans.'''\n",
3127
+ ")"
3128
+ ]
3129
+ },
3130
+ {
3131
+ "cell_type": "code",
3132
+ "execution_count": 45,
3133
+ "metadata": {},
3134
+ "outputs": [
3135
+ {
3136
+ "name": "stdout",
3137
+ "output_type": "stream",
3138
+ "text": [
3139
+ "logits: tensor([[ 8.0190, -7.4839]])\n",
3140
+ "NEGATIVE\n",
3141
+ "Label: Human Written\n"
3142
+ ]
3143
+ }
3144
+ ],
3145
+ "source": [
3146
+ "predict(\n",
3147
+ " '''The Flash first appeared in the Golden Age Flash Comics #1 (January 1940), from All-American Publications, one of three companies that would eventually merge to form DC Comics. Created by writer Gardner Fox and artist Harry Lampert, this Flash was Jay Garrick, a college student who gained his speed through the inhalation of hard water vapors. When re-introduced in the 1960s Garrick's origin was modified slightly, gaining his powers through exposure to heavy water.\n",
3148
+ "\n",
3149
+ "Jay Garrick was a popular character in the 1940s, supporting both Flash Comics and All-Flash Quarterly (later published bi-monthly as simply All-Flash); co-starring in Comic Cavalcade; and being a charter member of the Justice Society of America, the first superhero team, whose adventures ran in All Star Comics. With superheroes' post-war decline in popularity, Flash Comics was canceled with issue #104 (1949) which featured an evil version of the Flash called the Rival. The Justice Society's final Golden Age story ran in All Star Comics #57 (1951; the title itself continued as All Star Western).'''\n",
3150
+ ")"
3151
+ ]
3152
+ },
3153
+ {
3154
+ "cell_type": "code",
3155
+ "execution_count": 46,
3156
+ "metadata": {},
3157
+ "outputs": [
3158
+ {
3159
+ "name": "stdout",
3160
+ "output_type": "stream",
3161
+ "text": [
3162
+ "logits: tensor([[-2.8285, 3.1423]])\n",
3163
+ "POSITIVE\n",
3164
+ "Label: AI written\n"
3165
+ ]
3166
+ }
3167
+ ],
3168
+ "source": [
3169
+ "predict(\n",
3170
+ " '''The Flash first appeared in the Golden Age of comic books, debuting in \"Flash Comics #1\" in 1940. Created by writer Gardner Fox and artist Harry Lampert, the character's secret identity was Jay Garrick, a college student who gained super-speed abilities after inhaling the fumes of \"hard water\" during a lab accident. Jay Garrick's Flash became a beloved figure during the Golden Age of comics, known for his winged helmet and red costume with lightning bolt accents.\n",
3171
+ "\n",
3172
+ "However, the character experienced a revitalization during the Silver Age of comics in the late 1950s. This era saw the creation of a new Flash, Barry Allen, in \"Showcase #4\" in 1956. Barry, a police scientist, became the Scarlet Speedster after being struck by lightning and doused in chemicals while working in his laboratory. This event led to the iconic origin story that many fans recognize today.'''\n",
3173
+ ")"
3174
+ ]
3175
+ },
3176
+ {
3177
+ "cell_type": "code",
3178
+ "execution_count": 47,
3179
+ "metadata": {},
3180
+ "outputs": [
3181
+ {
3182
+ "name": "stdout",
3183
+ "output_type": "stream",
3184
+ "text": [
3185
+ "logits: tensor([[ 7.9124, -7.3888]])\n",
3186
+ "NEGATIVE\n",
3187
+ "Label: Human Written\n"
3188
+ ]
3189
+ }
3190
+ ],
3191
+ "source": [
3192
+ "predict(\n",
3193
+ " '''Virat Kohli (Hindi pronunciation: [ʋɪˈɾɑːʈ ˈkoːɦli] ⓘ; born 5 November 1988) is an Indian international cricketer and the former captain of the Indian national cricket team who plays for Royal Challengers Bangalore in the IPL and Delhi in domestic cricket. Considered to be one of the best cricketers in the world, he is widely regarded as one of the greatest batsmen in the history of the sport.[4] Nicknamed \"The King\", due to his dominant style of play and popularity, Kohli holds numerous records in his career across all formats. In 2020, the International Cricket Council named him the male cricketer of the decade. Kohli has also contributed to India's successes, captaining the team from 2014 to 2022, and winning the 2011 World Cup and the 2013 Champions trophy. He is among the only four Indian cricketers who have played over 500 matches for India.[5]\n",
3194
+ "\n",
3195
+ "Born and raised in New Delhi, Kohli trained at the West Delhi Cricket Academy and started his youth career with the Delhi Under-15 team. He made his international debut in 2008 and quickly became a key player in the ODI team and later made his Test debut in 2011. In 2013, Kohli reached the number one spot in the ICC rankings for ODI batsmen for the first time. During 2014 T20 World Cup, he set a record for the most runs scored in the tournament. In 2018, he achieved yet another milestone, becoming the world's top-ranked Test batsman, making him the only Indian cricketer to hold the number one spot in all three formats of the game. His form continued in 2019, when he became the first player to score 20,000 international runs in a single decade. In 2021, Kohli made the decision to step down as the captain of the Indian national team for T20Is, following the T20 World Cup and in early 2022 he stepped down as the captain of the Test team as well.'''\n",
3196
+ ")"
3197
+ ]
3198
+ },
3199
+ {
3200
+ "cell_type": "code",
3201
+ "execution_count": 49,
3202
+ "metadata": {},
3203
+ "outputs": [
3204
+ {
3205
+ "name": "stdout",
3206
+ "output_type": "stream",
3207
+ "text": [
3208
+ "logits: tensor([[-8.4224, 8.2709]])\n",
3209
+ "POSITIVE\n",
3210
+ "Label: AI written\n"
3211
+ ]
3212
+ }
3213
+ ],
3214
+ "source": [
3215
+ "predict(\n",
3216
+ " '''Virat Kohli is an Indian cricketing sensation who has left an indelible mark on the world of sports. Born in Delhi, India, Kohli's journey from a young aspiring cricketer to becoming one of the greatest batsmen in the history of the game is nothing short of remarkable.\n",
3217
+ "\n",
3218
+ "Kohli's cricketing prowess was evident from a tender age, and he quickly rose through the ranks of junior cricket in India. He made his debut for the Indian national team in 2008, and since then, he has been a symbol of consistency and excellence. His distinctive blend of aggression and technical finesse at the crease has earned him a reputation as a modern-day batting maestro.'''\n",
3219
+ ")"
3220
+ ]
3221
+ },
3222
+ {
3223
+ "cell_type": "code",
3224
+ "execution_count": 50,
3225
+ "metadata": {},
3226
+ "outputs": [
3227
+ {
3228
+ "name": "stdout",
3229
+ "output_type": "stream",
3230
+ "text": [
3231
+ "logits: tensor([[ 7.4143, -6.9287]])\n",
3232
+ "NEGATIVE\n",
3233
+ "Label: Human Written\n"
3234
+ ]
3235
+ }
3236
+ ],
3237
+ "source": [
3238
+ "predict(\n",
3239
+ " '''Ah, spring. It’s our season of hope, holding a promise that change is a-comin’, for the better. The river will “flow again after it was frozen,” Ernest Hemingway wrote of the season in “A Moveable Feast.”\n",
3240
+ "\n",
3241
+ "Spring signifies coming out of the darkness. We’ve tipped the balance from longer nights to longer days. Equinox means “equal night” of light and dark (roughly), and through the rest of the season we can benefit — experientially and metaphorically – from more time spent in the light.\n",
3242
+ "\n",
3243
+ "“If we had no winter, the spring would not be so pleasant,” wrote English poet Anne Bradstreet. “If we did not sometimes taste of adversity, prosperity would not be so welcome.”\n",
3244
+ "\n",
3245
+ "'''\n",
3246
+ ")"
3247
+ ]
3248
+ },
3249
+ {
3250
+ "cell_type": "code",
3251
+ "execution_count": 51,
3252
+ "metadata": {},
3253
+ "outputs": [
3254
+ {
3255
+ "name": "stdout",
3256
+ "output_type": "stream",
3257
+ "text": [
3258
+ "logits: tensor([[-2.8957, 3.2729]])\n",
3259
+ "POSITIVE\n",
3260
+ "Label: AI written\n"
3261
+ ]
3262
+ }
3263
+ ],
3264
+ "source": [
3265
+ "predict(\n",
3266
+ " '''Ah, spring. It's our season of hope, holding a promise of renewal and rebirth. As the days grow longer and the frost of winter slowly relinquishes its grip, the world awakens from its slumber. The air, once cold and crisp, becomes infused with the delicate fragrance of blooming flowers and fresh grass. Spring carries with it a sense of magic as nature's dormant forces burst forth in a riot of color and life.\n",
3267
+ "\n",
3268
+ "The most captivating transformation occurs in the trees, as they begin to bud and sprout tender green leaves. Cherry blossoms, daffodils, and tulips adorn gardens and parks, painting the landscape with their vibrant hues. Birds return from their migratory journeys, filling the air with their melodic symphonies, and animals emerge from hibernation, venturing out in search of sustenance and companionship.'''\n",
3269
+ ")"
3270
+ ]
3271
+ },
3272
+ {
3273
+ "cell_type": "code",
3274
+ "execution_count": 52,
3275
+ "metadata": {},
3276
+ "outputs": [
3277
+ {
3278
+ "name": "stdout",
3279
+ "output_type": "stream",
3280
+ "text": [
3281
+ "logits: tensor([[ 8.1593, -7.5862]])\n",
3282
+ "NEGATIVE\n",
3283
+ "Label: Human Written\n"
3284
+ ]
3285
+ }
3286
+ ],
3287
+ "source": [
3288
+ "predict(\n",
3289
+ " '''Born and raised in New Delhi, Kohli trained at the West Delhi Cricket Academy and started his youth career with the Delhi Under-15 team. He made his international debut in 2008 and quickly became a key player in the ODI team and later made his Test debut in 2011. In 2013, Kohli reached the number one spot in the ICC rankings for ODI batsmen for the first time. During 2014 T20 World Cup, he set a record for the most runs scored in the tournament. In 2018, he achieved yet another milestone, becoming the world's top-ranked Test batsman, making him the only Indian cricketer to hold the number one spot in all three formats of the game. His form continued in 2019, when he became the first player to score 20,000 international runs in a single decade. In 2021, Kohli made the decision to step down as the captain of the Indian national team for T20Is, following the T20 World Cup and in early 2022 he stepped down as the captain of the Test team as well.\n",
3290
+ "\n",
3291
+ "He has received many accolades for his performances on the cricket field. He was recognized as the ICC ODI Player of the Year in 2012 and has won the Sir Garfield Sobers Trophy, given to the ICC Cricketer of the Year, on two occasions, in 2017 and 2018 respectively. Subsequently, Kohli also won ICC Test Player of the Year and ICC ODI Player of the Year awards in 2018, becoming the first player to win both awards in the same year. Additionally, he was named the Wisden Leading Cricketer in the World for three consecutive years, from 2016 to 2018. At the national level, Kohli was honoured with the Arjuna Award in 2013, the Padma Shri under the sports category in 2017 and the Khel Ratna award, India's highest sporting honour, in 2018.'''\n",
3292
+ ")"
3293
+ ]
3294
+ },
3295
+ {
3296
+ "cell_type": "code",
3297
+ "execution_count": 53,
3298
+ "metadata": {},
3299
+ "outputs": [
3300
+ {
3301
+ "name": "stdout",
3302
+ "output_type": "stream",
3303
+ "text": [
3304
+ "logits: tensor([[-6.4534, 6.6134]])\n",
3305
+ "POSITIVE\n",
3306
+ "Label: AI written\n"
3307
+ ]
3308
+ }
3309
+ ],
3310
+ "source": [
3311
+ "predict(\n",
3312
+ " '''\"The Flash\" can refer to several different things:\n",
3313
+ "\n",
3314
+ "1. **The Flash (DC Comics character)**: The Flash is the name of several superheroes appearing in American comic books published by DC Comics³.\n",
3315
+ "\n",
3316
+ "2. **The Flash (2014 TV series)**: This is an American superhero television series developed by Greg Berlanti, Andrew Kreisberg, and Geoff Johns, airing on The CW. It is based on the Barry Allen incarnation of DC Comics character the Flash, a costumed superhero crime-fighter with the power to move at superhuman speeds².\n",
3317
+ "\n",
3318
+ "3. **The Flash (2023 film)**: This is a 2023 American superhero film based on the DC Comics character of the same name. Produced by Warner Bros. Pictures, DC Studios, Double Dream, and the Disco Factory, and distributed by Warner Bros. Pictures, it is the 13th installment in the DC Extended Universe (DCEU)¹.\n",
3319
+ "'''\n",
3320
+ ")"
3321
+ ]
3322
+ },
3323
  {
3324
  "cell_type": "code",
3325
  "execution_count": null,