Ubuntu committed
Commit 2fb2dc0
1 Parent(s): 4d7573b

added fine-tuned model for GPT-3

.gitignore CHANGED
@@ -1 +1,2 @@
- keys.py
+ keys.py
+ wiki_gpt/
__pycache__/keys.cpython-310.pyc CHANGED
Binary files a/__pycache__/keys.cpython-310.pyc and b/__pycache__/keys.cpython-310.pyc differ
 
data/AI_checker_gpt3_remade.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eced73bd37592d7e9b002fc695eff3608be967847e298090e2275cc59b70523a
+ size 548390
data/original_data_gpt3.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6cd7b35d2e5f88acf8b3bf35f49c6851031707a13427bed3096b2022195eee69
+ size 5060493
gpt3_finetuned_model/checkpoint-15024/added_tokens.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "[CLS]": 101,
+ "[MASK]": 103,
+ "[PAD]": 0,
+ "[SEP]": 102,
+ "[UNK]": 100
+ }
gpt3_finetuned_model/checkpoint-15024/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "_name_or_path": "models/trained_model_v11",
+ "activation": "gelu",
+ "architectures": [
+ "DistilBertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
+ "id2label": {
+ "0": "NEGATIVE",
+ "1": "POSITIVE"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "NEGATIVE": 0,
+ "POSITIVE": 1
+ },
+ "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
+ "pad_token_id": 0,
+ "problem_type": "single_label_classification",
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.34.0",
+ "vocab_size": 30522
+ }
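
The configuration above identifies the checkpoint as a DistilBERT sequence classifier with two labels (NEGATIVE/POSITIVE) saved with transformers 4.34.0. The following is a minimal inference sketch for loading the committed checkpoint; it is not part of this commit's code, and reading POSITIVE as "GPT-3-generated" (versus NEGATIVE as human-written) is an assumption based on the repository's purpose.

# Minimal inference sketch (not committed code). Assumes the checkpoint directory
# below exists locally and that POSITIVE marks GPT-3-generated text.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "gpt3_finetuned_model/checkpoint-15024"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
model.eval()

text = "Sexhow railway station was a railway station ..."
inputs = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(dim=-1).item()])  # "NEGATIVE" or "POSITIVE"
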
gpt3_finetuned_model/checkpoint-15024/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08e1f6afa4c90e8a4bcccdfc4cce96d793ef0a8c1877fb2248f6c82ff12d912c
+ size 535727290
gpt3_finetuned_model/checkpoint-15024/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:093e71706112c59b9e886af85797cd68cbeaf9414453c54b69c5c195a37da325
+ size 267855978
gpt3_finetuned_model/checkpoint-15024/rng_state.pth ADDED
Binary file (14.2 kB)
 
gpt3_finetuned_model/checkpoint-15024/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66a0a11db4223dccc3415193638186a90b52f9f914b3b51fd8ffdbd3c713e739
+ size 1064
gpt3_finetuned_model/checkpoint-15024/special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "additional_special_tokens": [
+ "[PAD]",
+ "[UNK]",
+ "[CLS]",
+ "[SEP]",
+ "[MASK]"
+ ],
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
gpt3_finetuned_model/checkpoint-15024/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gpt3_finetuned_model/checkpoint-15024/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "[PAD]",
+ "[UNK]",
+ "[CLS]",
+ "[SEP]",
+ "[MASK]"
+ ],
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "max_length": 512,
+ "model_max_length": 512,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "stride": 0,
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "DistilBertTokenizer",
+ "truncation_side": "right",
+ "truncation_strategy": "longest_first",
+ "unk_token": "[UNK]"
+ }
gpt3_finetuned_model/checkpoint-15024/trainer_state.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "best_metric": 0.021361960098147392,
3
+ "best_model_checkpoint": "gpt3_finetuned_model/checkpoint-15024",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 15024,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1.966719914802982e-05,
14
+ "loss": 0.0219,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.07,
19
+ "learning_rate": 1.933439829605964e-05,
20
+ "loss": 0.012,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.1,
25
+ "learning_rate": 1.900159744408946e-05,
26
+ "loss": 0.0141,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.13,
31
+ "learning_rate": 1.866879659211928e-05,
32
+ "loss": 0.0096,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.17,
37
+ "learning_rate": 1.8335995740149097e-05,
38
+ "loss": 0.0135,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.2,
43
+ "learning_rate": 1.8003194888178915e-05,
44
+ "loss": 0.021,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.23,
49
+ "learning_rate": 1.7670394036208734e-05,
50
+ "loss": 0.0125,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.27,
55
+ "learning_rate": 1.7337593184238552e-05,
56
+ "loss": 0.0084,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.3,
61
+ "learning_rate": 1.700479233226837e-05,
62
+ "loss": 0.0128,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.33,
67
+ "learning_rate": 1.6671991480298192e-05,
68
+ "loss": 0.0142,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.37,
73
+ "learning_rate": 1.633919062832801e-05,
74
+ "loss": 0.0143,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.4,
79
+ "learning_rate": 1.600638977635783e-05,
80
+ "loss": 0.0087,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.43,
85
+ "learning_rate": 1.5673588924387647e-05,
86
+ "loss": 0.0085,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.47,
91
+ "learning_rate": 1.5340788072417466e-05,
92
+ "loss": 0.0122,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.5,
97
+ "learning_rate": 1.5007987220447286e-05,
98
+ "loss": 0.0126,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.53,
103
+ "learning_rate": 1.4675186368477104e-05,
104
+ "loss": 0.014,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.57,
109
+ "learning_rate": 1.4342385516506923e-05,
110
+ "loss": 0.0086,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.6,
115
+ "learning_rate": 1.4009584664536741e-05,
116
+ "loss": 0.0094,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.63,
121
+ "learning_rate": 1.3676783812566561e-05,
122
+ "loss": 0.0136,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.67,
127
+ "learning_rate": 1.334398296059638e-05,
128
+ "loss": 0.0057,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.7,
133
+ "learning_rate": 1.30111821086262e-05,
134
+ "loss": 0.0126,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.73,
139
+ "learning_rate": 1.2678381256656018e-05,
140
+ "loss": 0.013,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 0.77,
145
+ "learning_rate": 1.2345580404685838e-05,
146
+ "loss": 0.0096,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 0.8,
151
+ "learning_rate": 1.2012779552715656e-05,
152
+ "loss": 0.0068,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 0.83,
157
+ "learning_rate": 1.1679978700745476e-05,
158
+ "loss": 0.0057,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 0.87,
163
+ "learning_rate": 1.1347177848775295e-05,
164
+ "loss": 0.0062,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 0.9,
169
+ "learning_rate": 1.1014376996805112e-05,
170
+ "loss": 0.0138,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 0.93,
175
+ "learning_rate": 1.0681576144834932e-05,
176
+ "loss": 0.007,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 0.97,
181
+ "learning_rate": 1.034877529286475e-05,
182
+ "loss": 0.0054,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 1.0,
187
+ "learning_rate": 1.0015974440894568e-05,
188
+ "loss": 0.0061,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 1.0,
193
+ "eval_accuracy": 0.9955404685835996,
194
+ "eval_loss": 0.021361960098147392,
195
+ "eval_runtime": 198.8868,
196
+ "eval_samples_per_second": 302.162,
197
+ "eval_steps_per_second": 18.885,
198
+ "step": 15024
199
+ }
200
+ ],
201
+ "logging_steps": 500,
202
+ "max_steps": 30048,
203
+ "num_train_epochs": 2,
204
+ "save_steps": 500,
205
+ "total_flos": 2.288794378322357e+16,
206
+ "trial_name": null,
207
+ "trial_params": null
208
+ }
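
trainer_state.json above records one completed epoch of a planned two (global_step 15024 of max_steps 30048), training loss logged every 500 steps, and an epoch-end evaluation of roughly 99.55% accuracy with eval_loss 0.0214, which is also the best checkpoint. The training script itself is not in this commit, so the Trainer setup below is a reconstruction from those values: the 2e-5 base learning rate matches the logged linear-decay schedule, the batch size is consistent with 15024 steps per epoch assuming a train split of about 240k examples, and the evaluation/save strategies and dataset variables are assumptions.

# Hypothetical training setup inferred from trainer_state.json (the real script is
# not in this commit; values marked as assumptions are not taken from the source).
import numpy as np
import evaluate
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

model_path = "models/trained_model_v11"     # "_name_or_path" in config.json
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    return accuracy.compute(predictions=np.argmax(logits, axis=-1), references=labels)

args = TrainingArguments(
    output_dir="gpt3_finetuned_model",      # matches best_model_checkpoint
    num_train_epochs=2,                     # trainer_state: num_train_epochs
    learning_rate=2e-5,                     # consistent with the logged LR schedule
    per_device_train_batch_size=16,         # assumption; fits 15024 steps per epoch
    logging_steps=500,                      # trainer_state: logging_steps
    evaluation_strategy="epoch",            # assumption: eval ran once, at epoch end
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",      # best_metric equals the final eval_loss
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,                 # placeholders: tokenized Dataset objects
    eval_dataset=eval_ds,                   # prepared elsewhere in the repo
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
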
gpt3_finetuned_model/checkpoint-15024/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:723d5cdbe3a4ea51b735c6cbdf8a9dcb0dafce1461aeb999cf821459dd7890f8
+ size 4536
gpt3_finetuned_model/checkpoint-15024/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
gpt3_finetuned_model/runs/Oct09_08-16-50_ip-172-31-95-165/events.out.tfevents.1696839410.ip-172-31-95-165.74908.0 ADDED
Binary file (13.9 kB)
 
my_awesome_model/runs/Oct06_09-35-53_ip-172-31-95-165/events.out.tfevents.1696584953.ip-172-31-95-165.4302.0 DELETED
Binary file (4.14 kB)
 
my_awesome_model/runs/Oct06_10-12-41_ip-172-31-95-165/events.out.tfevents.1696587161.ip-172-31-95-165.5338.0 DELETED
Binary file (14.7 kB)
 
research/02_dl_Ai_checker.ipynb CHANGED
@@ -11,32 +11,42 @@
  },
  {
  "cell_type": "code",
- "execution_count": 3,
  "metadata": {},
- "outputs": [],
  "source": [
  "from datasets import Dataset, load_dataset"
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 4,
  "metadata": {},
  "outputs": [
  {
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "Downloading readme: 100%|██████████| 2.63k/2.63k [00:00<00:00, 19.4MB/s]\n",
- "Downloading data: 100%|██████████| 127M/127M [00:04<00:00, 30.3MB/s]\n",
- "Downloading data files: 100%|██████████| 1/1 [00:04<00:00, 4.21s/it]\n",
- "Extracting data files: 100%|██████████| 1/1 [00:02<00:00, 2.09s/it]\n",
- "Generating train split: 150000 examples [00:04, 30015.64 examples/s]\n"
  ]
  }
  ],
  "source": [
- "dataset_name= \"aadityaubhat/GPT-wiki-intro\"\n",
  "\n",
  "\n",
  "\n",
@@ -45,21 +55,21 @@
  },
  {
  "cell_type": "code",
- "execution_count": 5,
  "metadata": {},
  "outputs": [
  {
  "data": {
  "text/plain": [
  "DatasetDict({\n",
- " train: Dataset({\n",
- " features: ['id', 'url', 'title', 'wiki_intro', 'generated_intro', 'title_len', 'wiki_intro_len', 'generated_intro_len', 'prompt', 'generated_text', 'prompt_tokens', 'generated_text_tokens'],\n",
- " num_rows: 150000\n",
  " })\n",
  "})"
  ]
  },
- "execution_count": 5,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -70,29 +80,29 @@
  },
  {
  "cell_type": "code",
- "execution_count": 8,
  "metadata": {},
  "outputs": [
  {
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "Creating CSV from Arrow format: 100%|██████████| 150/150 [00:08<00:00, 18.69ba/s]\n"
  ]
  },
  {
  "data": {
  "text/plain": [
- "443537732"
  ]
  },
- "execution_count": 8,
  "metadata": {},
  "output_type": "execute_result"
  }
  ],
  "source": [
- "dataset['train'].to_csv(\"data/original_data.csv\")"
  ]
  },
  {

  },
  {
  "cell_type": "code",
+ "execution_count": 2,
  "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
  "source": [
  "from datasets import Dataset, load_dataset"
  ]
  },
  {
  "cell_type": "code",
+ "execution_count": 3,
  "metadata": {},
  "outputs": [
  {
  "name": "stderr",
  "output_type": "stream",
  "text": [
+ "Downloading readme: 100%|██████████| 2.45k/2.45k [00:00<00:00, 14.5MB/s]\n",
+ "Downloading data: 100%|██████████| 2.56M/2.56M [00:00<00:00, 4.77MB/s]\n",
+ "Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1.84it/s]\n",
+ "Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 603.06it/s]\n",
+ "Generating evaluation split: 100%|██████████| 238/238 [00:00<00:00, 4242.81 examples/s]\n"
  ]
  }
  ],
  "source": [
+ "# dataset_name= \"aadityaubhat/GPT-wiki-intro\"\n",
+ "dataset_name= \"potsawee/wiki_bio_gpt3_hallucination\"\n",
  "\n",
  "\n",
  "\n",

  },
  {
  "cell_type": "code",
+ "execution_count": 4,
  "metadata": {},
  "outputs": [
  {
  "data": {
  "text/plain": [
  "DatasetDict({\n",
+ " evaluation: Dataset({\n",
+ " features: ['gpt3_text', 'wiki_bio_text', 'gpt3_sentences', 'annotation', 'wiki_bio_test_idx', 'gpt3_text_samples'],\n",
+ " num_rows: 238\n",
  " })\n",
  "})"
  ]
  },
+ "execution_count": 4,
  "metadata": {},
  "output_type": "execute_result"
  }

  },
  {
  "cell_type": "code",
+ "execution_count": 7,
  "metadata": {},
  "outputs": [
  {
  "name": "stderr",
  "output_type": "stream",
  "text": [
+ "Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 5.62ba/s]\n"
  ]
  },
  {
  "data": {
  "text/plain": [
+ "5060493"
  ]
  },
+ "execution_count": 7,
  "metadata": {},
  "output_type": "execute_result"
  }
  ],
  "source": [
+ "dataset['evaluation'].to_csv(\"data/original_data_gpt3.csv\")"
  ]
  },
  {
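
The notebook change above swaps the 150,000-row aadityaubhat/GPT-wiki-intro training dump for the much smaller potsawee/wiki_bio_gpt3_hallucination dataset, whose single evaluation split has 238 rows, and exports it to data/original_data_gpt3.csv; the 5060493 bytes returned by to_csv match the size recorded in that file's LFS pointer earlier in this commit. A consolidated sketch of the updated cells (not the notebook itself):

# Consolidated version of the updated notebook cells (a sketch; package usage follows
# the notebook: datasets for the download, Dataset.to_csv for the export).
from datasets import load_dataset

dataset_name = "potsawee/wiki_bio_gpt3_hallucination"
dataset = load_dataset(dataset_name)
print(dataset)  # one "evaluation" split: 238 rows, gpt3_text / wiki_bio_text / annotation ...

# to_csv returns the number of bytes written (5060493 in the committed run).
n_bytes = dataset["evaluation"].to_csv("data/original_data_gpt3.csv")
print(n_bytes)
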
research/04_distilbert_for_classification.ipynb CHANGED
@@ -726,10 +726,11 @@
  }
  ],
  "source": [
  "model.save_pretrained(\"models/trained_model_v1\")\n",
  "tokenizer.save_pretrained(\"models/trained_model_v11\")\n",
- "model.save_pretrained(\"models/trained_model_v11\")\n",
- "tokenizer.save_pretrained(\"models/tokenizer_v1\")"
  ]
  },
  {

  }
  ],
  "source": [
+ "tokenizer.save_pretrained(\"models/tokenizer_v1\")\n",
  "model.save_pretrained(\"models/trained_model_v1\")\n",
+ "\n",
  "tokenizer.save_pretrained(\"models/trained_model_v11\")\n",
+ "model.save_pretrained(\"models/trained_model_v11\")"
  ]
  },
  {
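
The reordered cell above saves the tokenizer and the model side by side, so models/trained_model_v11 ends up holding both; that is what lets a single from_pretrained path (it is also the _name_or_path recorded in the checkpoint's config.json) reload the pair later. A small round-trip sketch, assuming the model and tokenizer objects from the notebook:

# Round-trip sketch for the save calls above (model/tokenizer are the objects built
# earlier in the notebook; directory names are the ones used in the committed cell).
model.save_pretrained("models/trained_model_v11")
tokenizer.save_pretrained("models/trained_model_v11")

from transformers import AutoModelForSequenceClassification, AutoTokenizer
reloaded_model = AutoModelForSequenceClassification.from_pretrained("models/trained_model_v11")
reloaded_tokenizer = AutoTokenizer.from_pretrained("models/trained_model_v11")
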
research/05_creating_data_using_gpt3.ipynb ADDED
@@ -0,0 +1,411 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os; os.chdir('..')"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import pandas as pd\n"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 3,
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "data": {
28
+ "text/html": [
29
+ "<div>\n",
30
+ "<style scoped>\n",
31
+ " .dataframe tbody tr th:only-of-type {\n",
32
+ " vertical-align: middle;\n",
33
+ " }\n",
34
+ "\n",
35
+ " .dataframe tbody tr th {\n",
36
+ " vertical-align: top;\n",
37
+ " }\n",
38
+ "\n",
39
+ " .dataframe thead th {\n",
40
+ " text-align: right;\n",
41
+ " }\n",
42
+ "</style>\n",
43
+ "<table border=\"1\" class=\"dataframe\">\n",
44
+ " <thead>\n",
45
+ " <tr style=\"text-align: right;\">\n",
46
+ " <th></th>\n",
47
+ " <th>id</th>\n",
48
+ " <th>url</th>\n",
49
+ " <th>title</th>\n",
50
+ " <th>wiki_intro</th>\n",
51
+ " <th>generated_intro</th>\n",
52
+ " <th>title_len</th>\n",
53
+ " <th>wiki_intro_len</th>\n",
54
+ " <th>generated_intro_len</th>\n",
55
+ " <th>prompt</th>\n",
56
+ " <th>generated_text</th>\n",
57
+ " <th>prompt_tokens</th>\n",
58
+ " <th>generated_text_tokens</th>\n",
59
+ " </tr>\n",
60
+ " </thead>\n",
61
+ " <tbody>\n",
62
+ " <tr>\n",
63
+ " <th>0</th>\n",
64
+ " <td>63064638</td>\n",
65
+ " <td>https://en.wikipedia.org/wiki/Sexhow%20railway...</td>\n",
66
+ " <td>Sexhow railway station</td>\n",
67
+ " <td>Sexhow railway station was a railway station b...</td>\n",
68
+ " <td>Sexhow railway station was a railway station l...</td>\n",
69
+ " <td>3</td>\n",
70
+ " <td>174</td>\n",
71
+ " <td>78</td>\n",
72
+ " <td>200 word wikipedia style introduction on 'Sexh...</td>\n",
73
+ " <td>located in the town of Sexhow, on the Cumbria...</td>\n",
74
+ " <td>25</td>\n",
75
+ " <td>88</td>\n",
76
+ " </tr>\n",
77
+ " <tr>\n",
78
+ " <th>1</th>\n",
79
+ " <td>279621</td>\n",
80
+ " <td>https://en.wikipedia.org/wiki/Eti%C3%A4inen</td>\n",
81
+ " <td>Etiäinen</td>\n",
82
+ " <td>In Finnish folklore, all places and things, an...</td>\n",
83
+ " <td>In Finnish folklore, all places and things, an...</td>\n",
84
+ " <td>1</td>\n",
85
+ " <td>187</td>\n",
86
+ " <td>80</td>\n",
87
+ " <td>200 word wikipedia style introduction on 'Etiä...</td>\n",
88
+ " <td>animate or inanimate, have a spirit or \"etiäi...</td>\n",
89
+ " <td>26</td>\n",
90
+ " <td>101</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>2</th>\n",
94
+ " <td>287229</td>\n",
95
+ " <td>https://en.wikipedia.org/wiki/Inverse%20functi...</td>\n",
96
+ " <td>Inverse function theorem</td>\n",
97
+ " <td>In mathematics, specifically differential calc...</td>\n",
98
+ " <td>In mathematics, specifically differential calc...</td>\n",
99
+ " <td>3</td>\n",
100
+ " <td>170</td>\n",
101
+ " <td>59</td>\n",
102
+ " <td>200 word wikipedia style introduction on 'Inve...</td>\n",
103
+ " <td>function theorem states that for every real-v...</td>\n",
104
+ " <td>26</td>\n",
105
+ " <td>65</td>\n",
106
+ " </tr>\n",
107
+ " <tr>\n",
108
+ " <th>3</th>\n",
109
+ " <td>26712375</td>\n",
110
+ " <td>https://en.wikipedia.org/wiki/Stepping%20on%20...</td>\n",
111
+ " <td>Stepping on Roses</td>\n",
112
+ " <td>is a Japanese shōjo manga series written and i...</td>\n",
113
+ " <td>is a Japanese shōjo manga series written and i...</td>\n",
114
+ " <td>3</td>\n",
115
+ " <td>335</td>\n",
116
+ " <td>121</td>\n",
117
+ " <td>200 word wikipedia style introduction on 'Step...</td>\n",
118
+ " <td>and illustrated by Maki Fujii. The series fol...</td>\n",
119
+ " <td>26</td>\n",
120
+ " <td>150</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>4</th>\n",
124
+ " <td>38894426</td>\n",
125
+ " <td>https://en.wikipedia.org/wiki/Rob%20Bradley</td>\n",
126
+ " <td>Rob Bradley</td>\n",
127
+ " <td>Robert Milner \"Rob\" Bradley, Jr. (born August ...</td>\n",
128
+ " <td>Robert Milner \"Rob\" Bradley, Jr. (born August ...</td>\n",
129
+ " <td>2</td>\n",
130
+ " <td>170</td>\n",
131
+ " <td>136</td>\n",
132
+ " <td>200 word wikipedia style introduction on 'Rob ...</td>\n",
133
+ " <td>29, 1973) is an American former professional ...</td>\n",
134
+ " <td>28</td>\n",
135
+ " <td>162</td>\n",
136
+ " </tr>\n",
137
+ " </tbody>\n",
138
+ "</table>\n",
139
+ "</div>"
140
+ ],
141
+ "text/plain": [
142
+ " id url \\\n",
143
+ "0 63064638 https://en.wikipedia.org/wiki/Sexhow%20railway... \n",
144
+ "1 279621 https://en.wikipedia.org/wiki/Eti%C3%A4inen \n",
145
+ "2 287229 https://en.wikipedia.org/wiki/Inverse%20functi... \n",
146
+ "3 26712375 https://en.wikipedia.org/wiki/Stepping%20on%20... \n",
147
+ "4 38894426 https://en.wikipedia.org/wiki/Rob%20Bradley \n",
148
+ "\n",
149
+ " title \\\n",
150
+ "0 Sexhow railway station \n",
151
+ "1 Etiäinen \n",
152
+ "2 Inverse function theorem \n",
153
+ "3 Stepping on Roses \n",
154
+ "4 Rob Bradley \n",
155
+ "\n",
156
+ " wiki_intro \\\n",
157
+ "0 Sexhow railway station was a railway station b... \n",
158
+ "1 In Finnish folklore, all places and things, an... \n",
159
+ "2 In mathematics, specifically differential calc... \n",
160
+ "3 is a Japanese shōjo manga series written and i... \n",
161
+ "4 Robert Milner \"Rob\" Bradley, Jr. (born August ... \n",
162
+ "\n",
163
+ " generated_intro title_len \\\n",
164
+ "0 Sexhow railway station was a railway station l... 3 \n",
165
+ "1 In Finnish folklore, all places and things, an... 1 \n",
166
+ "2 In mathematics, specifically differential calc... 3 \n",
167
+ "3 is a Japanese shōjo manga series written and i... 3 \n",
168
+ "4 Robert Milner \"Rob\" Bradley, Jr. (born August ... 2 \n",
169
+ "\n",
170
+ " wiki_intro_len generated_intro_len \\\n",
171
+ "0 174 78 \n",
172
+ "1 187 80 \n",
173
+ "2 170 59 \n",
174
+ "3 335 121 \n",
175
+ "4 170 136 \n",
176
+ "\n",
177
+ " prompt \\\n",
178
+ "0 200 word wikipedia style introduction on 'Sexh... \n",
179
+ "1 200 word wikipedia style introduction on 'Etiä... \n",
180
+ "2 200 word wikipedia style introduction on 'Inve... \n",
181
+ "3 200 word wikipedia style introduction on 'Step... \n",
182
+ "4 200 word wikipedia style introduction on 'Rob ... \n",
183
+ "\n",
184
+ " generated_text prompt_tokens \\\n",
185
+ "0 located in the town of Sexhow, on the Cumbria... 25 \n",
186
+ "1 animate or inanimate, have a spirit or \"etiäi... 26 \n",
187
+ "2 function theorem states that for every real-v... 26 \n",
188
+ "3 and illustrated by Maki Fujii. The series fol... 26 \n",
189
+ "4 29, 1973) is an American former professional ... 28 \n",
190
+ "\n",
191
+ " generated_text_tokens \n",
192
+ "0 88 \n",
193
+ "1 101 \n",
194
+ "2 65 \n",
195
+ "3 150 \n",
196
+ "4 162 "
197
+ ]
198
+ },
199
+ "execution_count": 3,
200
+ "metadata": {},
201
+ "output_type": "execute_result"
202
+ }
203
+ ],
204
+ "source": [
205
+ "original_df= pd.read_csv(\"data/original_data.csv\")\n",
206
+ "original_df.head()"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 4,
212
+ "metadata": {},
213
+ "outputs": [
214
+ {
215
+ "data": {
216
+ "text/plain": [
217
+ "array([\"200 word wikipedia style introduction on 'Sexhow railway station'\\n Sexhow railway station was a railway station\",\n",
218
+ " \"200 word wikipedia style introduction on 'Etiäinen'\\n In Finnish folklore, all places and things,\",\n",
219
+ " \"200 word wikipedia style introduction on 'Inverse function theorem'\\n In mathematics, specifically differential calculus, the inverse\",\n",
220
+ " \"200 word wikipedia style introduction on 'Stepping on Roses'\\n is a Japanese shōjo manga series written\"],\n",
221
+ " dtype=object)"
222
+ ]
223
+ },
224
+ "execution_count": 4,
225
+ "metadata": {},
226
+ "output_type": "execute_result"
227
+ }
228
+ ],
229
+ "source": [
230
+ "original_df.prompt.values[:4]"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": 5,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "import pickle\n",
240
+ "from tqdm import tqdm\n",
241
+ "import pandas as pd\n",
242
+ "import re\n",
243
+ "import openai\n",
244
+ "from keys import OPENAI_API_KEY\n",
245
+ "import dask"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 6,
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "openai.api_key = OPENAI_API_KEY\n"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": 7,
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "def get_openai_response(prompt:str): \n",
264
+ " startertext= prompt.split(\"\\n\")[-1]\n",
265
+ " \n",
266
+ " response= openai.Completion.create(\n",
267
+ " model=\"gpt-3.5-turbo-instruct\",\n",
268
+ " prompt=prompt,\n",
269
+ " temperature=0.7,\n",
270
+ " max_tokens=300,\n",
271
+ " top_p=1,\n",
272
+ " frequency_penalty=0.4,\n",
273
+ " presence_penalty=0.1\n",
274
+ " )\n",
275
+ " \n",
276
+ " return startertext.strip()+ response.choices[0].text"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": 8,
282
+ "metadata": {},
283
+ "outputs": [
284
+ {
285
+ "data": {
286
+ "text/plain": [
287
+ "\"200 word wikipedia style introduction on 'Sexhow railway station'\\n Sexhow railway station was a railway station\""
288
+ ]
289
+ },
290
+ "execution_count": 8,
291
+ "metadata": {},
292
+ "output_type": "execute_result"
293
+ }
294
+ ],
295
+ "source": [
296
+ "original_df.prompt[0]\n"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 10,
302
+ "metadata": {},
303
+ "outputs": [],
304
+ "source": [
305
+ "\n",
306
+ "response= get_openai_response(original_df.prompt[0])"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": 11,
312
+ "metadata": {},
313
+ "outputs": [
314
+ {
315
+ "data": {
316
+ "text/plain": [
317
+ "\"Sexhow railway station was a railway station located in the small village of Sexhow, North Yorkshire, England. It was a small station on the Middlesbrough to Whitby branch line, which opened in 1865. The station served as a vital link for the local community, providing access to nearby towns and cities. The station had two platforms and a small waiting room for passengers. In its early years, it was primarily used for transporting agricultural goods and livestock. However, with the rise of tourism in the area, the station became an important stop for tourists visiting the North Yorkshire Moors National Park and the nearby coastal town of Whitby. Despite its small size and rural location, Sexhow railway station played an important role in connecting the local community to the rest of England. Unfortunately, due to declining usage and financial difficulties, the station was closed in 1960 and has since been demolished. Today, only remnants of its platforms can be seen, serving as a reminder of its once important role in the region's transportation network.\""
318
+ ]
319
+ },
320
+ "execution_count": 11,
321
+ "metadata": {},
322
+ "output_type": "execute_result"
323
+ }
324
+ ],
325
+ "source": [
326
+ "response"
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 12,
332
+ "metadata": {},
333
+ "outputs": [
334
+ {
335
+ "ename": "KeyboardInterrupt",
336
+ "evalue": "",
337
+ "output_type": "error",
338
+ "traceback": [
339
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
340
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
341
+ "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb Cell 11\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m original_df[\u001b[39m'\u001b[39m\u001b[39mgenerated_intro_gpt3\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m=\u001b[39m original_df\u001b[39m.\u001b[39;49mgenerated_intro\u001b[39m.\u001b[39;49mapply(get_openai_response)\n",
342
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/series.py:4760\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4625\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\n\u001b[1;32m 4626\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 4627\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4632\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 4633\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m Series:\n\u001b[1;32m 4634\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 4635\u001b[0m \u001b[39m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4636\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4751\u001b[0m \u001b[39m dtype: float64\u001b[39;00m\n\u001b[1;32m 4752\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 4753\u001b[0m \u001b[39mreturn\u001b[39;00m SeriesApply(\n\u001b[1;32m 4754\u001b[0m \u001b[39mself\u001b[39;49m,\n\u001b[1;32m 4755\u001b[0m func,\n\u001b[1;32m 4756\u001b[0m convert_dtype\u001b[39m=\u001b[39;49mconvert_dtype,\n\u001b[1;32m 4757\u001b[0m by_row\u001b[39m=\u001b[39;49mby_row,\n\u001b[1;32m 4758\u001b[0m args\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m 4759\u001b[0m kwargs\u001b[39m=\u001b[39;49mkwargs,\n\u001b[0;32m-> 4760\u001b[0m )\u001b[39m.\u001b[39;49mapply()\n",
343
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/apply.py:1207\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1204\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_compat()\n\u001b[1;32m 1206\u001b[0m \u001b[39m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1207\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n",
344
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/apply.py:1287\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1281\u001b[0m \u001b[39m# row-wise access\u001b[39;00m\n\u001b[1;32m 1282\u001b[0m \u001b[39m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1283\u001b[0m \u001b[39m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[39m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[39m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m action \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(obj\u001b[39m.\u001b[39mdtype, CategoricalDtype) \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 1287\u001b[0m mapped \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39;49m_map_values(\n\u001b[1;32m 1288\u001b[0m mapper\u001b[39m=\u001b[39;49mcurried, na_action\u001b[39m=\u001b[39;49maction, convert\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mconvert_dtype\n\u001b[1;32m 1289\u001b[0m )\n\u001b[1;32m 1291\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(mapped) \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(mapped[\u001b[39m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1292\u001b[0m \u001b[39m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1293\u001b[0m \u001b[39m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1294\u001b[0m \u001b[39mreturn\u001b[39;00m obj\u001b[39m.\u001b[39m_constructor_expanddim(\u001b[39mlist\u001b[39m(mapped), index\u001b[39m=\u001b[39mobj\u001b[39m.\u001b[39mindex)\n",
345
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[39mreturn\u001b[39;00m arr\u001b[39m.\u001b[39mmap(mapper, na_action\u001b[39m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[39mreturn\u001b[39;00m algorithms\u001b[39m.\u001b[39;49mmap_array(arr, mapper, na_action\u001b[39m=\u001b[39;49mna_action, convert\u001b[39m=\u001b[39;49mconvert)\n",
346
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[39m=\u001b[39m arr\u001b[39m.\u001b[39mastype(\u001b[39mobject\u001b[39m, copy\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[39mif\u001b[39;00m na_action \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[39mreturn\u001b[39;00m lib\u001b[39m.\u001b[39;49mmap_infer(values, mapper, convert\u001b[39m=\u001b[39;49mconvert)\n\u001b[1;32m 1815\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[39mreturn\u001b[39;00m lib\u001b[39m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[39m=\u001b[39misna(values)\u001b[39m.\u001b[39mview(np\u001b[39m.\u001b[39muint8), convert\u001b[39m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n",
347
+ "File \u001b[0;32mlib.pyx:2917\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
348
+ "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb Cell 11\u001b[0m line \u001b[0;36m4\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_openai_response\u001b[39m(prompt:\u001b[39mstr\u001b[39m): \n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m startertext\u001b[39m=\u001b[39m prompt\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m)[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m response\u001b[39m=\u001b[39m openai\u001b[39m.\u001b[39;49mCompletion\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m model\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mgpt-3.5-turbo-instruct\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=5'>6</a>\u001b[0m prompt\u001b[39m=\u001b[39;49mprompt,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=6'>7</a>\u001b[0m temperature\u001b[39m=\u001b[39;49m\u001b[39m0.7\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=7'>8</a>\u001b[0m max_tokens\u001b[39m=\u001b[39;49m\u001b[39m300\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=8'>9</a>\u001b[0m top_p\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a>\u001b[0m frequency_penalty\u001b[39m=\u001b[39;49m\u001b[39m0.4\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a>\u001b[0m presence_penalty\u001b[39m=\u001b[39;49m\u001b[39m0.1\u001b[39;49m\n\u001b[1;32m <a 
href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=11'>12</a>\u001b[0m )\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a22456d62656464696e6773227d/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39mreturn\u001b[39;00m startertext\u001b[39m.\u001b[39mstrip()\u001b[39m+\u001b[39m response\u001b[39m.\u001b[39mchoices[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mtext\n",
349
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_resources/completion.py:25\u001b[0m, in \u001b[0;36mCompletion.create\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 24\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mcreate(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 26\u001b[0m \u001b[39mexcept\u001b[39;00m TryAgain \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 27\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m time\u001b[39m.\u001b[39mtime() \u001b[39m>\u001b[39m start \u001b[39m+\u001b[39m timeout:\n",
350
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:155\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[0;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 131\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[1;32m 139\u001b[0m ):\n\u001b[1;32m 140\u001b[0m (\n\u001b[1;32m 141\u001b[0m deployment_id,\n\u001b[1;32m 142\u001b[0m engine,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 156\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 157\u001b[0m url,\n\u001b[1;32m 158\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 159\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 160\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 161\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 162\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 163\u001b[0m )\n\u001b[1;32m 165\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[1;32m 166\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[1;32m 167\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n",
351
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_requestor.py:289\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[0;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[1;32m 279\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 280\u001b[0m method,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 287\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 288\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m--> 289\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest_raw(\n\u001b[1;32m 290\u001b[0m method\u001b[39m.\u001b[39;49mlower(),\n\u001b[1;32m 291\u001b[0m url,\n\u001b[1;32m 292\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 293\u001b[0m supplied_headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 294\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 295\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 296\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 297\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 298\u001b[0m )\n\u001b[1;32m 299\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response(result, stream)\n\u001b[1;32m 300\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n",
352
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_requestor.py:606\u001b[0m, in \u001b[0;36mAPIRequestor.request_raw\u001b[0;34m(self, method, url, params, supplied_headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 604\u001b[0m _thread_context\u001b[39m.\u001b[39msession_create_time \u001b[39m=\u001b[39m time\u001b[39m.\u001b[39mtime()\n\u001b[1;32m 605\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 606\u001b[0m result \u001b[39m=\u001b[39m _thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 607\u001b[0m method,\n\u001b[1;32m 608\u001b[0m abs_url,\n\u001b[1;32m 609\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 610\u001b[0m data\u001b[39m=\u001b[39;49mdata,\n\u001b[1;32m 611\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 612\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 613\u001b[0m timeout\u001b[39m=\u001b[39;49mrequest_timeout \u001b[39mif\u001b[39;49;00m request_timeout \u001b[39melse\u001b[39;49;00m TIMEOUT_SECS,\n\u001b[1;32m 614\u001b[0m proxies\u001b[39m=\u001b[39;49m_thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mproxies,\n\u001b[1;32m 615\u001b[0m )\n\u001b[1;32m 616\u001b[0m \u001b[39mexcept\u001b[39;00m requests\u001b[39m.\u001b[39mexceptions\u001b[39m.\u001b[39mTimeout \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 617\u001b[0m \u001b[39mraise\u001b[39;00m error\u001b[39m.\u001b[39mTimeout(\u001b[39m\"\u001b[39m\u001b[39mRequest timed out: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(e)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n",
353
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 591\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n",
354
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 705\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n",
355
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[39m=\u001b[39m TimeoutSauce(connect\u001b[39m=\u001b[39mtimeout, read\u001b[39m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 487\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 488\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 489\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 490\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 491\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 492\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 493\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 494\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 496\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 497\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 498\u001b[0m )\n\u001b[1;32m 500\u001b[0m \u001b[39mexcept\u001b[39;00m (ProtocolError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m(err, request\u001b[39m=\u001b[39mrequest)\n",
356
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[39m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 791\u001b[0m conn,\n\u001b[1;32m 792\u001b[0m method,\n\u001b[1;32m 793\u001b[0m url,\n\u001b[1;32m 794\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 795\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 796\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 797\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 798\u001b[0m retries\u001b[39m=\u001b[39;49mretries,\n\u001b[1;32m 799\u001b[0m response_conn\u001b[39m=\u001b[39;49mresponse_conn,\n\u001b[1;32m 800\u001b[0m preload_content\u001b[39m=\u001b[39;49mpreload_content,\n\u001b[1;32m 801\u001b[0m decode_content\u001b[39m=\u001b[39;49mdecode_content,\n\u001b[1;32m 802\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mresponse_kw,\n\u001b[1;32m 803\u001b[0m )\n\u001b[1;32m 805\u001b[0m \u001b[39m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
357
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[39m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 537\u001b[0m \u001b[39mexcept\u001b[39;00m (BaseSSLError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n",
358
+ "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connection.py:461\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mresponse\u001b[39;00m \u001b[39mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 460\u001b[0m \u001b[39m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 461\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 463\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 464\u001b[0m assert_header_parsing(httplib_response\u001b[39m.\u001b[39mmsg)\n",
359
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:1375\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1374\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1375\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1376\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1377\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
360
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 319\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[39mbreak\u001b[39;00m\n",
361
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n",
362
+ "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 706\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
363
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1275\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
364
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n",
365
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
366
+ ]
367
+ }
368
+ ],
369
+ "source": [
370
+ "original_df['generated_intro_gpt3']= original_df.generated_intro.apply(get_openai_response)"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "original_df.to_csv(\"data/new_original_df_with_gpt3.csv\")"
380
+ ]
381
+ },
382
+ {
383
+ "cell_type": "code",
384
+ "execution_count": null,
385
+ "metadata": {},
386
+ "outputs": [],
387
+ "source": []
388
+ }
389
+ ],
390
+ "metadata": {
391
+ "kernelspec": {
392
+ "display_name": "venv",
393
+ "language": "python",
394
+ "name": "python3"
395
+ },
396
+ "language_info": {
397
+ "codemirror_mode": {
398
+ "name": "ipython",
399
+ "version": 3
400
+ },
401
+ "file_extension": ".py",
402
+ "mimetype": "text/x-python",
403
+ "name": "python",
404
+ "nbconvert_exporter": "python",
405
+ "pygments_lexer": "ipython3",
406
+ "version": "3.10.12"
407
+ }
408
+ },
409
+ "nbformat": 4,
410
+ "nbformat_minor": 2
411
+ }
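
The last executed cell above applies get_openai_response over an entire column in one Series.apply call and was stopped by hand (the KeyboardInterrupt traceback), which discards any completions already received; note also that it maps over generated_intro rather than prompt, even though get_openai_response splits its argument as a prompt, which may be unintended. A more interruption-tolerant variant would iterate row by row, retry transient API errors, and checkpoint partial results. The sketch below reuses the notebook's objects (original_df, get_openai_response, pd); the retry and checkpoint values are assumptions.

# Interruption-tolerant generation loop (a sketch; retry/checkpoint choices are
# assumptions, not taken from the notebook).
import time
from tqdm import tqdm

results = {}
for idx, prompt in tqdm(original_df.prompt.items(), total=len(original_df)):
    for attempt in range(3):
        try:
            results[idx] = get_openai_response(prompt)
            break
        except Exception:
            time.sleep(5 * (attempt + 1))   # back off, then retry transient API errors
    if len(results) % 100 == 0:
        # checkpoint partial progress so an interruption does not lose finished rows
        pd.Series(results, name="generated_intro_gpt3").to_csv("data/gpt3_partial.csv")

original_df["generated_intro_gpt3"] = pd.Series(results)
original_df.to_csv("data/new_original_df_with_gpt3.csv")
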
research/05_data_gpt.ipynb ADDED
The diff for this file is too large to render. See raw diff