{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os; os.chdir('..')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idurltitlewiki_introgenerated_introtitle_lenwiki_intro_lengenerated_intro_lenpromptgenerated_textprompt_tokensgenerated_text_tokens
063064638https://en.wikipedia.org/wiki/Sexhow%20railway...Sexhow railway stationSexhow railway station was a railway station b...Sexhow railway station was a railway station l...317478200 word wikipedia style introduction on 'Sexh...located in the town of Sexhow, on the Cumbria...2588
1279621https://en.wikipedia.org/wiki/Eti%C3%A4inenEtiäinenIn Finnish folklore, all places and things, an...In Finnish folklore, all places and things, an...118780200 word wikipedia style introduction on 'Etiä...animate or inanimate, have a spirit or \"etiäi...26101
2287229https://en.wikipedia.org/wiki/Inverse%20functi...Inverse function theoremIn mathematics, specifically differential calc...In mathematics, specifically differential calc...317059200 word wikipedia style introduction on 'Inve...function theorem states that for every real-v...2665
326712375https://en.wikipedia.org/wiki/Stepping%20on%20...Stepping on Rosesis a Japanese shōjo manga series written and i...is a Japanese shōjo manga series written and i...3335121200 word wikipedia style introduction on 'Step...and illustrated by Maki Fujii. The series fol...26150
438894426https://en.wikipedia.org/wiki/Rob%20BradleyRob BradleyRobert Milner \"Rob\" Bradley, Jr. (born August ...Robert Milner \"Rob\" Bradley, Jr. (born August ...2170136200 word wikipedia style introduction on 'Rob ...29, 1973) is an American former professional ...28162
\n", "
" ], "text/plain": [ " id url \\\n", "0 63064638 https://en.wikipedia.org/wiki/Sexhow%20railway... \n", "1 279621 https://en.wikipedia.org/wiki/Eti%C3%A4inen \n", "2 287229 https://en.wikipedia.org/wiki/Inverse%20functi... \n", "3 26712375 https://en.wikipedia.org/wiki/Stepping%20on%20... \n", "4 38894426 https://en.wikipedia.org/wiki/Rob%20Bradley \n", "\n", " title \\\n", "0 Sexhow railway station \n", "1 Etiäinen \n", "2 Inverse function theorem \n", "3 Stepping on Roses \n", "4 Rob Bradley \n", "\n", " wiki_intro \\\n", "0 Sexhow railway station was a railway station b... \n", "1 In Finnish folklore, all places and things, an... \n", "2 In mathematics, specifically differential calc... \n", "3 is a Japanese shōjo manga series written and i... \n", "4 Robert Milner \"Rob\" Bradley, Jr. (born August ... \n", "\n", " generated_intro title_len \\\n", "0 Sexhow railway station was a railway station l... 3 \n", "1 In Finnish folklore, all places and things, an... 1 \n", "2 In mathematics, specifically differential calc... 3 \n", "3 is a Japanese shōjo manga series written and i... 3 \n", "4 Robert Milner \"Rob\" Bradley, Jr. (born August ... 2 \n", "\n", " wiki_intro_len generated_intro_len \\\n", "0 174 78 \n", "1 187 80 \n", "2 170 59 \n", "3 335 121 \n", "4 170 136 \n", "\n", " prompt \\\n", "0 200 word wikipedia style introduction on 'Sexh... \n", "1 200 word wikipedia style introduction on 'Etiä... \n", "2 200 word wikipedia style introduction on 'Inve... \n", "3 200 word wikipedia style introduction on 'Step... \n", "4 200 word wikipedia style introduction on 'Rob ... \n", "\n", " generated_text prompt_tokens \\\n", "0 located in the town of Sexhow, on the Cumbria... 25 \n", "1 animate or inanimate, have a spirit or \"etiäi... 26 \n", "2 function theorem states that for every real-v... 26 \n", "3 and illustrated by Maki Fujii. The series fol... 26 \n", "4 29, 1973) is an American former professional ... 28 \n", "\n", " generated_text_tokens \n", "0 88 \n", "1 101 \n", "2 65 \n", "3 150 \n", "4 162 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "original_df= pd.read_csv(\"data/original_data.csv\")\n", "original_df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([\"200 word wikipedia style introduction on 'Sexhow railway station'\\n Sexhow railway station was a railway station\",\n", " \"200 word wikipedia style introduction on 'Etiäinen'\\n In Finnish folklore, all places and things,\",\n", " \"200 word wikipedia style introduction on 'Inverse function theorem'\\n In mathematics, specifically differential calculus, the inverse\",\n", " \"200 word wikipedia style introduction on 'Stepping on Roses'\\n is a Japanese shōjo manga series written\"],\n", " dtype=object)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "original_df.prompt.values[:4]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "from tqdm import tqdm\n", "import pandas as pd\n", "import re\n", "import openai\n", "from keys import OPENAI_API_KEY\n", "import dask" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "openai.api_key = OPENAI_API_KEY\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def get_openai_response(prompt:str): \n", " startertext= prompt.split(\"\\n\")[-1]\n", " \n", " response= openai.Completion.create(\n", " model=\"gpt-3.5-turbo-instruct\",\n", " prompt=prompt,\n", " temperature=0.7,\n", " max_tokens=300,\n", " top_p=1,\n", " frequency_penalty=0.4,\n", " presence_penalty=0.1\n", " )\n", " \n", " return startertext.strip()+ response.choices[0].text" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"200 word wikipedia style introduction on 'Sexhow railway station'\\n Sexhow railway station was a railway station\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "original_df.prompt[0]\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "\n", "response= get_openai_response(original_df.prompt[0])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"Sexhow railway station was a railway station located in the small village of Sexhow, North Yorkshire, England. It was a small station on the Middlesbrough to Whitby branch line, which opened in 1865. The station served as a vital link for the local community, providing access to nearby towns and cities. The station had two platforms and a small waiting room for passengers. In its early years, it was primarily used for transporting agricultural goods and livestock. However, with the rise of tourism in the area, the station became an important stop for tourists visiting the North Yorkshire Moors National Park and the nearby coastal town of Whitby. Despite its small size and rural location, Sexhow railway station played an important role in connecting the local community to the rest of England. Unfortunately, due to declining usage and financial difficulties, the station was closed in 1960 and has since been demolished. Today, only remnants of its platforms can be seen, serving as a reminder of its once important role in the region's transportation network.\"" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb Cell 11\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m original_df[\u001b[39m'\u001b[39m\u001b[39mgenerated_intro_gpt3\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m=\u001b[39m original_df\u001b[39m.\u001b[39;49mgenerated_intro\u001b[39m.\u001b[39;49mapply(get_openai_response)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/series.py:4760\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4625\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\n\u001b[1;32m 4626\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 4627\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4632\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 4633\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m Series:\n\u001b[1;32m 4634\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 4635\u001b[0m \u001b[39m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4636\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4751\u001b[0m \u001b[39m dtype: float64\u001b[39;00m\n\u001b[1;32m 4752\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 4753\u001b[0m \u001b[39mreturn\u001b[39;00m SeriesApply(\n\u001b[1;32m 4754\u001b[0m \u001b[39mself\u001b[39;49m,\n\u001b[1;32m 4755\u001b[0m func,\n\u001b[1;32m 4756\u001b[0m convert_dtype\u001b[39m=\u001b[39;49mconvert_dtype,\n\u001b[1;32m 4757\u001b[0m by_row\u001b[39m=\u001b[39;49mby_row,\n\u001b[1;32m 4758\u001b[0m args\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m 4759\u001b[0m kwargs\u001b[39m=\u001b[39;49mkwargs,\n\u001b[0;32m-> 4760\u001b[0m )\u001b[39m.\u001b[39;49mapply()\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/apply.py:1207\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1204\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_compat()\n\u001b[1;32m 1206\u001b[0m \u001b[39m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1207\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/apply.py:1287\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1281\u001b[0m \u001b[39m# row-wise access\u001b[39;00m\n\u001b[1;32m 1282\u001b[0m \u001b[39m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1283\u001b[0m \u001b[39m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[39m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[39m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m action \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(obj\u001b[39m.\u001b[39mdtype, CategoricalDtype) \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 1287\u001b[0m mapped \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39;49m_map_values(\n\u001b[1;32m 1288\u001b[0m mapper\u001b[39m=\u001b[39;49mcurried, na_action\u001b[39m=\u001b[39;49maction, convert\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mconvert_dtype\n\u001b[1;32m 1289\u001b[0m )\n\u001b[1;32m 1291\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(mapped) \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(mapped[\u001b[39m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1292\u001b[0m \u001b[39m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1293\u001b[0m \u001b[39m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1294\u001b[0m \u001b[39mreturn\u001b[39;00m obj\u001b[39m.\u001b[39m_constructor_expanddim(\u001b[39mlist\u001b[39m(mapped), index\u001b[39m=\u001b[39mobj\u001b[39m.\u001b[39mindex)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[39mreturn\u001b[39;00m arr\u001b[39m.\u001b[39mmap(mapper, na_action\u001b[39m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[39mreturn\u001b[39;00m algorithms\u001b[39m.\u001b[39;49mmap_array(arr, mapper, na_action\u001b[39m=\u001b[39;49mna_action, convert\u001b[39m=\u001b[39;49mconvert)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[39m=\u001b[39m arr\u001b[39m.\u001b[39mastype(\u001b[39mobject\u001b[39m, copy\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[39mif\u001b[39;00m na_action \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[39mreturn\u001b[39;00m lib\u001b[39m.\u001b[39;49mmap_infer(values, mapper, convert\u001b[39m=\u001b[39;49mconvert)\n\u001b[1;32m 1815\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[39mreturn\u001b[39;00m lib\u001b[39m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[39m=\u001b[39misna(values)\u001b[39m.\u001b[39mview(np\u001b[39m.\u001b[39muint8), convert\u001b[39m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", "File \u001b[0;32mlib.pyx:2917\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_creating_data_using_gpt3.ipynb Cell 11\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_openai_response\u001b[39m(prompt:\u001b[39mstr\u001b[39m): \n\u001b[1;32m 2\u001b[0m startertext\u001b[39m=\u001b[39m prompt\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m)[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[0;32m----> 4\u001b[0m response\u001b[39m=\u001b[39m openai\u001b[39m.\u001b[39;49mCompletion\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 5\u001b[0m model\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mgpt-3.5-turbo-instruct\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 6\u001b[0m prompt\u001b[39m=\u001b[39;49mprompt,\n\u001b[1;32m 7\u001b[0m temperature\u001b[39m=\u001b[39;49m\u001b[39m0.7\u001b[39;49m,\n\u001b[1;32m 8\u001b[0m max_tokens\u001b[39m=\u001b[39;49m\u001b[39m300\u001b[39;49m,\n\u001b[1;32m 9\u001b[0m top_p\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m,\n\u001b[1;32m 10\u001b[0m frequency_penalty\u001b[39m=\u001b[39;49m\u001b[39m0.4\u001b[39;49m,\n\u001b[1;32m 11\u001b[0m presence_penalty\u001b[39m=\u001b[39;49m\u001b[39m0.1\u001b[39;49m\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 14\u001b[0m \u001b[39mreturn\u001b[39;00m startertext\u001b[39m.\u001b[39mstrip()\u001b[39m+\u001b[39m response\u001b[39m.\u001b[39mchoices[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mtext\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_resources/completion.py:25\u001b[0m, in \u001b[0;36mCompletion.create\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 24\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mcreate(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 26\u001b[0m \u001b[39mexcept\u001b[39;00m TryAgain \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 27\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m time\u001b[39m.\u001b[39mtime() \u001b[39m>\u001b[39m start \u001b[39m+\u001b[39m timeout:\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:155\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[0;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 131\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[1;32m 139\u001b[0m ):\n\u001b[1;32m 140\u001b[0m (\n\u001b[1;32m 141\u001b[0m deployment_id,\n\u001b[1;32m 142\u001b[0m engine,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 156\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 157\u001b[0m url,\n\u001b[1;32m 158\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 159\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 160\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 161\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 162\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 163\u001b[0m )\n\u001b[1;32m 165\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[1;32m 166\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[1;32m 167\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_requestor.py:289\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[0;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[1;32m 279\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 280\u001b[0m method,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 287\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 288\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m--> 289\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest_raw(\n\u001b[1;32m 290\u001b[0m method\u001b[39m.\u001b[39;49mlower(),\n\u001b[1;32m 291\u001b[0m url,\n\u001b[1;32m 292\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 293\u001b[0m supplied_headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 294\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 295\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 296\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 297\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 298\u001b[0m )\n\u001b[1;32m 299\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response(result, stream)\n\u001b[1;32m 300\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/openai/api_requestor.py:606\u001b[0m, in \u001b[0;36mAPIRequestor.request_raw\u001b[0;34m(self, method, url, params, supplied_headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 604\u001b[0m _thread_context\u001b[39m.\u001b[39msession_create_time \u001b[39m=\u001b[39m time\u001b[39m.\u001b[39mtime()\n\u001b[1;32m 605\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 606\u001b[0m result \u001b[39m=\u001b[39m _thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 607\u001b[0m method,\n\u001b[1;32m 608\u001b[0m abs_url,\n\u001b[1;32m 609\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 610\u001b[0m data\u001b[39m=\u001b[39;49mdata,\n\u001b[1;32m 611\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 612\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 613\u001b[0m timeout\u001b[39m=\u001b[39;49mrequest_timeout \u001b[39mif\u001b[39;49;00m request_timeout \u001b[39melse\u001b[39;49;00m TIMEOUT_SECS,\n\u001b[1;32m 614\u001b[0m proxies\u001b[39m=\u001b[39;49m_thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mproxies,\n\u001b[1;32m 615\u001b[0m )\n\u001b[1;32m 616\u001b[0m \u001b[39mexcept\u001b[39;00m requests\u001b[39m.\u001b[39mexceptions\u001b[39m.\u001b[39mTimeout \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 617\u001b[0m \u001b[39mraise\u001b[39;00m error\u001b[39m.\u001b[39mTimeout(\u001b[39m\"\u001b[39m\u001b[39mRequest timed out: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(e)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 591\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 705\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[39m=\u001b[39m TimeoutSauce(connect\u001b[39m=\u001b[39mtimeout, read\u001b[39m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 487\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 488\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 489\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 490\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 491\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 492\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 493\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 494\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 496\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 497\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 498\u001b[0m )\n\u001b[1;32m 500\u001b[0m \u001b[39mexcept\u001b[39;00m (ProtocolError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m(err, request\u001b[39m=\u001b[39mrequest)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[39m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 791\u001b[0m conn,\n\u001b[1;32m 792\u001b[0m method,\n\u001b[1;32m 793\u001b[0m url,\n\u001b[1;32m 794\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 795\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 796\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 797\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 798\u001b[0m retries\u001b[39m=\u001b[39;49mretries,\n\u001b[1;32m 799\u001b[0m response_conn\u001b[39m=\u001b[39;49mresponse_conn,\n\u001b[1;32m 800\u001b[0m preload_content\u001b[39m=\u001b[39;49mpreload_content,\n\u001b[1;32m 801\u001b[0m decode_content\u001b[39m=\u001b[39;49mdecode_content,\n\u001b[1;32m 802\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mresponse_kw,\n\u001b[1;32m 803\u001b[0m )\n\u001b[1;32m 805\u001b[0m \u001b[39m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[39m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 537\u001b[0m \u001b[39mexcept\u001b[39;00m (BaseSSLError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/urllib3/connection.py:461\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mresponse\u001b[39;00m \u001b[39mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 460\u001b[0m \u001b[39m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 461\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 463\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 464\u001b[0m assert_header_parsing(httplib_response\u001b[39m.\u001b[39mmsg)\n", "File \u001b[0;32m/usr/lib/python3.10/http/client.py:1375\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1374\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1375\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1376\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1377\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n", "File \u001b[0;32m/usr/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 319\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[39mbreak\u001b[39;00m\n", "File \u001b[0;32m/usr/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n", "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 706\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1275\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "original_df['generated_intro_gpt3']= original_df.generated_intro.apply(get_openai_response)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "original_df.to_csv(\"data/new_original_df_with_gpt3.csv\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 }