{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('selected_prompts.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import ChatCompletion,Completion, api_key\n",
    "import openai\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "openai.api_key = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_sys = \"I'll send you a prompt that's too wordy. You will summarize it in a short for 5-7 words, capture subject, composition, verb first and maybe add a few adjectives. Capture intent precisely. Example output from you: A girl in paris, modern art. Reply only with the new short prompt\""
   ]
  },
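  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical sanity check (not in the original notebook): try the system\n",
    "# prompt on a single row before launching the full batched run below.\n",
    "test_prompt = df['prompt'].iloc[0]\n",
    "resp = ChatCompletion.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    messages=[{\"role\": \"system\", \"content\": prompt_sys},\n",
    "              {\"role\": \"user\", \"content\": test_prompt}])\n",
    "print(resp['choices'][0]['message']['content'])"
   ]
  },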
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "first_1000 = df.iloc[:1000]\n",
    "second_1000 = df.iloc[1000:2000]\n",
    "last_1000 = df.iloc[2000:3000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from openai import ChatCompletion\n",
    "from requests.exceptions import ReadTimeout, Timeout\n",
    "\n",
    "# Create a subfolder for batch outputs\n",
    "os.makedirs('batch_outputs', exist_ok=True)\n",
    "\n",
    "batch_size = 20  # Define your desired batch size\n",
    "num_batches = len(last_1000) // batch_size + (1 if len(last_1000) % batch_size != 0 else 0)\n",
    "\n",
    "for i in range(num_batches):\n",
    "    print(f\"Starting Batch {i+1}...\")\n",
    "    start_idx = i * batch_size\n",
    "    end_idx = start_idx + batch_size\n",
    "    batch = last_1000.iloc[start_idx:end_idx].copy()\n",
    "    \n",
    "    short_versions = []\n",
    "\n",
    "    for idx, prompt in enumerate(batch['prompt']):\n",
    "        success = False\n",
    "        retries = 3\n",
    "        while not success and retries > 0:\n",
    "            start_time = time.time()\n",
    "            try:\n",
    "                c = ChatCompletion.create(\n",
    "                    model=\"gpt-3.5-turbo\",\n",
    "                    messages=[{\"role\": \"system\", \"content\": prompt_sys},\n",
    "                              {\"role\": \"user\", \"content\": prompt}],\n",
    "                    timeout=20)\n",
    "                short_versions.append(c['choices'][0]['message']['content'])\n",
    "                success = True\n",
    "                end_time = time.time()\n",
    "                elapsed_time = end_time - start_time\n",
    "                print(f\"Processed Batch {i+1}, Prompt {idx+1}. Took {elapsed_time:.2f} seconds.\")\n",
    "            except (ReadTimeout, Timeout) as e:\n",
    "                print(f\"Timeout error for Batch {i+1}, Prompt {idx+1}: {str(e)}. Retrying...\")\n",
    "            except Exception as e:  # Catch all other exceptions\n",
    "                print(f\"Unhandled error for Batch {i+1}, Prompt {idx+1}: {str(e)}. Retrying...\")\n",
    "            retries -= 1\n",
    "            time.sleep(10)  # Waiting for 10 seconds before retrying\n",
    "\n",
    "        \n",
    "        # Introduce a small delay between prompts\n",
    "        time.sleep(1)\n",
    "    \n",
    "    batch['short_version'] = short_versions\n",
    "    batch.to_csv(f'batch_outputs/batch_{i+1}_output.csv', index=False)\n",
    "    print(f\"Saved Batch {i+1} to CSV.\")"
   ]
  },
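  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical helper (not in the original notebook): because each batch is\n",
    "# checkpointed to its own CSV above, an interrupted run can be resumed by\n",
    "# checking which batch numbers already exist and skipping them.\n",
    "import glob\n",
    "import re\n",
    "\n",
    "done = sorted(int(re.search(r'batch_(\\d+)_output', f).group(1))\n",
    "              for f in glob.glob('batch_outputs/batch_*_output.csv'))\n",
    "print(f\"Completed batches: {done}\")"
   ]
  },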
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import glob\n",
    "\n",
    "# Generate a list of all CSV files in the folder\n",
    "files = glob.glob('batch_outputs/data_to_mix/*.csv')\n",
    "\n",
    "# Read each file into a DataFrame and store them in a list\n",
    "dfs = [pd.read_csv(file) for file in files]\n",
    "\n",
    "# Concatenate all the DataFrames together\n",
    "combined_df = pd.concat(dfs, ignore_index=True)\n"
   ]
  }
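  ,
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical follow-up (not in the original notebook): inspect and persist\n",
    "# the combined results; the output filename is an assumption.\n",
    "print(combined_df.shape)\n",
    "combined_df.to_csv('combined_short_prompts.csv', index=False)"
   ]
  }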
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fastai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}