{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Load the dataset of verbose prompts that we will shorten with the API.\n",
"import os\n",
"import pandas as pd\n",
"\n",
"df = pd.read_csv('selected_prompts.csv')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# OpenAI SDK (v0.x interface) plus time for backoff/pacing.\n",
"# 'Completion' and 'api_key' were imported but never used anywhere\n",
"# in this notebook, so they have been dropped.\n",
"from openai import ChatCompletion\n",
"import openai\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Never hardcode an API key in a notebook (it ends up in version control\n",
"# and in shared copies). Read it from the environment instead.\n",
"openai.api_key = os.environ.get('OPENAI_API_KEY', '')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# System prompt for the summarization task. Fixed the garbled phrase\n",
"# \"in a short for 5-7 words\" -> \"in a short form of 5-7 words\" so the\n",
"# instruction reads unambiguously to the model.\n",
"prompt_sys = \"I'll send you a prompt that's too wordy. You will summarize it in a short form of 5-7 words, capture subject, composition, verb first and maybe add a few adjectives. Capture intent precisely. Example output from you: A girl in paris, modern art. Reply only with the new short prompt\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Split the first 3,000 rows into three equal chunks of 1,000 prompts.\n",
"chunks = [df.iloc[start:start + 1000] for start in range(0, 3000, 1000)]\n",
"first_1000, second_1000, last_1000 = chunks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"from openai import ChatCompletion\n",
"from requests.exceptions import ReadTimeout, Timeout\n",
"\n",
"# Create a subfolder for batch outputs\n",
"os.makedirs('batch_outputs', exist_ok=True)\n",
"\n",
"batch_size = 20  # Number of prompts per saved CSV\n",
"num_batches = (len(last_1000) + batch_size - 1) // batch_size  # ceiling division\n",
"\n",
"for i in range(num_batches):\n",
"    print(f\"Starting Batch {i+1}...\")\n",
"    start_idx = i * batch_size\n",
"    end_idx = start_idx + batch_size\n",
"    batch = last_1000.iloc[start_idx:end_idx].copy()\n",
"\n",
"    short_versions = []\n",
"\n",
"    for idx, prompt in enumerate(batch['prompt']):\n",
"        success = False\n",
"        retries = 3\n",
"        while not success and retries > 0:\n",
"            start_time = time.time()\n",
"            try:\n",
"                # NOTE: the openai v0.x SDK does not honor 'timeout=' here;\n",
"                # 'request_timeout' is the parameter that actually bounds\n",
"                # the HTTP request.\n",
"                c = ChatCompletion.create(\n",
"                    model=\"gpt-3.5-turbo\",\n",
"                    messages=[{\"role\": \"system\", \"content\": prompt_sys},\n",
"                              {\"role\": \"user\", \"content\": prompt}],\n",
"                    request_timeout=20)\n",
"                short_versions.append(c['choices'][0]['message']['content'])\n",
"                success = True\n",
"                elapsed_time = time.time() - start_time\n",
"                print(f\"Processed Batch {i+1}, Prompt {idx+1}. Took {elapsed_time:.2f} seconds.\")\n",
"            except (ReadTimeout, Timeout) as e:\n",
"                print(f\"Timeout error for Batch {i+1}, Prompt {idx+1}: {str(e)}. Retrying...\")\n",
"            except Exception as e:  # Catch all other exceptions\n",
"                print(f\"Unhandled error for Batch {i+1}, Prompt {idx+1}: {str(e)}. Retrying...\")\n",
"            if not success:\n",
"                # Back off only on failure. The original decremented retries\n",
"                # and slept 10 s even after a successful call, adding ~10 s\n",
"                # of dead time to every single prompt.\n",
"                retries -= 1\n",
"                time.sleep(10)\n",
"\n",
"        if not success:\n",
"            # Keep short_versions aligned with the batch rows. Without this\n",
"            # placeholder, one prompt exhausting its retries made the column\n",
"            # assignment below raise a length-mismatch ValueError and lose\n",
"            # the entire batch.\n",
"            short_versions.append(None)\n",
"            print(f\"Giving up on Batch {i+1}, Prompt {idx+1}.\")\n",
"\n",
"        # Introduce a small delay between prompts\n",
"        time.sleep(1)\n",
"\n",
"    batch['short_version'] = short_versions\n",
"    batch.to_csv(f'batch_outputs/batch_{i+1}_output.csv', index=False)\n",
"    print(f\"Saved Batch {i+1} to CSV.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import glob\n",
"\n",
"# Collect every per-batch CSV written by the loop above\n",
"files = glob.glob('batch_outputs/data_to_mix/*.csv')\n",
"\n",
"# Load each file into its own DataFrame\n",
"dfs = []\n",
"for path in files:\n",
"    dfs.append(pd.read_csv(path))\n",
"\n",
"# Stack them into one frame with a fresh, contiguous index\n",
"combined_df = pd.concat(dfs, ignore_index=True)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "fastai",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}