{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import requests, uuid\n",
"\n",
"# Add your key and endpoint\n",
"with open(\"bing.key\", \"r\") as key_file:\n",
" key = key_file.read()\n",
"endpoint = \"https://api.cognitive.microsofttranslator.com\"\n",
"\n",
"# location, also known as region.\n",
"# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.\n",
"location = \"eastus\"\n",
"\n",
"path = '/translate'\n",
"constructed_url = endpoint + path\n",
"\n",
"params = {\n",
" 'api-version': '3.0',\n",
" 'from': 'yue',\n",
" 'to': ['zh-Hant']\n",
"}\n",
"\n",
"headers = {\n",
" 'Ocp-Apim-Subscription-Key': key,\n",
" # location required if you're using a multi-service or regional (not global) resource.\n",
" 'Ocp-Apim-Subscription-Region': location,\n",
" 'Content-type': 'application/json',\n",
" 'X-ClientTraceId': str(uuid.uuid4())\n",
"}\n",
"\n",
"# https://stackoverflow.com/a/312464/6798201\n",
"def chunks(lst, n):\n",
" \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n",
" for i in range(0, len(lst), n):\n",
" yield lst[i:i + n]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/Users/kevin/Dev/classes/winter2023/eecs487/canto_mando_bart_bitext_typo_augment_full_bing/commercial_baselines/bing_translator.ipynb Cell 2\u001b[0m in \u001b[0;36m1\n\u001b[1;32m 13\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m../test.typos.pred.bing.man\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39ma+\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m output_file:\n\u001b[1;32m 14\u001b[0m \u001b[39mfor\u001b[39;00m chunk \u001b[39min\u001b[39;00m chunks(body, \u001b[39m500\u001b[39m):\n\u001b[0;32m---> 15\u001b[0m request \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39;49mpost(constructed_url, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, json\u001b[39m=\u001b[39;49mchunk)\n\u001b[1;32m 16\u001b[0m response \u001b[39m=\u001b[39m request\u001b[39m.\u001b[39mjson()\n\u001b[1;32m 17\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m response:\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpost\u001b[39m(url, data\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, json\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[39mr\u001b[39m\u001b[39m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[39m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[39m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[39mreturn\u001b[39;00m request(\u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m, url, data\u001b[39m=\u001b[39;49mdata, json\u001b[39m=\u001b[39;49mjson, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[39m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[39m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[39m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[39mwith\u001b[39;00m sessions\u001b[39m.\u001b[39mSession() \u001b[39mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[39mreturn\u001b[39;00m session\u001b[39m.\u001b[39;49mrequest(method\u001b[39m=\u001b[39;49mmethod, url\u001b[39m=\u001b[39;49murl, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/sessions.py:587\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 582\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 583\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 584\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 585\u001b[0m }\n\u001b[1;32m 586\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 587\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 589\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/sessions.py:701\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 698\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 700\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 701\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 703\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 704\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/adapters.py:489\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 488\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m chunked:\n\u001b[0;32m--> 489\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 490\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 491\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 492\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 493\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 494\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 496\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 497\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 498\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 499\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 500\u001b[0m )\n\u001b[1;32m 502\u001b[0m \u001b[39m# Send the request.\u001b[39;00m\n\u001b[1;32m 503\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 504\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(conn, \u001b[39m\"\u001b[39m\u001b[39mproxy_pool\u001b[39m\u001b[39m\"\u001b[39m):\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:703\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prepare_proxy(conn)\n\u001b[1;32m 702\u001b[0m \u001b[39m# Make the request on the httplib connection object.\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 704\u001b[0m conn,\n\u001b[1;32m 705\u001b[0m method,\n\u001b[1;32m 706\u001b[0m url,\n\u001b[1;32m 707\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 708\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 709\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 710\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 711\u001b[0m )\n\u001b[1;32m 713\u001b[0m \u001b[39m# If we're going to release the connection in ``finally:``, then\u001b[39;00m\n\u001b[1;32m 714\u001b[0m \u001b[39m# the response doesn't need to know about the connection. Otherwise\u001b[39;00m\n\u001b[1;32m 715\u001b[0m \u001b[39m# it will also try to release it and we'll have a double-release\u001b[39;00m\n\u001b[1;32m 716\u001b[0m \u001b[39m# mess.\u001b[39;00m\n\u001b[1;32m 717\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:449\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[0;32m--> 449\u001b[0m six\u001b[39m.\u001b[39;49mraise_from(e, \u001b[39mNone\u001b[39;49;00m)\n\u001b[1;32m 450\u001b[0m \u001b[39mexcept\u001b[39;00m (SocketTimeout, BaseSSLError, SocketError) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 451\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m:3\u001b[0m, in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:444\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 442\u001b[0m \u001b[39m# Python 3\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[1;32m 449\u001b[0m six\u001b[39m.\u001b[39mraise_from(e, \u001b[39mNone\u001b[39;00m)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:1374\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1372\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1373\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1374\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1375\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 319\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[39mbreak\u001b[39;00m\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 706\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1275\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"\"\"\"\n",
"Translate test sentences\n",
"\"\"\"\n",
"\n",
"# You can pass more than one object in body.\n",
"with open(\"../para/test/test.typos.can\", \"r\") as input_file:\n",
" body = [{'text': line} for line in input_file.read().splitlines()]\n",
"\n",
"# Clear previous outputs\n",
"open(\"../test.typos.pred.bing.man\", 'w').close()\n",
"\n",
"# Split translation request into chunks of 500 lines (10,000 character limit per request)\n",
"with open(\"../test.typos.pred.bing.man\", \"a+\") as output_file:\n",
" for chunk in chunks(body, 500):\n",
" request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n",
" response = request.json()\n",
" for line in response:\n",
" output_file.write(line['translations'][0]['text'] + \"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/110 [00:01, ?it/s]\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/Users/kevin/Dev/classes/winter2023/eecs487/canto_mando_bart_bitext_typo_augment_full_bing/commercial_baselines/bing_translator.ipynb Cell 3\u001b[0m in \u001b[0;36m2\n\u001b[1;32m 21\u001b[0m output_file\u001b[39m.\u001b[39mwrite(line[\u001b[39m'\u001b[39m\u001b[39mtranslations\u001b[39m\u001b[39m'\u001b[39m][\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39m# Slow down because of hourly request limit for free tier\u001b[39;00m\n\u001b[0;32m---> 23\u001b[0m time\u001b[39m.\u001b[39;49msleep(\u001b[39m5\u001b[39;49m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"\"\"\"\n",
"Translate training sentences\n",
"\"\"\"\n",
"\n",
"# You can pass more than one object in body.\n",
"with open(\"bing.can\", \"r\") as input_file:\n",
" body = [{'text': line} for line in input_file.read().splitlines()]\n",
"\n",
"# Clear previous outputs\n",
"open(\"bing.man\", 'w').close()\n",
"\n",
"from tqdm import tqdm\n",
"import time\n",
"\n",
"# Split translation request into chunks of 400 lines (10,000 character limit per request)\n",
"with open(\"bing.man\", \"a+\") as output_file:\n",
" for chunk in tqdm(list(chunks(body, 400))):\n",
" request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n",
" response = request.json()\n",
" for line in response:\n",
" output_file.write(line['translations'][0]['text'] + \"\\n\")\n",
" # Slow down because of hourly request limit for free tier\n",
" time.sleep(5)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 38/38 [03:33<00:00, 5.61s/it]\n"
]
}
],
"source": [
"# You can pass more than one object in body.\n",
"with open(\"../train/abc.can\", \"r\") as input_file:\n",
" body = [{'text': line} for line in input_file.read().splitlines() if len(line) >= 5]\n",
"\n",
"from tqdm import tqdm\n",
"import time\n",
"\n",
"# Split translation request into chunks of 400 lines (10,000 character limit per request)\n",
"with open(\"bing.man\", \"a+\") as output_file:\n",
" for chunk in tqdm(list(chunks(body, 400))):\n",
" request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n",
" response = request.json()\n",
" for line in response:\n",
" output_file.write(line['translations'][0]['text'] + \"\\n\")\n",
" # Slow down because of hourly request limit for free tier\n",
" time.sleep(5)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 352/352 [21:28<00:00, 3.66s/it]\n"
]
}
],
"source": [
"# You can pass more than one object in body.\n",
"with open(\"../train/lihkg.filtered.can\", \"r\") as input_file:\n",
" body = [{'text': line} for line in input_file.read().splitlines()]\n",
"\n",
"from tqdm import tqdm\n",
"import time\n",
"\n",
"# Split translation request into chunks of 400 lines (10,000 character limit per request)\n",
"with open(\"lihkg.filtered.man\", \"w+\") as output_file:\n",
" for chunk in tqdm(list(chunks(body, 400))):\n",
" request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n",
" response = request.json()\n",
" for line in response:\n",
" output_file.write(line['translations'][0]['text'] + \"\\n\")\n",
" # Slow down because of hourly request limit for free tier\n",
" time.sleep(3)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}