{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import requests, uuid\n", "\n", "# Add your key and endpoint\n", "with open(\"bing.key\", \"r\") as key_file:\n", " key = key_file.read()\n", "endpoint = \"https://api.cognitive.microsofttranslator.com\"\n", "\n", "# location, also known as region.\n", "# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.\n", "location = \"eastus\"\n", "\n", "path = '/translate'\n", "constructed_url = endpoint + path\n", "\n", "params = {\n", " 'api-version': '3.0',\n", " 'from': 'yue',\n", " 'to': ['zh-Hant']\n", "}\n", "\n", "headers = {\n", " 'Ocp-Apim-Subscription-Key': key,\n", " # location required if you're using a multi-service or regional (not global) resource.\n", " 'Ocp-Apim-Subscription-Region': location,\n", " 'Content-type': 'application/json',\n", " 'X-ClientTraceId': str(uuid.uuid4())\n", "}\n", "\n", "# https://stackoverflow.com/a/312464/6798201\n", "def chunks(lst, n):\n", " \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n", " for i in range(0, len(lst), n):\n", " yield lst[i:i + n]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/Users/kevin/Dev/classes/winter2023/eecs487/canto_mando_bart_bitext_typo_augment_full_bing/commercial_baselines/bing_translator.ipynb Cell 2\u001b[0m in \u001b[0;36m1\n\u001b[1;32m 13\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m../test.typos.pred.bing.man\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39ma+\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m output_file:\n\u001b[1;32m 14\u001b[0m \u001b[39mfor\u001b[39;00m chunk \u001b[39min\u001b[39;00m chunks(body, \u001b[39m500\u001b[39m):\n\u001b[0;32m---> 15\u001b[0m request \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39;49mpost(constructed_url, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, json\u001b[39m=\u001b[39;49mchunk)\n\u001b[1;32m 16\u001b[0m response \u001b[39m=\u001b[39m request\u001b[39m.\u001b[39mjson()\n\u001b[1;32m 17\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m response:\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpost\u001b[39m(url, data\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, json\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[39mr\u001b[39m\u001b[39m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[39m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[39m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[39mreturn\u001b[39;00m request(\u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m, url, data\u001b[39m=\u001b[39;49mdata, json\u001b[39m=\u001b[39;49mjson, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[39m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[39m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[39m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[39mwith\u001b[39;00m sessions\u001b[39m.\u001b[39mSession() \u001b[39mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[39mreturn\u001b[39;00m session\u001b[39m.\u001b[39;49mrequest(method\u001b[39m=\u001b[39;49mmethod, url\u001b[39m=\u001b[39;49murl, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/sessions.py:587\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 582\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 583\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 584\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 585\u001b[0m }\n\u001b[1;32m 586\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 587\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 589\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/sessions.py:701\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 698\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 700\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 701\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 703\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 704\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/requests/adapters.py:489\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 488\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m chunked:\n\u001b[0;32m--> 489\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 490\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 491\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 492\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 493\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 494\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 496\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 497\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 498\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 499\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 500\u001b[0m )\n\u001b[1;32m 502\u001b[0m \u001b[39m# Send the request.\u001b[39;00m\n\u001b[1;32m 503\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 504\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(conn, \u001b[39m\"\u001b[39m\u001b[39mproxy_pool\u001b[39m\u001b[39m\"\u001b[39m):\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:703\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prepare_proxy(conn)\n\u001b[1;32m 702\u001b[0m \u001b[39m# Make the request on the httplib connection object.\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 704\u001b[0m conn,\n\u001b[1;32m 705\u001b[0m method,\n\u001b[1;32m 706\u001b[0m url,\n\u001b[1;32m 707\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 708\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 709\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 710\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 711\u001b[0m )\n\u001b[1;32m 713\u001b[0m \u001b[39m# If we're going to release the connection in ``finally:``, then\u001b[39;00m\n\u001b[1;32m 714\u001b[0m \u001b[39m# the response doesn't need to know about the connection. Otherwise\u001b[39;00m\n\u001b[1;32m 715\u001b[0m \u001b[39m# it will also try to release it and we'll have a double-release\u001b[39;00m\n\u001b[1;32m 716\u001b[0m \u001b[39m# mess.\u001b[39;00m\n\u001b[1;32m 717\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:449\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[0;32m--> 449\u001b[0m six\u001b[39m.\u001b[39;49mraise_from(e, \u001b[39mNone\u001b[39;49;00m)\n\u001b[1;32m 450\u001b[0m \u001b[39mexcept\u001b[39;00m (SocketTimeout, BaseSSLError, SocketError) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 451\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n", "File \u001b[0;32m:3\u001b[0m, in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/site-packages/urllib3/connectionpool.py:444\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 442\u001b[0m \u001b[39m# Python 3\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[1;32m 449\u001b[0m six\u001b[39m.\u001b[39mraise_from(e, \u001b[39mNone\u001b[39;00m)\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:1374\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1372\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1373\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1374\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1375\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 319\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[39mbreak\u001b[39;00m\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 706\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1275\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", "File \u001b[0;32m~/.pyenv/versions/3.10.6/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "\"\"\"\n", "Translate test sentences\n", "\"\"\"\n", "\n", "# You can pass more than one object in body.\n", "with open(\"../para/test/test.typos.can\", \"r\") as input_file:\n", " body = [{'text': line} for line in input_file.read().splitlines()]\n", "\n", "# Clear previous outputs\n", "open(\"../test.typos.pred.bing.man\", 'w').close()\n", "\n", "# Split translation request into chunks of 500 lines (10,000 character limit per request)\n", "with open(\"../test.typos.pred.bing.man\", \"a+\") as output_file:\n", " for chunk in chunks(body, 500):\n", " request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n", " response = request.json()\n", " for line in response:\n", " output_file.write(line['translations'][0]['text'] + \"\\n\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/110 [00:0121\u001b[0m output_file\u001b[39m.\u001b[39mwrite(line[\u001b[39m'\u001b[39m\u001b[39mtranslations\u001b[39m\u001b[39m'\u001b[39m][\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39m# Slow down because of hourly request limit for free tier\u001b[39;00m\n\u001b[0;32m---> 23\u001b[0m time\u001b[39m.\u001b[39;49msleep(\u001b[39m5\u001b[39;49m)\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "\"\"\"\n", "Translate training sentences\n", "\"\"\"\n", "\n", "# You can pass more than one object in body.\n", "with open(\"bing.can\", \"r\") as input_file:\n", " body = [{'text': line} for line in input_file.read().splitlines()]\n", "\n", "# Clear previous outputs\n", "open(\"bing.man\", 'w').close()\n", "\n", "from tqdm import tqdm\n", "import time\n", "\n", "# Split translation request into chunks of 400 lines (10,000 character limit per request)\n", "with open(\"bing.man\", \"a+\") as output_file:\n", " for chunk in tqdm(list(chunks(body, 400))):\n", " request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n", " response = request.json()\n", " for line in response:\n", " output_file.write(line['translations'][0]['text'] + \"\\n\")\n", " # Slow down because of hourly request limit for free tier\n", " time.sleep(5)\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 38/38 [03:33<00:00, 5.61s/it]\n" ] } ], "source": [ "# You can pass more than one object in body.\n", "with open(\"../train/abc.can\", \"r\") as input_file:\n", " body = [{'text': line} for line in input_file.read().splitlines() if len(line) >= 5]\n", "\n", "from tqdm import tqdm\n", "import time\n", "\n", "# Split translation request into chunks of 400 lines (10,000 character limit per request)\n", "with open(\"bing.man\", \"a+\") as output_file:\n", " for chunk in tqdm(list(chunks(body, 400))):\n", " request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n", " response = request.json()\n", " for line in response:\n", " output_file.write(line['translations'][0]['text'] + \"\\n\")\n", " # Slow down because of hourly request limit for free tier\n", " time.sleep(5)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 352/352 [21:28<00:00, 3.66s/it]\n" ] } ], "source": [ "# You can pass more than one object in body.\n", "with open(\"../train/lihkg.filtered.can\", \"r\") as input_file:\n", " body = [{'text': line} for line in input_file.read().splitlines()]\n", "\n", "from tqdm import tqdm\n", "import time\n", "\n", "# Split translation request into chunks of 400 lines (10,000 character limit per request)\n", "with open(\"lihkg.filtered.man\", \"w+\") as output_file:\n", " for chunk in tqdm(list(chunks(body, 400))):\n", " request = requests.post(constructed_url, params=params, headers=headers, json=chunk)\n", " response = request.json()\n", " for line in response:\n", " output_file.write(line['translations'][0]['text'] + \"\\n\")\n", " # Slow down because of hourly request limit for free tier\n", " time.sleep(3)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }