Spaces:

matdmiller
/

tts-openai

Runtime error

App Files Files Community

matdmiller committed on Jun 17

Commit

99246a0

•

1 Parent(s): e8a8dd0

added get and clean url text contents functionality

Browse files

Files changed (2) hide show

app.ipynb +149 -64
app.py +66 -7

app.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
    "id": "3bedf0dc-8d8e-4ede-a9e6-b8f35136aa00",
    "metadata": {},
    "outputs": [],
@@ -42,7 +42,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "id": "667802a7-0f36-4136-a381-e66210b20462",
    "metadata": {},
    "outputs": [
@@ -94,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "id": "7664bc24-e8a7-440d-851d-eb16dc2d69fb",
    "metadata": {},
    "outputs": [
@@ -128,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "id": "4d9863fc-969e-409b-8e20-b9c3cd2cc3e7",
    "metadata": {},
    "outputs": [],
@@ -142,7 +142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
    "id": "4f486d3a",
    "metadata": {},
    "outputs": [],
@@ -166,7 +166,9 @@
     ")  # for exponential backoff\n",
     "import traceback\n",
     "# from cartesia.tts import CartesiaTTS\n",
-    "import cartesia"
    ]
   },
   {
@@ -187,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "id": "ecb7f207-0fc2-4d19-a313-356c05776832",
    "metadata": {},
    "outputs": [
@@ -208,7 +210,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
    "id": "52d373be-3a79-412e-8ca2-92bb443fa52d",
    "metadata": {},
    "outputs": [],
@@ -225,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "e5d6cac2-0dee-42d8-9b41-184b5be9cc3f",
    "metadata": {},
    "outputs": [],
@@ -236,7 +238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
    "id": "b77ad8d6-3289-463c-b213-1c0cc215b141",
    "metadata": {},
    "outputs": [
@@ -268,7 +270,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "id": "87fca48b-a16a-4d2b-919c-75e88e4e5eb5",
    "metadata": {},
    "outputs": [
@@ -340,7 +342,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "id": "d1352f28-f761-4e91-a9bc-4efe47552f4d",
    "metadata": {},
    "outputs": [],
@@ -389,13 +391,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
    "id": "8eb7e7d5-7121-4762-b8d1-e5a9539e2b36",
    "metadata": {},
    "outputs": [],
    "source": [
     "#| export\n",
-    "clean_text_prompt = \"\"\"Your job is to clean up text that is going to be fed into a text to speech (TTS) model. You must remove parts of the text that would not normally be spoken such as reference marks `[1]`, spurious citations such as `(Reddy et al., 2021; Wu et al., 2022; Chang et al., 2022; Kondratyuk et al., 2023)` and any other part of the text that is not normally spoken. Please also clean up sections and headers so they are on new lines with proper numbering. You must also clean up any math formulas that are salvageable from being copied from a scientific paper. If they are garbled and do not make sense then remove them. You must carefully perform the text cleanup so it is translated into speech that is easy to listen to however you must not modify the text otherwise. It is critical that you repeat all of the text without modifications except for the cleanup activities you've been instructed to do. Also you must clean all of the text you are given, you may not omit any of it or stop the cleanup task early.\"\"\"\n"
    ]
   },
   {
@@ -408,7 +410,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
    "id": "b5b29507-92bc-453d-bcc5-6402c17e9a0d",
    "metadata": {},
    "outputs": [],
@@ -428,7 +430,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
    "id": "24674094-4d47-4e48-b591-55faabcff8df",
    "metadata": {},
    "outputs": [],
@@ -473,7 +475,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
    "id": "e6224ae5-3792-42b2-8392-3abd42998a50",
    "metadata": {},
    "outputs": [],
@@ -512,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "id": "4691703d-ed0f-4481-8006-b2906289b780",
    "metadata": {},
    "outputs": [
@@ -575,7 +577,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
    "id": "3420c868-71cb-4ac6-ac65-6f02bfd841d1",
    "metadata": {},
    "outputs": [
@@ -614,7 +616,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
    "id": "d0082383-9d03-4b25-b68a-080d0b28caa9",
    "metadata": {},
    "outputs": [],
@@ -635,7 +637,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
    "id": "649d90a5-9398-4cb5-a1e8-a464d463a11c",
    "metadata": {},
    "outputs": [],
@@ -646,7 +648,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "id": "e34bb4aa-698c-4452-8cda-bd02b38f7122",
    "metadata": {},
    "outputs": [],
@@ -699,7 +701,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
    "id": "ca2c6f8c-62ed-4ac1-9c2f-e3b2bfb47e8d",
    "metadata": {},
    "outputs": [],
@@ -711,7 +713,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
    "id": "236dd8d3-4364-4731-af93-7dcdec6f18a1",
    "metadata": {},
    "outputs": [],
@@ -723,7 +725,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
    "id": "0523a158-ee07-48b3-9350-ee39d4deee7f",
    "metadata": {},
    "outputs": [],
@@ -745,7 +747,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "id": "f4d1ba0b-6960-4e22-8dba-7de70370753a",
    "metadata": {},
    "outputs": [],
@@ -757,7 +759,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "id": "efa28cf2-548d-439f-bf2a-21a5edbf9eba",
    "metadata": {},
    "outputs": [],
@@ -770,7 +772,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "id": "cdc1dde5-5edd-4dbf-bd11-30eb418c571d",
    "metadata": {},
    "outputs": [],
@@ -782,7 +784,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "id": "035c33dd-c8e6-42b4-91d4-6bc5f1b36df3",
    "metadata": {},
    "outputs": [],
@@ -795,7 +797,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
    "id": "c97c03af-a377-42e1-93e0-1df957c0e4cc",
    "metadata": {},
    "outputs": [],
@@ -806,12 +808,120 @@
     "    return '# Text Splits:\\n' + '<br>----------<br>'.join(output)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 30,
-   "id": "e4fb3159-579b-4271-bc96-4cd1e2816eca",
    "metadata": {},
    "outputs": [],
    "source": [
     "#| export\n",
     "with gr.Blocks(title='TTS', head='TTS', delete_cache=(3600,3600)) as app:\n",
@@ -821,6 +931,9 @@
     "    gr.Markdown(\"\"\"Start typing below and then click **Go** to create the speech from your text.\n",
     "For requests longer than allowed by the API they will be broken into chunks automatically. [Spaces Link](https://matdmiller-tts-openai.hf.space/) | <a href=\"https://matdmiller-tts-openai.hf.space/\" target=\"_blank\">Spaces Link HTML</a>\"\"\")\n",
     "    with gr.Row():\n",
     "        input_text = gr.Textbox(max_lines=100, label=\"Enter text here\")\n",
     "    with gr.Row():\n",
     "        tts_provider_dropdown = gr.Dropdown(value=DEFAULT_PROVIDER,\n",
@@ -843,6 +956,8 @@
     "    \n",
     "\n",
     "    ### Define UI Actions ###\n",
     "    \n",
     "    # input_text \n",
     "    input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)\n",
@@ -878,7 +993,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
    "id": "a00648a1-891b-470b-9959-f5d502055713",
    "metadata": {},
    "outputs": [],
@@ -892,40 +1007,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
    "id": "4b534fe7-4337-423e-846a-1bdb7cccc4ea",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running on local URL:  http://127.0.0.1:7860\n",
-      "\n",
-      "To create a public link, set `share=True` in `launch()`.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": []
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
     "#| hide\n",
     "#Notebook launch\n",

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "3bedf0dc-8d8e-4ede-a9e6-b8f35136aa00",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "667802a7-0f36-4136-a381-e66210b20462",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "7664bc24-e8a7-440d-851d-eb16dc2d69fb",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "4d9863fc-969e-409b-8e20-b9c3cd2cc3e7",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "4f486d3a",
    "metadata": {},
    "outputs": [],
     ")  # for exponential backoff\n",
     "import traceback\n",
     "# from cartesia.tts import CartesiaTTS\n",
+    "import cartesia\n",
+    "import requests\n",
+    "import urllib"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "ecb7f207-0fc2-4d19-a313-356c05776832",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "52d373be-3a79-412e-8ca2-92bb443fa52d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "e5d6cac2-0dee-42d8-9b41-184b5be9cc3f",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "b77ad8d6-3289-463c-b213-1c0cc215b141",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "id": "87fca48b-a16a-4d2b-919c-75e88e4e5eb5",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "id": "d1352f28-f761-4e91-a9bc-4efe47552f4d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "id": "8eb7e7d5-7121-4762-b8d1-e5a9539e2b36",
    "metadata": {},
    "outputs": [],
    "source": [
     "#| export\n",
+    "CLEAN_TEXT_SYSTEM_PROMPT = \"\"\"You are a helpful expert AI assistant. You are an autoregressive LLM. Your job is to take markdown that was created from a web page html and clean it up so it can be fed to a text to speech model. Remove all hyperlink URL's, navigation references, citations or complex formulas that are not useful when only listening to in audio format. It is also helpful to spell out dates, long numbers, abbreviations, acronym, units etc. For example if you see `50C` in the context of temperature change it to `50 degrees celsius`. If you see an acronym, for example NASA, please spell it out `National Aeronautics and Space Administration`. When you have finished your task please finish the text you return with <<COMPLETE>>. The maximum context length you can return in one shot is 4,000 tokens so you may get cut off. If that happens I will send you another message with the text <<CONTINUE>> and you should continue the task where you had previously left off. This is why I need you finish your response with <<COMPLETE>> once you have fully completed your task.  DO NOT MODIFY THE TEXT IN ANY WAY EXCEPT FOR AS INSTRUCTED HERE.\"\"\"\n"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "id": "b5b29507-92bc-453d-bcc5-6402c17e9a0d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "id": "24674094-4d47-4e48-b591-55faabcff8df",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "id": "e6224ae5-3792-42b2-8392-3abd42998a50",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 16,
    "id": "4691703d-ed0f-4481-8006-b2906289b780",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "id": "3420c868-71cb-4ac6-ac65-6f02bfd841d1",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "id": "d0082383-9d03-4b25-b68a-080d0b28caa9",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "649d90a5-9398-4cb5-a1e8-a464d463a11c",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "id": "e34bb4aa-698c-4452-8cda-bd02b38f7122",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "id": "ca2c6f8c-62ed-4ac1-9c2f-e3b2bfb47e8d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "id": "236dd8d3-4364-4731-af93-7dcdec6f18a1",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "id": "0523a158-ee07-48b3-9350-ee39d4deee7f",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 24,
    "id": "f4d1ba0b-6960-4e22-8dba-7de70370753a",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 25,
    "id": "efa28cf2-548d-439f-bf2a-21a5edbf9eba",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 26,
    "id": "cdc1dde5-5edd-4dbf-bd11-30eb418c571d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 27,
    "id": "035c33dd-c8e6-42b4-91d4-6bc5f1b36df3",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 28,
    "id": "c97c03af-a377-42e1-93e0-1df957c0e4cc",
    "metadata": {},
    "outputs": [],
     "    return '# Text Splits:\\n' + '<br>----------<br>'.join(output)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "db54a6a6-4bdc-430a-b1ea-444c249b77fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def get_page_md(url):\n",
+    "    # result = requests.get('https://r.jina.ai/'+urllib.parse.quote_plus(url))\n",
+    "    result = requests.get('https://r.jina.ai/'+url)\n",
+    "    result.raise_for_status()\n",
+    "    return result.text"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 30,
+   "id": "75891855-6c08-4a42-9ad5-a02e0b43bb3d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test_page_md = get_page_md('https://simonwillison.net/2024/Jun/16/jina-ai-reader/')\n",
+    "# test_page_md"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "340089c7-0693-43bc-8fc0-cea4fcd0f3f0",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "#| export\n",
+    "# import json\n",
+    "def clean_page_md(text):\n",
+    "    max_iters = 15\n",
+    "    complete = False\n",
+    "    client = openai.OpenAI()\n",
+    "\n",
+    "    tokens = 0\n",
+    "    messages = messages=[\n",
+    "        {\"role\": \"system\", \"content\": CLEAN_TEXT_SYSTEM_PROMPT},\n",
+    "        {\"role\": \"user\", \"content\": text},\n",
+    "        # {\"role\": \"assistant\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
+    "        # {\"role\": \"user\", \"content\": \"Where was it played?\"}\n",
+    "      ]\n",
+    "\n",
+    "    idx = 0\n",
+    "    while complete == False and idx < max_iters:\n",
+    "        idx += 1\n",
+    "        response = client.chat.completions.create(\n",
+    "          model=\"gpt-4o\",\n",
+    "          messages=messages\n",
+    "        )\n",
+    "        # print(response,'\\n\\n\\n')\n",
+    "        response_text = response.choices[0].message.content\n",
+    "        if '<<complete>>' in response_text.lower():\n",
+    "            complete = True\n",
+    "        messages += [\n",
+    "            {\"role\": \"assistant\", \"content\": response_text},\n",
+    "            {\"role\": \"user\", \"content\": \"Please continue.\"},\n",
+    "        ]\n",
+    "        tokens += response.usage.total_tokens\n",
+    "        # print(json.dumps(messages, indent=4))\n",
+    "\n",
+    "    print('TOKENS CLEANUP:', tokens)\n",
+    "    result = ' '.join([o['content'] for o in messages if o['role'] == 'assistant'])\n",
+    "    \n",
+    "    return result.replace('<<COMPLETE>>','')\n",
+    "# res = clean_page_md(test_page_md)\n",
+    "# res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "d55dbe5b-83c6-4ba9-836c-48a181badd38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# clean_page_md(get_page_md('https://www.ineteconomics.org/perspectives/blog/from-long-covid-odds-to-lost-iq-points-ongoing-threats-you-dont-know-about'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "7899e7b2-beeb-40a4-a571-a2ccfc7c9618",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def get_page_text(url):\n",
+    "    return clean_page_md(get_page_md(url))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "e4fb3159-579b-4271-bc96-4cd1e2816eca",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument 'input'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[34], line 35\u001b[0m\n\u001b[1;32m     30\u001b[0m chunks_md \u001b[38;5;241m=\u001b[39m gr\u001b[38;5;241m.\u001b[39mMarkdown(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m,label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mChunks\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     33\u001b[0m \u001b[38;5;66;03m### Define UI Actions ###\u001b[39;00m\n\u001b[0;32m---> 35\u001b[0m \u001b[43mget_url_content_btn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclick\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mget_page_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_url\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_text\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     37\u001b[0m \u001b[38;5;66;03m# input_text \u001b[39;00m\n\u001b[1;32m     38\u001b[0m input_text\u001b[38;5;241m.\u001b[39minput(fn\u001b[38;5;241m=\u001b[39mget_input_text_len, inputs\u001b[38;5;241m=\u001b[39minput_text, outputs\u001b[38;5;241m=\u001b[39minput_text_length)\n",
+      "\u001b[0;31mTypeError\u001b[0m: EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument 'input'"
+     ]
+    }
+   ],
    "source": [
     "#| export\n",
     "with gr.Blocks(title='TTS', head='TTS', delete_cache=(3600,3600)) as app:\n",
     "    gr.Markdown(\"\"\"Start typing below and then click **Go** to create the speech from your text.\n",
     "For requests longer than allowed by the API they will be broken into chunks automatically. [Spaces Link](https://matdmiller-tts-openai.hf.space/) | <a href=\"https://matdmiller-tts-openai.hf.space/\" target=\"_blank\">Spaces Link HTML</a>\"\"\")\n",
     "    with gr.Row():\n",
+    "        input_url = gr.Textbox(max_lines=1, label=\"Optional - Enter a URL\")\n",
+    "        get_url_content_btn = gr.Button(\"Get URL Contents\")\n",
+    "    with gr.Row():\n",
     "        input_text = gr.Textbox(max_lines=100, label=\"Enter text here\")\n",
     "    with gr.Row():\n",
     "        tts_provider_dropdown = gr.Dropdown(value=DEFAULT_PROVIDER,\n",
     "    \n",
     "\n",
     "    ### Define UI Actions ###\n",
+    "\n",
+    "    get_url_content_btn.click(fn=get_page_text, inputs=input_url, outputs=input_text)\n",
     "    \n",
     "    # input_text \n",
     "    input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)\n",
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "a00648a1-891b-470b-9959-f5d502055713",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "4b534fe7-4337-423e-846a-1bdb7cccc4ea",
    "metadata": {},
+   "outputs": [],
    "source": [
     "#| hide\n",
     "#Notebook launch\n",

app.py CHANGED Viewed

@@ -2,10 +2,11 @@
 # %% auto 0
 __all__ = ['secret_import_failed', 'TEMP', 'TEMP_DIR', 'OPENAI_CLIENT_TTS_THREADS', 'CARTESIAAI_CLIENT_TTS_THREADS',
-           'DEFAULT_PROVIDER', 'DEFAULT_MODEL', 'DEFAULT_VOICE', 'providers', 'clean_text_prompt', 'launch_kwargs',
-           'queue_kwargs', 'verify_authorization', 'split_text', 'concatenate_audio', 'create_speech_openai',
-           'create_speech_cartesiaai', 'create_speech', 'get_input_text_len', 'get_generation_cost',
-           'get_model_choices', 'update_model_choices', 'get_voice_choices', 'update_voice_choices', 'split_text_as_md']
 # %% app.ipynb 4
 import os
@@ -69,6 +70,8 @@ from tenacity import (
 import traceback
 # from cartesia.tts import CartesiaTTS
 import cartesia
 # %% app.ipynb 11
 TEMP = os.environ.get('GRADIO_TEMP_DIR','/tmp/')
@@ -121,7 +124,7 @@ except Exception as e:
 # providers
 # %% app.ipynb 19
-clean_text_prompt = """Your job is to clean up text that is going to be fed into a text to speech (TTS) model. You must remove parts of the text that would not normally be spoken such as reference marks `[1]`, spurious citations such as `(Reddy et al., 2021; Wu et al., 2022; Chang et al., 2022; Kondratyuk et al., 2023)` and any other part of the text that is not normally spoken. Please also clean up sections and headers so they are on new lines with proper numbering. You must also clean up any math formulas that are salvageable from being copied from a scientific paper. If they are garbled and do not make sense then remove them. You must carefully perform the text cleanup so it is translated into speech that is easy to listen to however you must not modify the text otherwise. It is critical that you repeat all of the text without modifications except for the cleanup activities you've been instructed to do. Also you must clean all of the text you are given, you may not omit any of it or stop the cleanup task early."""
 # %% app.ipynb 21
@@ -330,12 +333,66 @@ def split_text_as_md(*args, **kwargs):
     return '# Text Splits:\n' + '<br>----------<br>'.join(output)
 # %% app.ipynb 38
 with gr.Blocks(title='TTS', head='TTS', delete_cache=(3600,3600)) as app:
     ### Define UI ###
     gr.Markdown("# TTS")
     gr.Markdown("""Start typing below and then click **Go** to create the speech from your text.
 For requests longer than allowed by the API they will be broken into chunks automatically. [Spaces Link](https://matdmiller-tts-openai.hf.space/) | <a href="https://matdmiller-tts-openai.hf.space/" target="_blank">Spaces Link HTML</a>""")
     with gr.Row():
         input_text = gr.Textbox(max_lines=100, label="Enter text here")
     with gr.Row():
@@ -359,6 +416,8 @@ For requests longer than allowed by the API they will be broken into chunks auto
     ### Define UI Actions ###
     # input_text
     input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)
@@ -391,13 +450,13 @@ For requests longer than allowed by the API they will be broken into chunks auto
-# %% app.ipynb 39
 # launch_kwargs = {'auth':('username',GRADIO_PASSWORD),
 #                  'auth_message':'Please log in to Mat\'s TTS App with username: username and password.'}
 launch_kwargs = {}
 queue_kwargs = {'default_concurrency_limit':10}
-# %% app.ipynb 41
 #.py launch
 if __name__ == "__main__":
     app.queue(**queue_kwargs)

 # %% auto 0
 __all__ = ['secret_import_failed', 'TEMP', 'TEMP_DIR', 'OPENAI_CLIENT_TTS_THREADS', 'CARTESIAAI_CLIENT_TTS_THREADS',
+           'DEFAULT_PROVIDER', 'DEFAULT_MODEL', 'DEFAULT_VOICE', 'providers', 'CLEAN_TEXT_SYSTEM_PROMPT',
+           'launch_kwargs', 'queue_kwargs', 'verify_authorization', 'split_text', 'concatenate_audio',
+           'create_speech_openai', 'create_speech_cartesiaai', 'create_speech', 'get_input_text_len',
+           'get_generation_cost', 'get_model_choices', 'update_model_choices', 'get_voice_choices',
+           'update_voice_choices', 'split_text_as_md', 'get_page_md', 'clean_page_md', 'get_page_text']
 # %% app.ipynb 4
 import os
 import traceback
 # from cartesia.tts import CartesiaTTS
 import cartesia
+import requests
+import urllib
 # %% app.ipynb 11
 TEMP = os.environ.get('GRADIO_TEMP_DIR','/tmp/')
 # providers
 # %% app.ipynb 19
+# System prompt used by clean_page_md for the gpt-4o cleanup chat. It asks the
+# model to end its final reply with <<COMPLETE>>; clean_page_md looks for that
+# marker (case-insensitively) to decide when to stop requesting more output.
+CLEAN_TEXT_SYSTEM_PROMPT = """You are a helpful expert AI assistant. You are an autoregressive LLM. Your job is to take markdown that was created from a web page html and clean it up so it can be fed to a text to speech model. Remove all hyperlink URL's, navigation references, citations or complex formulas that are not useful when only listening to in audio format. It is also helpful to spell out dates, long numbers, abbreviations, acronym, units etc. For example if you see `50C` in the context of temperature change it to `50 degrees celsius`. If you see an acronym, for example NASA, please spell it out `National Aeronautics and Space Administration`. When you have finished your task please finish the text you return with <<COMPLETE>>. The maximum context length you can return in one shot is 4,000 tokens so you may get cut off. If that happens I will send you another message with the text <<CONTINUE>> and you should continue the task where you had previously left off. This is why I need you finish your response with <<COMPLETE>> once you have fully completed your task.  DO NOT MODIFY THE TEXT IN ANY WAY EXCEPT FOR AS INSTRUCTED HERE."""
 # %% app.ipynb 21
     return '# Text Splits:\n' + '<br>----------<br>'.join(output)
 # %% app.ipynb 38
+def get_page_md(url):
+    # result = requests.get('https://r.jina.ai/'+urllib.parse.quote_plus(url))
+    result = requests.get('https://r.jina.ai/'+url)
+    result.raise_for_status()
+    return result.text
+# %% app.ipynb 40
+# import json
+def clean_page_md(text):
+    max_iters = 15
+    complete = False
+    client = openai.OpenAI()
+    tokens = 0
+    messages = messages=[
+        {"role": "system", "content": CLEAN_TEXT_SYSTEM_PROMPT},
+        {"role": "user", "content": text},
+        # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
+        # {"role": "user", "content": "Where was it played?"}
+      ]
+    idx = 0
+    while complete == False and idx < max_iters:
+        idx += 1
+        response = client.chat.completions.create(
+          model="gpt-4o",
+          messages=messages
+        )
+        # print(response,'\n\n\n')
+        response_text = response.choices[0].message.content
+        if '<<complete>>' in response_text.lower():
+            complete = True
+        messages += [
+            {"role": "assistant", "content": response_text},
+            {"role": "user", "content": "Please continue."},
+        ]
+        tokens += response.usage.total_tokens
+        # print(json.dumps(messages, indent=4))
+    print('TOKENS CLEANUP:', tokens)
+    result = ' '.join([o['content'] for o in messages if o['role'] == 'assistant'])
+    return result.replace('<<COMPLETE>>','')
+# res = clean_page_md(test_page_md)
+# res
+# %% app.ipynb 42
+def get_page_text(url):
+    return clean_page_md(get_page_md(url))
+# %% app.ipynb 43
 with gr.Blocks(title='TTS', head='TTS', delete_cache=(3600,3600)) as app:
     ### Define UI ###
     gr.Markdown("# TTS")
     gr.Markdown("""Start typing below and then click **Go** to create the speech from your text.
 For requests longer than allowed by the API they will be broken into chunks automatically. [Spaces Link](https://matdmiller-tts-openai.hf.space/) | <a href="https://matdmiller-tts-openai.hf.space/" target="_blank">Spaces Link HTML</a>""")
+    with gr.Row():
+        input_url = gr.Textbox(max_lines=1, label="Optional - Enter a URL")
+        get_url_content_btn = gr.Button("Get URL Contents")
     with gr.Row():
         input_text = gr.Textbox(max_lines=100, label="Enter text here")
     with gr.Row():
     ### Define UI Actions ###
+    get_url_content_btn.click(fn=get_page_text, inputs=input_url, outputs=input_text)
     # input_text
     input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)
+# %% app.ipynb 44
 # launch_kwargs = {'auth':('username',GRADIO_PASSWORD),
 #                  'auth_message':'Please log in to Mat\'s TTS App with username: username and password.'}
 launch_kwargs = {}
 queue_kwargs = {'default_concurrency_limit':10}
+# %% app.ipynb 46
 #.py launch
 if __name__ == "__main__":
     app.queue(**queue_kwargs)