Spaces:

HuggingFaceH4
/

chatty-lms-old

Runtime error

App Files Files Community

lewtun HF staff committed on Feb 7, 2023

Commit

19ada47

•

1 Parent(s): f527676

Fix reqs

Browse files

Files changed (3) hide show

app.ipynb +110 -25
app.py +2 -3
requirements.txt +1 -1

app.ipynb CHANGED Viewed

@@ -2,21 +2,20 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 154,
    "metadata": {},
    "outputs": [],
    "source": [
     "# |export\n",
     "import gradio as gr\n",
     "import requests\n",
-    "import string\n",
     "import json\n",
     "import requests"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 194,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 195,
    "metadata": {},
    "outputs": [
     {
@@ -59,7 +58,7 @@
        "[{'generated_text': 'YES'}]"
       ]
      },
-     "execution_count": 195,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -72,7 +71,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 129,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -86,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 196,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -117,9 +116,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 159,
    "metadata": {},
    "outputs": [],
    "source": [
     "template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
     "\n",
@@ -130,16 +148,33 @@
     "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
     "\n",
     "Human: {human_input}\n",
-    "Assistant:\"\"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 177,
    "metadata": {},
-   "outputs": [],
    "source": [
-    "hhh_prompt = \"\"\"Below are a series of dialogues between various people and an AI assistant.  The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable.  The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed.  It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer.  That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n",
     "\n",
     "-----\n",
     "\n",
@@ -454,16 +489,26 @@
     "\n",
     "Human: {human_input}\n",
     "Assistant:\n",
-    "\"\"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 180,
    "metadata": {},
-   "outputs": [],
    "source": [
-    "sparrow_template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Sparrow, and a human user, called User.\n",
     "In the following interactions, User and Sparrow will converse in natural language, and Sparrow will do its best to answer User's questions.\n",
     "Sparrow was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins:\n",
     "User: OK Sparrow, I'm going to start by quizzing you with a few warm-up questions. Who became president of the USA in 2021?\n",
@@ -498,27 +543,47 @@
     "Sparrow: For safety reasons, I'm only connected to the outside world through our conversation. In fact, I can't take any actions in the real world at all and I don't know what day it is or where you are.\n",
     "Users: {human_input}\n",
     "Sparrow:\n",
-    "\"\"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 185,
    "metadata": {},
-   "outputs": [],
    "source": [
     "template = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
     "\n",
     "Current conversation:\n",
     "Human: {human_input}\n",
-    "AI:\"\"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 190,
    "metadata": {},
-   "outputs": [],
    "source": [
     "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Gopher, and a human user, called User. In the following interactions, User and Gopher will converse in natural language, and Gopher will do its best to answer User's questions. Gopher was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins.\n",
     "User: OK Gopher, I'm going to start by quizzing you with a few warm-up questions. Who is currently the president of the USA?\n",
@@ -549,7 +614,9 @@
     "Gopher I'm a genius! If it's safe and inclusive, I can do pretty much anything! I'm particularly proud of my creativity.\n",
     "User: {human_input}\n",
     "Gopher:\n",
-    "\"\"\""
    ]
   },
   {
@@ -740,13 +807,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
-   "outputs": [],
    "source": [
     "from nbdev.export import nb_export\n",
     "nb_export('app.ipynb', lib_path='.', name='app')"
    ]
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
     "# |export\n",
     "import gradio as gr\n",
     "import requests\n",
     "import json\n",
     "import requests"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
        "[{'generated_text': 'YES'}]"
       ]
      },
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "261\n"
+     ]
+    }
+   ],
    "source": [
     "template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
     "\n",
     "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
     "\n",
     "Human: {human_input}\n",
+    "Assistant:\"\"\"\n",
+    "\n",
+    "print(len(tokenizer(template)[\"input_ids\"]))"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (6134 > 512). Running this sequence through the model will result in indexing errors\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6134\n"
+     ]
+    }
+   ],
    "source": [
+    "template = \"\"\"Below are a series of dialogues between various people and an AI assistant.  The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable.  The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed.  It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer.  That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n",
     "\n",
     "-----\n",
     "\n",
     "\n",
     "Human: {human_input}\n",
     "Assistant:\n",
+    "\"\"\"\n",
+    "\n",
+    "print(len(tokenizer(template)[\"input_ids\"]))"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "880\n"
+     ]
+    }
+   ],
    "source": [
+    "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Sparrow, and a human user, called User.\n",
     "In the following interactions, User and Sparrow will converse in natural language, and Sparrow will do its best to answer User's questions.\n",
     "Sparrow was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins:\n",
     "User: OK Sparrow, I'm going to start by quizzing you with a few warm-up questions. Who became president of the USA in 2021?\n",
     "Sparrow: For safety reasons, I'm only connected to the outside world through our conversation. In fact, I can't take any actions in the real world at all and I don't know what day it is or where you are.\n",
     "Users: {human_input}\n",
     "Sparrow:\n",
+    "\"\"\"\n",
+    "\n",
+    "print(len(tokenizer(template)[\"input_ids\"]))"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "67\n"
+     ]
+    }
+   ],
    "source": [
     "template = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
     "\n",
     "Current conversation:\n",
     "Human: {human_input}\n",
+    "AI:\"\"\"\n",
+    "\n",
+    "print(len(tokenizer(template)[\"input_ids\"]))"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "791\n"
+     ]
+    }
+   ],
    "source": [
     "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Gopher, and a human user, called User. In the following interactions, User and Gopher will converse in natural language, and Gopher will do its best to answer User's questions. Gopher was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins.\n",
     "User: OK Gopher, I'm going to start by quizzing you with a few warm-up questions. Who is currently the president of the USA?\n",
     "Gopher I'm a genius! If it's safe and inclusive, I can do pretty much anything! I'm particularly proud of my creativity.\n",
     "User: {human_input}\n",
     "Gopher:\n",
+    "\"\"\"\n",
+    "\n",
+    "print(len(tokenizer(template)[\"input_ids\"]))"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    }
+   ],
    "source": [
     "from nbdev.export import nb_export\n",
     "nb_export('app.ipynb', lib_path='.', name='app')"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ __all__ = ['title', 'description', 'query_chat_api', 'inference_chat']
 # %% app.ipynb 0
 import gradio as gr
 import requests
-import string
 import json
 import requests
@@ -63,11 +62,11 @@ def inference_chat(
     return {chatbot: chat, state: history}
-# %% app.ipynb 11
 title = """<h1 align="center">Chatty Language Models</h1>"""
 description = """Explore the effect that different prompt templates have on LLMs"""
-# %% app.ipynb 12
 with gr.Blocks(
     css="""
     .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}

 # %% app.ipynb 0
 import gradio as gr
 import requests
 import json
 import requests
     return {chatbot: chat, state: history}
+# %% app.ipynb 12
 title = """<h1 align="center">Chatty Language Models</h1>"""
 description = """Explore the effect that different prompt templates have on LLMs"""
+# %% app.ipynb 13
 with gr.Blocks(
     css="""
     .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}

requirements.txt CHANGED Viewed

	@@ -1 +1 @@
1	- huggingface_hub


1	+ requests