{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# |export\n", "import gradio as gr\n", "import requests\n", "import json\n", "import requests\n", "import os\n", "from pathlib import Path\n", "from dotenv import load_dotenv\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# |export\n", "if Path(\".env\").is_file():\n", " load_dotenv(\".env\")\n", "\n", "HF_TOKEN = os.getenv(\"HF_TOKEN\")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# |export\n", "def get_model_endpoint_params(model_id):\n", " if \"joi\" in model_id:\n", " headers = None\n", " max_new_tokens_supported = True\n", " return \"https://joi-20b.ngrok.io/generate\", headers, max_new_tokens_supported\n", " else:\n", " max_new_tokens_supported = False\n", " headers = {\"Authorization\": f\"Bearer {HF_TOKEN}\", \"x-wait-for-model\": \"1\"}\n", " return f\"https://api-inference.huggingface.co/models/{model_id}\", headers, max_new_tokens_supported\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# |export\n", "def query_chat_api(\n", " model_id,\n", " inputs,\n", " temperature,\n", " top_p\n", "):\n", " endpoint, headers, max_new_tokens_supported = get_model_endpoint_params(model_id)\n", "\n", " payload = {\n", " \"inputs\": inputs,\n", " \"parameters\": {\n", " \"temperature\": temperature,\n", " \"top_p\": top_p,\n", " \"do_sample\": True,\n", " },\n", " }\n", "\n", " if max_new_tokens_supported is True:\n", " payload[\"parameters\"][\"max_new_tokens\"] = 100\n", " payload[\"parameters\"][\"repetition_penalty\"]: 1.03\n", " payload[\"parameters\"][\"stop\"] = [\"Human:\"]\n", " else:\n", " payload[\"parameters\"][\"max_length\"] = 512\n", "\n", " response = requests.post(endpoint, json=payload, headers=headers)\n", "\n", " if response.status_code == 200:\n", " return response.json()\n", " else:\n", " return \"Error: \" + response.text\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'generated_text': '\\n\\nJoi: Black holes are regions of intense gravitational fields in space where gravity is so strong that light cannot escape its pull. 
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'generated_text': '\\n\\nJoi: Black holes are regions of intense gravitational fields in space where gravity is so strong that light cannot escape its pull. They are formed when massive stars burn out, leaving behind a black hole.'}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# model_id = \"google/flan-t5-xl\"\n", "model_id = \"Rallio67/joi_20B_instruct_alpha\"\n", "query = \"What can you tell me about black holes?\"\n", "query_chat_api(model_id, query, 1, 0.95)" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# |export\n", "def inference_chat(\n", "    model_id,\n", "    prompt_template,\n", "    text_input,\n", "    temperature,\n", "    top_p,\n", "    history=None,\n", "):\n", "    # Avoid a shared mutable default; Gradio passes the session state in practice\n", "    history = history if history is not None else []\n", "    with open(f\"prompt_templates/{prompt_template}.json\", \"r\") as f:\n", "        template = json.load(f)\n", "\n", "    history_input = \"\"\n", "    for idx, text in enumerate(history):\n", "        if idx % 2 == 0:\n", "            history_input += f\"Human: {text}\\n\"\n", "        else:\n", "            history_input += f\"Assistant: {text}\\n\"\n", "    inputs = template[\"prompt\"].format(human_input=text_input, history=history_input)\n", "    history.append(text_input)\n", "\n", "    print(f\"History: {history}\")\n", "    print(f\"Inputs: {inputs}\")\n", "\n", "    output = query_chat_api(model_id, inputs, temperature, top_p)\n", "    if isinstance(output, list):\n", "        output = output[0]\n", "    # Truncate at the stop sequence; rstrip(\" Human:\") would strip a *set* of\n", "    # characters rather than the suffix, so split on the stop string instead\n", "    output = output[\"generated_text\"].split(\"Human:\")[0].rstrip()\n", "    history.append(\" \" + output)\n", "\n", "    chat = [\n", "        (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)\n", "    ]  # convert history to (human, assistant) tuples\n", "\n", "    return {chatbot: chat, state: history}\n" ] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Prompt templates" ] },
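{ "cell_type": "markdown", "metadata": {}, "source": [ "Each template is a plain JSON file in `prompt_templates/` with a single `prompt` key whose value must contain the `{history}` and `{human_input}` placeholders that `inference_chat` fills in. As an illustration, the sketch below reconstructs roughly what `langchain_default.json` contains, based on the app logs further down; the file shipped with the repo is the source of truth." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative reconstruction of prompt_templates/langchain_default.json\n", "example_template = {\n", "    \"prompt\": (\n", "        \"The following is a friendly conversation between a human and an AI Assistant. \"\n", "        \"The Assistant is talkative and provides lots of specific details from its context. \"\n", "        \"If the AI does not know the answer to a question, it truthfully says it does not know.\\n\\n\"\n", "        \"Current conversation:\\n{history}\\nHuman: {human_input}\\nAssistant:\"\n", "    )\n", "}\n", "print(example_template[\"prompt\"].format(history=\"\", human_input=\"Hi!\"))\n" ] },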
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# |export\n", "title = \"\"\"<h1 align=\"center\">Chatty Language Models</h1>\"\"\"\n", "description = \"\"\"Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:\n", "\n", "```\n", "Human: <utterance>\n", "Assistant: <utterance>\n", "Human: <utterance>\n", "Assistant: <utterance>\n", "...\n", "```\n", "\n", "In this app, you can explore the outputs of several language models conditioned on different conversational prompts. The models are trained on different datasets and have different objectives, so they will have different personalities and strengths.\n", "\n", "So far, the following prompts are available:\n", "\n", "* `langchain_default`: The default prompt used in the [LangChain library](https://github.com/hwchase17/langchain/blob/bc53c928fc1b221d0038b839d111039d31729def/langchain/chains/conversation/prompt.py#L4). Around 67 tokens long.\n", "* `openai_chatgpt`: The prompt used in the OpenAI ChatGPT model. Around 261 tokens long.\n", "* `deepmind_Assistant`: The prompt used in DeepMind's Sparrow dialogue agent (Table 7 of [their paper](https://arxiv.org/abs/2209.14375)). Around 880 tokens long.\n", "* `deepmind_gopher`: The prompt used in the DeepMind Gopher model (Table A30 of [their paper](https://arxiv.org/abs/2112.11446)). Around 791 tokens long.\n", "* `anthropic_hhh`: The prompt used in the [Anthropic HHH models](https://gist.github.com/jareddk/2509330f8ef3d787fc5aaac67aab5f11#file-hhh_prompt-txt). A whopping 6,341 tokens long!\n", "\n", "As you can see, most of these prompts exceed the maximum context size of models like Flan-T5, so an error usually means the Inference API has timed out.\n", "\"\"\"" ] },
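{ "cell_type": "markdown", "metadata": {}, "source": [ "The token counts quoted above depend on which tokenizer you measure with. The sketch below is one illustrative way to compute them; the choice of the Flan-T5 tokenizer is an assumption, made only because `google/flan-t5-xl` is one of the models served by the app." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative sketch: measure how many tokens a prompt template consumes.\n", "# The tokenizer choice is an assumption; counts vary from model to model.\n", "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"google/flan-t5-xl\")\n", "\n", "def count_prompt_tokens(template_name):\n", "    with open(f\"prompt_templates/{template_name}.json\") as f:\n", "        prompt = json.load(f)[\"prompt\"]\n", "    return len(tokenizer(prompt)[\"input_ids\"])\n", "\n", "count_prompt_tokens(\"langchain_default\")" ] },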
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "History: ['Hi!']\n", "Inputs: The following is a friendly conversation between a human and an AI Assistant. The Assistant is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", "\n", "Current conversation:\n", "\n", "Human: Hi!\n", "Assistant:\n", "History: ['Hi!']\n", "Inputs: Assistant is a large language model trained by OpenAI.\n", "\n", "Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n", "\n", "Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n", "\n", "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n", "\n", "Current conversation:\n", "\n", "Human: Hi!\n", "Assistant:\n", "History: ['Hi!', ' Hi, Assistant. What can I do for you?', 'Can you help me?']\n", "Inputs: Assistant is a large language model trained by OpenAI.\n", "\n", "Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n", "\n", "Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n", "\n", "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n", "\n", "Current conversation:\n", "Human: Hi!\n", "Assistant: Hi, Assistant. What can I do for you?\n", "\n", "Human: Can you help me?\n", "Assistant:\n", "History: ['Hi!', ' Hi, Assistant. What can I do for you?', 'Can you help me?', ' Sure. 
"source": [ "# |export\n", "with gr.Blocks(\n", "    css=\"\"\"\n", "    .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}\n", "    #component-21 > div.wrap.svelte-w6rprc {height: 600px;}\n", "    \"\"\"\n", ") as iface:\n", "    state = gr.State([])\n", "\n", "    gr.Markdown(title)\n", "    gr.Markdown(description)\n", "\n", "    with gr.Row():\n", "        with gr.Column(scale=1):\n", "            model_id = gr.Dropdown(\n", "                choices=[\"google/flan-t5-xl\", \"Rallio67/joi_20B_instruct_alpha\"],\n", "                value=\"google/flan-t5-xl\",\n", "                label=\"Model\",\n", "                interactive=True,\n", "            )\n", "            prompt_template = gr.Dropdown(\n", "                choices=[\n", "                    \"langchain_default\",\n", "                    \"openai_chatgpt\",\n", "                    \"deepmind_Assistant\",\n", "                    \"deepmind_gopher\",\n", "                    \"anthropic_hhh\",\n", "                ],\n", "                value=\"langchain_default\",\n", "                label=\"Prompt Template\",\n", "                interactive=True,\n", "            )\n", "            temperature = gr.Slider(\n", "                minimum=0.0,\n", "                maximum=2.0,\n", "                value=1.0,\n", "                step=0.1,\n", "                interactive=True,\n", "                label=\"Temperature\",\n", "            )\n", "\n", "            top_p = gr.Slider(\n", "                minimum=0.0,\n", "                maximum=1.0,\n", "                value=0.8,\n", "                step=0.05,\n", "                interactive=True,\n", "                label=\"Top-p (nucleus sampling)\",\n", "            )\n", "\n", "        with gr.Column(scale=1.8):\n", "            with gr.Row():\n", "                chatbot = gr.Chatbot(\n", "                    label=\"Chat Output\",\n", "                )\n", "\n", "            with gr.Row():\n", "                chat_input = gr.Textbox(lines=1, label=\"Chat Input\")\n", "                chat_input.submit(\n", "                    inference_chat,\n", "                    [\n", "                        model_id,\n", "                        prompt_template,\n", "                        chat_input,\n", "                        temperature,\n", "                        top_p,\n", "                        state,\n", "                    ],\n", "                    [chatbot, state],\n", "                )\n", "\n", "            with gr.Row():\n", "                clear_button = gr.Button(value=\"Clear\", interactive=True)\n", "                clear_button.click(\n", "                    lambda: (\"\", [], []),\n", "                    [],\n", "                    [chat_input, chatbot, state],\n", "                    queue=False,\n", "                )\n", "\n", "                submit_button = gr.Button(\n", "                    value=\"Submit\", interactive=True, variant=\"primary\"\n", "                )\n",
"                submit_button.click(\n", "                    inference_chat,\n", "                    [\n", "                        model_id,\n", "                        prompt_template,\n", "                        chat_input,\n", "                        temperature,\n", "                        top_p,\n", "                        state,\n", "                    ],\n", "                    [chatbot, state],\n", "                )\n", "iface.launch()\n" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Closing server running on port: 7860\n" ] } ], "source": [ "iface.close()" ] },
{ "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "from nbdev.export import nb_export\n", "nb_export('app.ipynb', lib_path='.', name='app')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "hf", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "66e5af1d4a3a75efffc7cd5a7f382675fc3ac06b0697676e06fa85c907378a99" } } }, "nbformat": 4, "nbformat_minor": 2 }