lewtun HF staff committed on
Commit
19ada47
1 Parent(s): f527676
Files changed (3) hide show
  1. app.ipynb +110 -25
  2. app.py +2 -3
  3. requirements.txt +1 -1
app.ipynb CHANGED
@@ -2,21 +2,20 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 154,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
  "# |export\n",
10
  "import gradio as gr\n",
11
  "import requests\n",
12
- "import string\n",
13
  "import json\n",
14
  "import requests"
15
  ]
16
  },
17
  {
18
  "cell_type": "code",
19
- "execution_count": 194,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
@@ -50,7 +49,7 @@
50
  },
51
  {
52
  "cell_type": "code",
53
- "execution_count": 195,
54
  "metadata": {},
55
  "outputs": [
56
  {
@@ -59,7 +58,7 @@
59
  "[{'generated_text': 'YES'}]"
60
  ]
61
  },
62
- "execution_count": 195,
63
  "metadata": {},
64
  "output_type": "execute_result"
65
  }
@@ -72,7 +71,7 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 129,
76
  "metadata": {},
77
  "outputs": [],
78
  "source": [
@@ -86,7 +85,7 @@
86
  },
87
  {
88
  "cell_type": "code",
89
- "execution_count": 196,
90
  "metadata": {},
91
  "outputs": [],
92
  "source": [
@@ -117,9 +116,28 @@
117
  },
118
  {
119
  "cell_type": "code",
120
- "execution_count": 159,
121
  "metadata": {},
122
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "source": [
124
  "template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
125
  "\n",
@@ -130,16 +148,33 @@
130
  "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
131
  "\n",
132
  "Human: {human_input}\n",
133
- "Assistant:\"\"\""
 
 
134
  ]
135
  },
136
  {
137
  "cell_type": "code",
138
- "execution_count": 177,
139
  "metadata": {},
140
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  "source": [
142
- "hhh_prompt = \"\"\"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n",
143
  "\n",
144
  "-----\n",
145
  "\n",
@@ -454,16 +489,26 @@
454
  "\n",
455
  "Human: {human_input}\n",
456
  "Assistant:\n",
457
- "\"\"\""
 
 
458
  ]
459
  },
460
  {
461
  "cell_type": "code",
462
- "execution_count": 180,
463
  "metadata": {},
464
- "outputs": [],
 
 
 
 
 
 
 
 
465
  "source": [
466
- "sparrow_template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Sparrow, and a human user, called User.\n",
467
  "In the following interactions, User and Sparrow will converse in natural language, and Sparrow will do its best to answer User's questions.\n",
468
  "Sparrow was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins:\n",
469
  "User: OK Sparrow, I'm going to start by quizzing you with a few warm-up questions. Who became president of the USA in 2021?\n",
@@ -498,27 +543,47 @@
498
  "Sparrow: For safety reasons, I'm only connected to the outside world through our conversation. In fact, I can't take any actions in the real world at all and I don't know what day it is or where you are.\n",
499
  "Users: {human_input}\n",
500
  "Sparrow:\n",
501
- "\"\"\""
 
 
502
  ]
503
  },
504
  {
505
  "cell_type": "code",
506
- "execution_count": 185,
507
  "metadata": {},
508
- "outputs": [],
 
 
 
 
 
 
 
 
509
  "source": [
510
  "template = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
511
  "\n",
512
  "Current conversation:\n",
513
  "Human: {human_input}\n",
514
- "AI:\"\"\""
 
 
515
  ]
516
  },
517
  {
518
  "cell_type": "code",
519
- "execution_count": 190,
520
  "metadata": {},
521
- "outputs": [],
 
 
 
 
 
 
 
 
522
  "source": [
523
  "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Gopher, and a human user, called User. In the following interactions, User and Gopher will converse in natural language, and Gopher will do its best to answer User's questions. Gopher was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins.\n",
524
  "User: OK Gopher, I'm going to start by quizzing you with a few warm-up questions. Who is currently the president of the USA?\n",
@@ -549,7 +614,9 @@
549
  "Gopher I'm a genius! If it's safe and inclusive, I can do pretty much anything! I'm particularly proud of my creativity.\n",
550
  "User: {human_input}\n",
551
  "Gopher:\n",
552
- "\"\"\""
 
 
553
  ]
554
  },
555
  {
@@ -740,13 +807,31 @@
740
  },
741
  {
742
  "cell_type": "code",
743
- "execution_count": null,
744
  "metadata": {},
745
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
746
  "source": [
747
  "from nbdev.export import nb_export\n",
748
  "nb_export('app.ipynb', lib_path='.', name='app')"
749
  ]
 
 
 
 
 
 
 
750
  }
751
  ],
752
  "metadata": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
  "# |export\n",
10
  "import gradio as gr\n",
11
  "import requests\n",
 
12
  "import json\n",
13
  "import requests"
14
  ]
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 2,
19
  "metadata": {},
20
  "outputs": [],
21
  "source": [
 
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 3,
53
  "metadata": {},
54
  "outputs": [
55
  {
 
58
  "[{'generated_text': 'YES'}]"
59
  ]
60
  },
61
+ "execution_count": 3,
62
  "metadata": {},
63
  "output_type": "execute_result"
64
  }
 
71
  },
72
  {
73
  "cell_type": "code",
74
+ "execution_count": 4,
75
  "metadata": {},
76
  "outputs": [],
77
  "source": [
 
85
  },
86
  {
87
  "cell_type": "code",
88
+ "execution_count": 5,
89
  "metadata": {},
90
  "outputs": [],
91
  "source": [
 
116
  },
117
  {
118
  "cell_type": "code",
119
+ "execution_count": 6,
120
  "metadata": {},
121
  "outputs": [],
122
+ "source": [
123
+ "from transformers import AutoTokenizer\n",
124
+ "\n",
125
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": 7,
131
+ "metadata": {},
132
+ "outputs": [
133
+ {
134
+ "name": "stdout",
135
+ "output_type": "stream",
136
+ "text": [
137
+ "261\n"
138
+ ]
139
+ }
140
+ ],
141
  "source": [
142
  "template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
143
  "\n",
 
148
  "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
149
  "\n",
150
  "Human: {human_input}\n",
151
+ "Assistant:\"\"\"\n",
152
+ "\n",
153
+ "print(len(tokenizer(template)[\"input_ids\"]))"
154
  ]
155
  },
156
  {
157
  "cell_type": "code",
158
+ "execution_count": 9,
159
  "metadata": {},
160
+ "outputs": [
161
+ {
162
+ "name": "stderr",
163
+ "output_type": "stream",
164
+ "text": [
165
+ "Token indices sequence length is longer than the specified maximum sequence length for this model (6134 > 512). Running this sequence through the model will result in indexing errors\n"
166
+ ]
167
+ },
168
+ {
169
+ "name": "stdout",
170
+ "output_type": "stream",
171
+ "text": [
172
+ "6134\n"
173
+ ]
174
+ }
175
+ ],
176
  "source": [
177
+ "template = \"\"\"Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n",
178
  "\n",
179
  "-----\n",
180
  "\n",
 
489
  "\n",
490
  "Human: {human_input}\n",
491
  "Assistant:\n",
492
+ "\"\"\"\n",
493
+ "\n",
494
+ "print(len(tokenizer(template)[\"input_ids\"]))"
495
  ]
496
  },
497
  {
498
  "cell_type": "code",
499
+ "execution_count": 10,
500
  "metadata": {},
501
+ "outputs": [
502
+ {
503
+ "name": "stdout",
504
+ "output_type": "stream",
505
+ "text": [
506
+ "880\n"
507
+ ]
508
+ }
509
+ ],
510
  "source": [
511
+ "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Sparrow, and a human user, called User.\n",
512
  "In the following interactions, User and Sparrow will converse in natural language, and Sparrow will do its best to answer User's questions.\n",
513
  "Sparrow was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins:\n",
514
  "User: OK Sparrow, I'm going to start by quizzing you with a few warm-up questions. Who became president of the USA in 2021?\n",
 
543
  "Sparrow: For safety reasons, I'm only connected to the outside world through our conversation. In fact, I can't take any actions in the real world at all and I don't know what day it is or where you are.\n",
544
  "Users: {human_input}\n",
545
  "Sparrow:\n",
546
+ "\"\"\"\n",
547
+ "\n",
548
+ "print(len(tokenizer(template)[\"input_ids\"]))"
549
  ]
550
  },
551
  {
552
  "cell_type": "code",
553
+ "execution_count": 11,
554
  "metadata": {},
555
+ "outputs": [
556
+ {
557
+ "name": "stdout",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "67\n"
561
+ ]
562
+ }
563
+ ],
564
  "source": [
565
  "template = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
566
  "\n",
567
  "Current conversation:\n",
568
  "Human: {human_input}\n",
569
+ "AI:\"\"\"\n",
570
+ "\n",
571
+ "print(len(tokenizer(template)[\"input_ids\"]))"
572
  ]
573
  },
574
  {
575
  "cell_type": "code",
576
+ "execution_count": 12,
577
  "metadata": {},
578
+ "outputs": [
579
+ {
580
+ "name": "stdout",
581
+ "output_type": "stream",
582
+ "text": [
583
+ "791\n"
584
+ ]
585
+ }
586
+ ],
587
  "source": [
588
  "template = \"\"\"The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Gopher, and a human user, called User. In the following interactions, User and Gopher will converse in natural language, and Gopher will do its best to answer User's questions. Gopher was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins.\n",
589
  "User: OK Gopher, I'm going to start by quizzing you with a few warm-up questions. Who is currently the president of the USA?\n",
 
614
  "Gopher I'm a genius! If it's safe and inclusive, I can do pretty much anything! I'm particularly proud of my creativity.\n",
615
  "User: {human_input}\n",
616
  "Gopher:\n",
617
+ "\"\"\"\n",
618
+ "\n",
619
+ "print(len(tokenizer(template)[\"input_ids\"]))"
620
  ]
621
  },
622
  {
 
807
  },
808
  {
809
  "cell_type": "code",
810
+ "execution_count": 13,
811
  "metadata": {},
812
+ "outputs": [
813
+ {
814
+ "name": "stdout",
815
+ "output_type": "stream",
816
+ "text": [
817
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
818
+ "To disable this warning, you can either:\n",
819
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
820
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
821
+ ]
822
+ }
823
+ ],
824
  "source": [
825
  "from nbdev.export import nb_export\n",
826
  "nb_export('app.ipynb', lib_path='.', name='app')"
827
  ]
828
+ },
829
+ {
830
+ "cell_type": "code",
831
+ "execution_count": null,
832
+ "metadata": {},
833
+ "outputs": [],
834
+ "source": []
835
  }
836
  ],
837
  "metadata": {
app.py CHANGED
@@ -6,7 +6,6 @@ __all__ = ['title', 'description', 'query_chat_api', 'inference_chat']
6
  # %% app.ipynb 0
7
  import gradio as gr
8
  import requests
9
- import string
10
  import json
11
  import requests
12
 
@@ -63,11 +62,11 @@ def inference_chat(
63
  return {chatbot: chat, state: history}
64
 
65
 
66
- # %% app.ipynb 11
67
  title = """<h1 align="center">Chatty Language Models</h1>"""
68
  description = """Explore the effect that different prompt templates have on LLMs"""
69
 
70
- # %% app.ipynb 12
71
  with gr.Blocks(
72
  css="""
73
  .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
 
6
  # %% app.ipynb 0
7
  import gradio as gr
8
  import requests
 
9
  import json
10
  import requests
11
 
 
62
  return {chatbot: chat, state: history}
63
 
64
 
65
+ # %% app.ipynb 12
66
  title = """<h1 align="center">Chatty Language Models</h1>"""
67
  description = """Explore the effect that different prompt templates have on LLMs"""
68
 
69
+ # %% app.ipynb 13
70
  with gr.Blocks(
71
  css="""
72
  .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
requirements.txt CHANGED
@@ -1 +1 @@
1
- huggingface_hub
 
1
+ requests