thrag committed on
Commit
21d2102
1 Parent(s): 3b62083

playing with pydantic. no success

Browse files
Files changed (2) hide show
  1. demo-tools-1.ipynb +5 -1
  2. pydantic-1.ipynb +463 -0
demo-tools-1.ipynb CHANGED
@@ -245,11 +245,15 @@
245
  " \n",
246
  " # LLM \n",
247
  " llm = ChatOpenAI(temperature=0.2, model_name=model) # 'gpt-3.5-turbo' # gpt-4\n",
248
- " agent = initialize_agent(agent=\"zero-shot-react-description\", tools=tools, llm=llm, verbose=True, max_iterations=7, return_intermediate_steps=True, handle_parsing_errors=\"Check your output and make sure it conforms.\")\n",
 
 
 
249
  " system = \"\"\"\n",
250
  " If the answer is not in the tools or context passed to you then don't answer. \\n\n",
251
  " If you don't know the answer then say so. \\n \n",
252
  " \"\"\" \n",
 
253
  " response = agent({\"input\": f\"{system} [[RECIPENAME]] {query}\"})\n",
254
  "\n",
255
  " # Show response \n",
 
245
  " \n",
246
  " # LLM \n",
247
  " llm = ChatOpenAI(temperature=0.2, model_name=model) # 'gpt-3.5-turbo' # gpt-4\n",
248
+ " agent = initialize_agent( \n",
249
+ " agent=\"zero-shot-react-description\", tools=tools, llm=llm, verbose=True, \n",
250
+ " max_iterations=7, return_intermediate_steps=True, \n",
251
+ " handle_parsing_errors=\"Check your output and make sure it conforms.\")\n",
252
  " system = \"\"\"\n",
253
  " If the answer is not in the tools or context passed to you then don't answer. \\n\n",
254
  " If you don't know the answer then say so. \\n \n",
255
  " \"\"\" \n",
256
+ " \n",
257
  " response = agent({\"input\": f\"{system} [[RECIPENAME]] {query}\"})\n",
258
  "\n",
259
  " # Show response \n",
pydantic-1.ipynb ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 27,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
13
+ "(10, 20)\n",
14
+ "Mary\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "from datetime import datetime\n",
20
+ "from typing import Tuple\n",
21
+ "\n",
22
+ "from pydantic import BaseModel\n",
23
+ "\n",
24
+ "class Delivery(BaseModel):\n",
25
+ " timestamp: datetime\n",
26
+ " dimensions: Tuple[int, int]\n",
27
+ " name: str = 'Mary'\n",
28
+ "\n",
29
+ "m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20']) #, name=\"Bob\")\n",
30
+ "print(repr(m.timestamp))\n",
31
+ "#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
32
+ "print(m.dimensions)\n",
33
+ "print(m.name)\n",
34
+ "#> (10, 20)\n"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 28,
40
+ "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "name": "stdout",
44
+ "output_type": "stream",
45
+ "text": [
46
+ "2020-01-02 03:04:05+00:00\n"
47
+ ]
48
+ }
49
+ ],
50
+ "source": [
51
+ "print(m.timestamp)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 29,
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "from pydantic import BaseModel, PositiveInt\n",
61
+ "\n",
62
+ "class Car(BaseModel):\n",
63
+ " wheel_count: int = 4\n",
64
+ " number_plate: str = ''\n",
65
+ " passeners: list = []\n",
66
+ " components: dict[str, PositiveInt] = {}"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 31,
72
+ "metadata": {},
73
+ "outputs": [
74
+ {
75
+ "name": "stdout",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "Reading settings from ../../env/ai.json\n"
79
+ ]
80
+ }
81
+ ],
82
+ "source": [
83
+ "import os\n",
84
+ "import json\n",
85
+ "\n",
86
+ "pathToSettings = '../../env/ai.json'\n",
87
+ "if os.path.exists(pathToSettings):\n",
88
+ " # Load settings from a JSON file outside of the project.\n",
89
+ " print(f'Reading settings from {pathToSettings}')\n",
90
+ " f = open(pathToSettings)\n",
91
+ " settingsJson = json.load(f)\n",
92
+ " del f\n",
93
+ "\n",
94
+ " for key in settingsJson:\n",
95
+ " os.environ[key] = settingsJson[key]\n",
96
+ " \n",
97
+ " del settingsJson"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 32,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "from typing import List\n",
107
+ "\n",
108
+ "from langchain.llms import OpenAI\n",
109
+ "from langchain.output_parsers import PydanticOutputParser\n",
110
+ "from langchain.prompts import PromptTemplate\n",
111
+ "from langchain.pydantic_v1 import BaseModel, Field, validator\n",
112
+ "\n",
113
+ "model_name = \"text-davinci-003\"\n",
114
+ "temperature = 0.8\n",
115
+ "model = OpenAI(model_name=model_name, temperature=temperature)"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 30,
121
+ "metadata": {},
122
+ "outputs": [
123
+ {
124
+ "name": "stdout",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "{'wheel_count': 5, 'number_plate': 'CA 1235', 'passeners': [], 'components': {'canopy': 1, 'piston_head': 6, 'spanners': 1}}\n",
128
+ "5\n"
129
+ ]
130
+ }
131
+ ],
132
+ "source": [
133
+ "values = { \n",
134
+ " 'wheel_count': '5',\n",
135
+ " 'number_plate': 'CA 1235',\n",
136
+ " 'passengers': ['Bob Jones', 'Mary Sue'], \n",
137
+ " 'components': {\n",
138
+ " 'canopy': 1, \n",
139
+ " 'piston_head': 6, \n",
140
+ " 'spanners': 1\n",
141
+ " }\n",
142
+ " }\n",
143
+ "c = Car(**values)\n",
144
+ "print(c.model_dump())\n",
145
+ "print(c.wheel_count)"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": 77,
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": [
154
+ "import re \n",
155
+ "from pydantic import BaseModel, HttpUrl, ValidationError\n",
156
+ "\n",
157
+ "class WebSite(BaseModel):\n",
158
+ " #url: str #= '' #Field(description=\"{ 'extra': 'Url of the website' }\")\n",
159
+ " url: HttpUrl # = Field(description=\"Url of the website as URI\")\n",
160
+ " \n",
161
+ " # @validator(\"url\")\n",
162
+ " # def validate_url(cls, field):\n",
163
+ " # urlArray = re.findall(r'(https?://\\S+)', field)\n",
164
+ " # if len(urlArray) == 0: \n",
165
+ " # raise ValueError(\"Badly formed url\")\n",
166
+ " # return field \n",
167
+ " \n",
168
+ "# values = {'url': 'https://www.example.com'}\n",
169
+ "#w = WebSite(**values)\n",
170
+ "# w = WebSite(url='https://www.example.com', url2=\"https://www.example.com\")\n",
171
+ "#w = WebSite(url='abc')\n",
172
+ "\n",
173
+ "# website_parser = PydanticOutputParser(pydantic_object=WebSite)\n",
174
+ "# print(website_parser.get_format_instructions())"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": 76,
180
+ "metadata": {},
181
+ "outputs": [
182
+ {
183
+ "name": "stdout",
184
+ "output_type": "stream",
185
+ "text": [
186
+ "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
187
+ "\n",
188
+ "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
189
+ "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
190
+ "\n",
191
+ "Here is the output schema:\n",
192
+ "```\n",
193
+ "{\"properties\": {\"url2\": {\"format\": \"uri\", \"maxLength\": 2083, \"minLength\": 1, \"title\": \"Url2\", \"type\": \"string\"}}, \"required\": [\"url2\"]}\n",
194
+ "```\n"
195
+ ]
196
+ }
197
+ ],
198
+ "source": [
199
+ "model_name = \"text-davinci-003\"\n",
200
+ "temperature = 0.0\n",
201
+ "model = OpenAI(model_name=model_name, temperature=temperature)\n",
202
+ "\n",
203
+ "# And a query intended to prompt a language model to populate the data structure.\n",
204
+ "joke_query = \"Fetch a news website\"\n",
205
+ "\n",
206
+ "# Set up a parser + inject instructions into the prompt template.\n",
207
+ "parser = PydanticOutputParser(pydantic_object=WebSite)\n",
208
+ "\n",
209
+ "prompt = PromptTemplate(\n",
210
+ " template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
211
+ " input_variables=[\"query\"],\n",
212
+ " partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
213
+ ")\n",
214
+ "\n",
215
+ "print(parser.get_format_instructions())\n",
216
+ "# _input = prompt.format_prompt(query=joke_query)\n",
217
+ "# output = model(_input.to_string())\n",
218
+ "# parser.parse(output)"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 50,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "# https://xebia.com/blog/enforce-and-validate-llm-output-with-pydantic/\n",
228
+ "import openai\n",
229
+ "\n",
230
+ "def query(prompt: str) -> str:\n",
231
+ " \"\"\"Query the LLM with the given prompt.\"\"\"\n",
232
+ " completion = openai.ChatCompletion.create(\n",
233
+ " model=\"gpt-3.5-turbo\",\n",
234
+ " messages=[\n",
235
+ " {\n",
236
+ " \"role\": \"user\",\n",
237
+ " \"content\": prompt,\n",
238
+ " }\n",
239
+ " ],\n",
240
+ " temperature=0.0,\n",
241
+ " )\n",
242
+ " return completion.choices[0].message.content"
243
+ ]
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": 56,
248
+ "metadata": {},
249
+ "outputs": [
250
+ {
251
+ "name": "stdout",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "The largest planet in our solar system is Jupiter.\n"
255
+ ]
256
+ }
257
+ ],
258
+ "source": [
259
+ "response = query(\"What is the largest planet in our solar system?\")\n",
260
+ "print(response)"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": 52,
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
+ "prompt = \"\"\"\n",
270
+ "I will ask you questions and you will respond. Your response should be in the following format:\n",
271
+ "```json\n",
272
+ "{\n",
273
+ " \"thought\": \"How you think about the question\",\n",
274
+ " \"answer\": \"The answer to the question\"\n",
275
+ "}\n",
276
+ "```\n",
277
+ "\"\"\"\n"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 58,
283
+ "metadata": {},
284
+ "outputs": [
285
+ {
286
+ "name": "stdout",
287
+ "output_type": "stream",
288
+ "text": [
289
+ "{\n",
290
+ " \"thought\": \"This is a factual question that can be answered with scientific knowledge.\",\n",
291
+ " \"answer\": \"The largest planet in our solar system is Jupiter.\"\n",
292
+ "}\n"
293
+ ]
294
+ }
295
+ ],
296
+ "source": [
297
+ "question = \"What is the largest planet in our solar system?\"\n",
298
+ "response = query(prompt + question)\n",
299
+ "print(response)\n"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 55,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "The largest planet in our solar system is Jupiter.\n"
312
+ ]
313
+ }
314
+ ],
315
+ "source": [
316
+ "import json\n",
317
+ "\n",
318
+ "parsed_response = json.loads(response)\n",
319
+ "print(parsed_response[\"answer\"])\n",
320
+ "#'The largest planet in our solar system is Jupiter.'\n"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 61,
326
+ "metadata": {},
327
+ "outputs": [
328
+ {
329
+ "name": "stdout",
330
+ "output_type": "stream",
331
+ "text": [
332
+ "thought='I understand the format of the response' answer='Yes, I am ready to answer your questions'\n",
333
+ "<class '__main__.ThoughtAnswerResponse'>\n"
334
+ ]
335
+ }
336
+ ],
337
+ "source": [
338
+ "from pydantic import BaseModel\n",
339
+ "\n",
340
+ "\n",
341
+ "class ThoughtAnswerResponse(BaseModel):\n",
342
+ " thought: str\n",
343
+ " answer: str\n",
344
+ "\n",
345
+ "\n",
346
+ "raw_response = query(prompt)\n",
347
+ "\n",
348
+ "# Note: When you are using pydantic<2.0, use parse_raw instead of model_validate_json\n",
349
+ "validated_response = ThoughtAnswerResponse.model_validate_json(raw_response)\n",
350
+ "\n",
351
+ "print(validated_response)\n",
352
+ "#thought='This is a factual question that can be answered with scientific knowledge.' answer='The largest planet in our solar system is Jupiter.'\n",
353
+ "\n",
354
+ "print(type(validated_response))\n"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "markdown",
359
+ "metadata": {},
360
+ "source": [
361
+ "# Other stuff"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 33,
367
+ "metadata": {},
368
+ "outputs": [
369
+ {
370
+ "name": "stdout",
371
+ "output_type": "stream",
372
+ "text": [
373
+ "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
374
+ "\n",
375
+ "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
376
+ "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
377
+ "\n",
378
+ "Here is the output schema:\n",
379
+ "```\n",
380
+ "{\"properties\": {\"wheel_count\": {\"default\": 4, \"title\": \"Wheel Count\", \"type\": \"integer\"}, \"number_plate\": {\"default\": \"\", \"title\": \"Number Plate\", \"type\": \"string\"}, \"passeners\": {\"default\": [], \"items\": {}, \"title\": \"Passeners\", \"type\": \"array\"}, \"components\": {\"additionalProperties\": {\"exclusiveMinimum\": 0, \"type\": \"integer\"}, \"default\": {}, \"title\": \"Components\", \"type\": \"object\"}}}\n",
381
+ "```\n"
382
+ ]
383
+ }
384
+ ],
385
+ "source": [
386
+ "# Define your desired data structure.\n",
387
+ "class Joke(BaseModel):\n",
388
+ " setup: str = Field(description=\"question to set up a joke\")\n",
389
+ " punchline: str = Field(description=\"answer to resolve the joke\")\n",
390
+ "\n",
391
+ " # You can add custom validation logic easily with Pydantic.\n",
392
+ " @validator(\"setup\")\n",
393
+ " def question_ends_with_question_mark(cls, field):\n",
394
+ " if field[-1] != \"?\":\n",
395
+ " raise ValueError(\"Badly formed question!\")\n",
396
+ " return field\n",
397
+ "\n",
398
+ "\n",
399
+ "# And a query intended to prompt a language model to populate the data structure.\n",
400
+ "joke_query = \"Tell me a joke.\"\n",
401
+ "\n",
402
+ "# Set up a parser + inject instructions into the prompt template.\n",
403
+ "parser = PydanticOutputParser(pydantic_object=Joke)\n",
404
+ "\n",
405
+ "prompt = PromptTemplate(\n",
406
+ " template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
407
+ " input_variables=[\"query\"],\n",
408
+ " partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
409
+ ")\n",
410
+ "\n",
411
+ "# _input = prompt.format_prompt(query=joke_query)\n",
412
+ "\n",
413
+ "# output = model(_input.to_string())\n",
414
+ "\n",
415
+ "# parser.parse(output)\n",
416
+ "\n",
417
+ "#print(parser.get_format_instructions())\n",
418
+ "car_parser = PydanticOutputParser(pydantic_object=Car)\n",
419
+ "print(car_parser.get_format_instructions())"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 34,
425
+ "metadata": {},
426
+ "outputs": [],
427
+ "source": [
428
+ "# from langchain.chat_models import ChatOpenAI\n",
429
+ "# from langchain.agents import initialize_agent\n",
430
+ "\n",
431
+ "# llm = ChatOpenAI(temperature=0.2, model_name=\"gpt-4\") # 'gpt-3.5-turbo' # gpt-4 #text-davinci-003\n",
432
+ "# agent = initialize_agent(agent=\"zero-shot-react-description\", \n",
433
+ "# tools=[list_dessert_recipes_tool], \n",
434
+ "# llm=llm, \n",
435
+ "# verbose=True, max_iterations=7, return_intermediate_steps=True, \n",
436
+ "# handle_parsing_errors=\"Check your output and make sure it conforms.\")\n",
437
+ "# query = \"How far away is Saturn?\"\n",
438
+ "# response = agent({\"input\": f\"{query}\"})\n"
439
+ ]
440
+ }
441
+ ],
442
+ "metadata": {
443
+ "kernelspec": {
444
+ "display_name": ".venv",
445
+ "language": "python",
446
+ "name": "python3"
447
+ },
448
+ "language_info": {
449
+ "codemirror_mode": {
450
+ "name": "ipython",
451
+ "version": 3
452
+ },
453
+ "file_extension": ".py",
454
+ "mimetype": "text/x-python",
455
+ "name": "python",
456
+ "nbconvert_exporter": "python",
457
+ "pygments_lexer": "ipython3",
458
+ "version": "3.10.6"
459
+ }
460
+ },
461
+ "nbformat": 4,
462
+ "nbformat_minor": 2
463
+ }