{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
      "(10, 20)\n",
      "Mary\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "from typing import Tuple\n",
    "\n",
    "from pydantic import BaseModel\n",
    "\n",
    "\n",
    "# Demo model: pydantic converts compatible raw input into the annotated field types.\n",
    "class Delivery(BaseModel):\n",
    "    timestamp: datetime\n",
    "    dimensions: Tuple[int, int]\n",
    "    name: str = 'Mary'  # used when no name is supplied\n",
    "\n",
    "\n",
    "# The ISO-8601 string and the list of numeric strings are converted on validation.\n",
    "m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])  # , name=\"Bob\")\n",
    "print(repr(m.timestamp))\n",
    "#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
    "print(m.dimensions)\n",
    "#> (10, 20)\n",
    "print(m.name)\n",
    "#> Mary\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-01-02 03:04:05+00:00\n"
     ]
    }
   ],
   "source": [
    "print(m.timestamp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pydantic import BaseModel, PositiveInt\n",
    "\n",
    "\n",
    "# Every field has a default, so an empty `Car()` is valid.\n",
    "class Car(BaseModel):\n",
    "    wheel_count: int = 4\n",
    "    number_plate: str = ''\n",
    "    # Was misspelled `passeners`, so the `passengers` key supplied below was silently dropped.\n",
    "    passengers: list = []\n",
    "    components: dict[str, PositiveInt] = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading settings from ../../env/ai.json\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "pathToSettings = '../../env/ai.json'\n",
    "if os.path.exists(pathToSettings):\n",
    "    # Load settings from a JSON file kept outside of the project and\n",
    "    # export every entry as an environment variable.\n",
    "    print(f'Reading settings from {pathToSettings}')\n",
    "    # The context manager closes the file even if json.load raises.\n",
    "    with open(pathToSettings, encoding='utf-8') as f:\n",
    "        settingsJson = json.load(f)\n",
    "\n",
    "    for key, value in settingsJson.items():\n",
    "        os.environ[key] = value\n",
    "\n",
    "    # Keep the loaded values out of the notebook namespace.\n",
    "    del f, settingsJson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.output_parsers import PydanticOutputParser\n",
    "from langchain.prompts import PromptTemplate\n",
    "# Aliased so the pydantic-v1 base class does not shadow the `pydantic.BaseModel`\n",
    "# that the other cells import under the same name.\n",
    "from langchain.pydantic_v1 import BaseModel as BaseModelV1, Field, validator\n",
    "\n",
    "model_name = \"text-davinci-003\"\n",
    "temperature = 0.8\n",
    "model = OpenAI(model_name=model_name, temperature=temperature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "values = {\n",
    "    'wheel_count': '5',\n",
    "    'number_plate': 'CA 1235',\n",
    "    'passengers': ['Bob Jones', 'Mary Sue'],\n",
    "    'components': {\n",
    "        'canopy': 1,\n",
    "        'piston_head': 6,\n",
    "        'spanners': 1\n",
    "    }\n",
    "}\n",
    "c = Car(**values)\n",
    "print(c.model_dump())\n",
    "#> {'wheel_count': 5, 'number_plate': 'CA 1235', 'passengers': ['Bob Jones', 'Mary Sue'], 'components': {'canopy': 1, 'piston_head': 6, 'spanners': 1}}\n",
    "print(c.wheel_count)\n",
    "#> 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from pydantic import BaseModel, HttpUrl, ValidationError\n",
    "\n",
    "\n",
    "# Target structure for the LLM answer: a single field typed as HttpUrl.\n",
    "class WebSite(BaseModel):\n",
    "    #url: str #= '' #Field(description=\"{ 'extra': 'Url of the website' }\")\n",
    "    url: HttpUrl # = Field(description=\"Url of the website as URI\")\n",
    "\n",
    "    # @validator(\"url\")\n",
    "    # def validate_url(cls, field):\n",
    "    #     urlArray = re.findall(r'(https?://\\S+)', field)\n",
    "    #     if len(urlArray) == 0:\n",
    "    #         raise ValueError(\"Badly formed url\")\n",
    "    #     return field\n",
    "\n",
    "# values = {'url': 'https://www.example.com'}\n",
    "#w = WebSite(**values)\n",
    "#w = WebSite(url='https://www.example.com')\n",
    "#w = WebSite(url='abc')\n",
    "\n",
    "# website_parser = PydanticOutputParser(pydantic_object=WebSite)\n",
    "# print(website_parser.get_format_instructions())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Answer the user query.\n",
      "{format_instructions}\n",
      "{query}\n",
      "\n",
      "##############\n",
      "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
      "\n",
      "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
      "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
      "\n",
      "Here is the output schema:\n",
      "```\n",
      "{\"properties\": {\"url\": {\"format\": \"uri\", \"maxLength\": 2083, \"minLength\": 1, \"title\": \"Url\", \"type\": \"string\"}}, \"required\": [\"url\"]}\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "model_name = \"text-davinci-003\"\n",
    "temperature = 0.0\n",
    "model = OpenAI(model_name=model_name, temperature=temperature)\n",
    "\n",
    "# And a query intended to prompt a language model to populate the data structure.\n",
    "# Named `news_query` so it does not collide with the `query()` helper defined further down.\n",
    "news_query = \"Fetch a news website\"\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "website_parser = PydanticOutputParser(pydantic_object=WebSite)\n",
    "\n",
    "# Named `website_prompt` so it does not collide with the plain-string `prompt` defined further down.\n",
    "website_prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": website_parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "print(website_prompt.template)\n",
    "print('##############')\n",
    "print(website_parser.get_format_instructions())\n",
    "# _input = website_prompt.format_prompt(query=news_query)\n",
    "# output = model(_input.to_string())\n",
    "# website_parser.parse(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://xebia.com/blog/enforce-and-validate-llm-output-with-pydantic/\n",
    "import openai\n",
    "\n",
    "\n",
    "def query(prompt: str) -> str:\n",
    "    \"\"\"Query the LLM with the given prompt.\n",
    "\n",
    "    Sends `prompt` as a single user message to gpt-3.5-turbo at temperature 0.0\n",
    "    and returns the text content of the first choice.\n",
    "    \"\"\"\n",
    "    # NOTE: `openai.ChatCompletion` is the interface of the pre-1.0 `openai` package.\n",
    "    completion = openai.ChatCompletion.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        messages=[\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": prompt,\n",
    "            }\n",
    "        ],\n",
    "        temperature=0.0,\n",
    "    )\n",
    "    return completion.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The largest planet in our solar system is Jupiter.\n"
     ]
    }
   ],
   "source": [
    "response = query(\"What is the largest planet in our solar system?\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"\"\"\n",
    "I will ask you questions and you will respond. Your response should be in the following format:\n",
    "```json\n",
    "{\n",
    "    \"thought\": \"How you think about the question\",\n",
    "    \"answer\": \"The answer to the question\"\n",
    "}\n",
    "```\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      " \"thought\": \"This is a factual question that can be answered with scientific knowledge.\",\n",
      " \"answer\": \"The largest planet in our solar system is Jupiter.\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "question = \"What is the largest planet in our solar system?\"\n",
    "response = query(prompt + question)\n",
    "print(response)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The largest planet in our solar system is Jupiter.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "parsed_response = json.loads(response)\n",
    "print(parsed_response[\"answer\"])\n",
    "#'The largest planet in our solar system is Jupiter.'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pydantic import BaseModel\n",
    "\n",
    "\n",
    "# Shape of the JSON object that `prompt` asks the model to return.\n",
    "class ThoughtAnswerResponse(BaseModel):\n",
    "    thought: str\n",
    "    answer: str\n",
    "\n",
    "\n",
    "# Send the question together with the format instructions; with the instructions\n",
    "# alone the model replies about the format instead of answering the question.\n",
    "raw_response = query(prompt + question)\n",
    "\n",
    "# Note: When you are using pydantic<2.0, use parse_raw instead of model_validate_json\n",
    "validated_response = ThoughtAnswerResponse.model_validate_json(raw_response)\n",
    "\n",
    "print(validated_response)\n",
    "#thought='This is a factual question that can be answered with scientific knowledge.' answer='The largest planet in our solar system is Jupiter.'\n",
    "\n",
    "print(type(validated_response))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Other stuff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define your desired data structure.\n",
    "# Built on the pydantic-v1 base class because `Field` and `validator` come from `langchain.pydantic_v1`.\n",
    "class Joke(BaseModelV1):\n",
    "    setup: str = Field(description=\"question to set up a joke\")\n",
    "    punchline: str = Field(description=\"answer to resolve the joke\")\n",
    "\n",
    "    # You can add custom validation logic easily with Pydantic.\n",
    "    @validator(\"setup\")\n",
    "    def question_ends_with_question_mark(cls, field):\n",
    "        # `endswith` also covers the empty string, which `field[-1]` would turn into an IndexError.\n",
    "        if not field.endswith(\"?\"):\n",
    "            raise ValueError(\"Badly formed question!\")\n",
    "        return field\n",
    "\n",
    "\n",
    "# And a query intended to prompt a language model to populate the data structure.\n",
    "joke_query = \"Tell me a joke.\"\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "joke_parser = PydanticOutputParser(pydantic_object=Joke)\n",
    "\n",
    "# Named `joke_prompt` so the plain-string `prompt` used by the `query()` cells is not overwritten.\n",
    "joke_prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": joke_parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "# _input = joke_prompt.format_prompt(query=joke_query)\n",
    "\n",
    "# output = model(_input.to_string())\n",
    "\n",
    "# joke_parser.parse(output)\n",
    "\n",
    "#print(joke_parser.get_format_instructions())\n",
    "car_parser = PydanticOutputParser(pydantic_object=Car)\n",
    "print(car_parser.get_format_instructions())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from langchain.chat_models import ChatOpenAI\n",
    "# from langchain.agents import initialize_agent\n",
    "\n",
    "# llm = ChatOpenAI(temperature=0.2, model_name=\"gpt-4\") # 'gpt-3.5-turbo' # gpt-4 #text-davinci-003\n",
    "# agent = initialize_agent(agent=\"zero-shot-react-description\",\n",
    "#                          tools=[list_dessert_recipes_tool],\n",
    "#                          llm=llm,\n",
    "#                          verbose=True, max_iterations=7, return_intermediate_steps=True,\n",
    "#                          handle_parsing_errors=\"Check your output and make sure it conforms.\")\n",
    "# query = \"How far away is Saturn?\"\n",
    "# response = agent({\"input\": f\"{query}\"})\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}