{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
      "(10, 20)\n",
      "Mary\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "from typing import Tuple\n",
    "\n",
    "from pydantic import BaseModel\n",
    "\n",
    "# Pydantic model with two required fields and one defaulted field.\n",
    "class Delivery(BaseModel):\n",
    "    # Required; no default is declared.\n",
    "    timestamp: datetime\n",
    "    # Required pair of ints.\n",
    "    dimensions: Tuple[int, int]\n",
    "    # Optional; falls back to 'Mary' when not supplied.\n",
    "    name: str = 'Mary'\n",
    "\n",
    "# Inputs are given as strings; the printed results below show them coerced to a datetime and a tuple of ints.\n",
    "# `name` is omitted, so the default is used (pass name=\"Bob\" to override it).\n",
    "m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])\n",
    "print(repr(m.timestamp))\n",
    "#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))\n",
    "print(m.dimensions)\n",
    "#> (10, 20)\n",
    "print(m.name)\n",
    "#> Mary\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-01-02 03:04:05+00:00\n"
     ]
    }
   ],
   "source": [
    "# print() uses str(), giving the plain date-time form rather than the repr() printed in the first cell.\n",
    "print(m.timestamp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pydantic import BaseModel, PositiveInt\n",
    "\n",
    "# Every field has a default, so Car() can be built with no arguments.\n",
    "class Car(BaseModel):\n",
    "    wheel_count: int = 4\n",
    "    number_plate: str = ''\n",
    "    # Spelled `passengers` so it matches the key callers pass in; with the previous\n",
    "    # misspelling that key was not recognised and the list always stayed empty.\n",
    "    passengers: list = []\n",
    "    # Maps a component name to a strictly positive count.\n",
    "    components: dict[str, PositiveInt] = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading settings from ../../env/ai.json\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "pathToSettings = '../../env/ai.json'\n",
    "if os.path.exists(pathToSettings):\n",
    "    # Load settings from a JSON file kept outside of the project.\n",
    "    print(f'Reading settings from {pathToSettings}')\n",
    "    # The context manager closes the file even if json.load() raises.\n",
    "    with open(pathToSettings, encoding='utf-8') as f:\n",
    "        settingsJson = json.load(f)\n",
    "\n",
    "    # Export every top-level entry as an environment variable for later cells.\n",
    "    # Values must be strings; os.environ rejects anything else with a TypeError.\n",
    "    os.environ.update(settingsJson)\n",
    "\n",
    "    # Drop the handle and the parsed settings so they do not linger in the kernel namespace.\n",
    "    del f, settingsJson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.output_parsers import PydanticOutputParser\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.pydantic_v1 import BaseModel, Field, validator\n",
    "\n",
    "# LangChain OpenAI completion wrapper; in this notebook it is only referenced from commented-out calls.\n",
    "# NOTE: `model_name`, `temperature` and `model` are assigned again in a later cell (with temperature 0.0).\n",
    "model_name = \"text-davinci-003\"\n",
    "temperature = 0.8\n",
    "model = OpenAI(model_name=model_name, temperature=temperature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'wheel_count': 5, 'number_plate': 'CA 1235', 'passeners': [], 'components': {'canopy': 1, 'piston_head': 6, 'spanners': 1}}\n",
      "5\n"
     ]
    }
   ],
   "source": [
    "# Raw input for Car; wheel_count is passed as a string.\n",
    "values = dict(\n",
    "    wheel_count='5',\n",
    "    number_plate='CA 1235',\n",
    "    passengers=['Bob Jones', 'Mary Sue'],\n",
    "    components=dict(canopy=1, piston_head=6, spanners=1),\n",
    ")\n",
    "\n",
    "# Validate the payload, then show the full dump and a single field.\n",
    "c = Car(**values)\n",
    "print(c.model_dump())\n",
    "print(c.wheel_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re \n",
    "from pydantic import BaseModel, HttpUrl, ValidationError\n",
    "\n",
    "# Model with a single required field, typed as pydantic's HttpUrl.\n",
    "class WebSite(BaseModel):\n",
    "    # Earlier attempt kept for reference:\n",
    "    #url: str  #= '' #Field(description=\"{ 'extra': 'Url of the website' }\")\n",
    "    # NOTE(review): HttpUrl appears to replace the commented-out regex validator below - confirm.\n",
    "    url: HttpUrl # = Field(description=\"Url of the website as URI\")\n",
    "    \n",
    "    # @validator(\"url\")\n",
    "    # def validate_url(cls, field):\n",
    "    #     urlArray = re.findall(r'(https?://\\S+)', field)\n",
    "    #     if len(urlArray) == 0: \n",
    "    #         raise ValueError(\"Badly formed url\")\n",
    "    #     return field \n",
    "    \n",
    "# values = {'url': 'https://www.example.com'}\n",
    "#w = WebSite(**values)\n",
    "#w = WebSite(url='https://www.example.com')\n",
    "#w = WebSite(url='abc')\n",
    "\n",
    "# website_parser = PydanticOutputParser(pydantic_object=WebSite)\n",
    "# print(website_parser.get_format_instructions())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Answer the user query.\n",
      "{format_instructions}\n",
      "{query}\n",
      "\n",
      "##############\n",
      "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
      "\n",
      "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
      "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
      "\n",
      "Here is the output schema:\n",
      "```\n",
      "{\"properties\": {\"url\": {\"format\": \"uri\", \"maxLength\": 2083, \"minLength\": 1, \"title\": \"Url\", \"type\": \"string\"}}, \"required\": [\"url\"]}\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Rebuild the completion model with temperature 0.0 for this experiment.\n",
    "model_name = \"text-davinci-003\"\n",
    "temperature = 0.0\n",
    "model = OpenAI(model_name=model_name, temperature=temperature)\n",
    "\n",
    "# A query intended to prompt a language model to populate the data structure.\n",
    "# Named `website_query` (like `joke_query` further down) so it does not rebind the\n",
    "# name `query`, which the notebook also uses for the function that calls the chat model.\n",
    "website_query = \"Fetch a news website\"\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PydanticOutputParser(pydantic_object=WebSite)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "# Show the raw template, then the format instructions generated from WebSite.\n",
    "print(prompt.template)\n",
    "print('##############')\n",
    "print(parser.get_format_instructions())\n",
    "# _input = prompt.format_prompt(query=website_query)\n",
    "# output = model(_input.to_string())\n",
    "# parser.parse(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  https://xebia.com/blog/enforce-and-validate-llm-output-with-pydantic/\n",
    "import openai\n",
    "\n",
    "def query(prompt: str) -> str:\n",
    "    \"\"\"Send ``prompt`` as a single user message and return the reply text.\n",
    "\n",
    "    Args:\n",
    "        prompt: Text placed in the content of the one user message.\n",
    "\n",
    "    Returns:\n",
    "        The message content of the first choice in the completion.\n",
    "    \"\"\"\n",
    "    user_message = {\"role\": \"user\", \"content\": prompt}\n",
    "    completion = openai.ChatCompletion.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        messages=[user_message],\n",
    "        temperature=0.0,\n",
    "    )\n",
    "    first_choice = completion.choices[0]\n",
    "    return first_choice.message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The largest planet in our solar system is Jupiter.\n"
     ]
    }
   ],
   "source": [
    "# Baseline call: the question is sent on its own, with no output-format instructions.\n",
    "response = query(\"What is the largest planet in our solar system?\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instruction prefix asking the model to reply as a JSON object with `thought` and `answer` keys.\n",
    "# NOTE: other cells in this notebook bind `prompt` to a PromptTemplate; re-run this cell\n",
    "# before any cell that concatenates `prompt` with a question.\n",
    "prompt = \"\"\"\n",
    "I will ask you questions and you will respond. Your response should be in the following format:\n",
    "```json\n",
    "{\n",
    "    \"thought\": \"How you think about the question\",\n",
    "    \"answer\": \"The answer to the question\"\n",
    "}\n",
    "```\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    \"thought\": \"This is a factual question that can be answered with scientific knowledge.\",\n",
      "    \"answer\": \"The largest planet in our solar system is Jupiter.\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "# Prefix the question with the format instructions so the reply comes back as JSON.\n",
    "question = \"What is the largest planet in our solar system?\"\n",
    "response = query(prompt + question)\n",
    "print(response)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The largest planet in our solar system is Jupiter.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "# `response` is expected to hold the JSON-formatted reply from the formatted query;\n",
    "# json.loads raises json.JSONDecodeError if the text is not valid JSON.\n",
    "parsed_response = json.loads(response)\n",
    "print(parsed_response[\"answer\"])\n",
    "#> The largest planet in our solar system is Jupiter.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "thought='I understand the format of the response' answer='Yes, I am ready to answer your questions'\n",
      "<class '__main__.ThoughtAnswerResponse'>\n"
     ]
    }
   ],
   "source": [
    "from pydantic import BaseModel\n",
    "\n",
    "\n",
    "# Schema for the JSON reply requested by `prompt`: both fields are required strings.\n",
    "class ThoughtAnswerResponse(BaseModel):\n",
    "    thought: str\n",
    "    answer: str\n",
    "\n",
    "\n",
    "# Send the format instructions together with the question; sending `prompt` alone gives\n",
    "# the model nothing to answer, so the reply would not match the example shown below.\n",
    "raw_response = query(prompt + question)\n",
    "\n",
    "# Note: When you are using pydantic<2.0, use parse_raw instead of model_validate_json\n",
    "# Validation fails with an exception if the reply is not JSON carrying string `thought` and `answer` fields.\n",
    "validated_response = ThoughtAnswerResponse.model_validate_json(raw_response)\n",
    "\n",
    "print(validated_response)\n",
    "#> thought='This is a factual question that can be answered with scientific knowledge.' answer='The largest planet in our solar system is Jupiter.'\n",
    "\n",
    "# The result is a model instance, not a dict.\n",
    "print(type(validated_response))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Other stuff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
      "\n",
      "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
      "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
      "\n",
      "Here is the output schema:\n",
      "```\n",
      "{\"properties\": {\"wheel_count\": {\"default\": 4, \"title\": \"Wheel Count\", \"type\": \"integer\"}, \"number_plate\": {\"default\": \"\", \"title\": \"Number Plate\", \"type\": \"string\"}, \"passeners\": {\"default\": [], \"items\": {}, \"title\": \"Passeners\", \"type\": \"array\"}, \"components\": {\"additionalProperties\": {\"exclusiveMinimum\": 0, \"type\": \"integer\"}, \"default\": {}, \"title\": \"Components\", \"type\": \"object\"}}}\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Define your desired data structure.\n",
    "# NOTE(review): Field and validator come from langchain.pydantic_v1, so BaseModel should be\n",
    "# the one imported from that same module when this cell runs - confirm on a fresh Run All.\n",
    "class Joke(BaseModel):\n",
    "    # Both fields are required strings; the descriptions are passed to Field().\n",
    "    setup: str = Field(description=\"question to set up a joke\")\n",
    "    punchline: str = Field(description=\"answer to resolve the joke\")\n",
    "\n",
    "    # You can add custom validation logic easily with Pydantic.\n",
    "    @validator(\"setup\")\n",
    "    def question_ends_with_question_mark(cls, field):\n",
    "        \"\"\"Reject a setup that does not end with a question mark.\n",
    "\n",
    "        Returns the value unchanged when it is valid; raises ValueError otherwise.\n",
    "        \"\"\"\n",
    "        # endswith() also covers the empty string, where field[-1] would raise IndexError.\n",
    "        if not field.endswith(\"?\"):\n",
    "            raise ValueError(\"Badly formed question!\")\n",
    "        return field\n",
    "\n",
    "\n",
    "# And a query intended to prompt a language model to populate the data structure.\n",
    "joke_query = \"Tell me a joke.\"\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PydanticOutputParser(pydantic_object=Joke)\n",
    "\n",
    "# NOTE: this rebinds `prompt`, which other cells use for a plain instruction string.\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "# _input = prompt.format_prompt(query=joke_query)\n",
    "\n",
    "# output = model(_input.to_string())\n",
    "\n",
    "# parser.parse(output)\n",
    "\n",
    "#print(parser.get_format_instructions())\n",
    "# Print the format instructions generated for the Car model rather than for Joke.\n",
    "car_parser = PydanticOutputParser(pydantic_object=Car)\n",
    "print(car_parser.get_format_instructions())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from langchain.chat_models import ChatOpenAI\n",
    "# from langchain.agents import initialize_agent\n",
    "\n",
    "# llm = ChatOpenAI(temperature=0.2, model_name=\"gpt-4\") # 'gpt-3.5-turbo'  # gpt-4 #text-davinci-003\n",
    "# agent = initialize_agent(agent=\"zero-shot-react-description\", \n",
    "#                          tools=[list_dessert_recipes_tool], \n",
    "#                          llm=llm, \n",
    "#                          verbose=True, max_iterations=7, return_intermediate_steps=True, \n",
    "#                          handle_parsing_errors=\"Check your output and make sure it conforms.\")\n",
    "# query = \"How far away is Saturn?\"\n",
    "# response = agent({\"input\": f\"{query}\"})\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}