""" https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb Example 1. # Define your desired data structure. class Joke(BaseModel): setup: str = Field(description="question to set up a joke") punchline: str = Field(description="answer to resolve the joke") # You can add custom validation logic easily with Pydantic. @validator("setup") def question_ends_with_question_mark(cls, field): if field[-1] != "?": raise ValueError("Badly formed question!") return field Example 2. # Here's another example, but with a compound typed field. class Actor(BaseModel): name: str = Field(description="name of an actor") film_names: List[str] = Field(description="list of names of films they starred in") """ import json, re, logging PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below. As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted. Here is the output schema: ``` {schema} ```""" PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below. ``` {schema} ```""" class JsonStringError(Exception): ... class GptJsonIO(): def __init__(self, schema, example_instruction=True): self.pydantic_object = schema self.example_instruction = example_instruction self.format_instructions = self.generate_format_instructions() def generate_format_instructions(self): schema = self.pydantic_object.schema() # Remove extraneous fields. reduced_schema = schema if "title" in reduced_schema: del reduced_schema["title"] if "type" in reduced_schema: del reduced_schema["type"] # Ensure json in context is well-formed with double quotes. if self.example_instruction: schema_str = json.dumps(reduced_schema) return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str) else: return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str) def generate_output(self, text): # Greedy search for 1st json candidate. match = re.search( r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL ) json_str = "" if match: json_str = match.group() json_object = json.loads(json_str, strict=False) final_object = self.pydantic_object.parse_obj(json_object) return final_object def generate_repair_prompt(self, broken_json, error): prompt = "Fix a broken json string.\n\n" + \ "(1) The broken json string need to fix is: \n\n" + \ "```" + "\n" + \ broken_json + "\n" + \ "```" + "\n\n" + \ "(2) The error message is: \n\n" + \ error + "\n\n" + \ "Now, fix this json string. \n\n" return prompt def generate_output_auto_repair(self, response, gpt_gen_fn): """ response: string containing canidate json gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt) """ try: result = self.generate_output(response) except Exception as e: try: logging.info(f'Repairing json:{response}') repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e)) result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions)) logging.info('Repaire json success.') except Exception as e: # 没辙了,放弃治疗 logging.info('Repaire json fail.') raise JsonStringError('Cannot repair json.', str(e)) return result