Spaces:

qingxu98
/

gpt-academic

Running

App Files Files Community

402

gpt-academic / crazy_functions /json_fns /pydantic_io.py

qingxu98

5c0a088 about 1 year ago

raw

history blame

4.15 kB

	"""
	https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb

	Example 1.

	# Define your desired data structure.
	class Joke(BaseModel):
	    setup: str = Field(description="question to set up a joke")
	    punchline: str = Field(description="answer to resolve the joke")

	    # You can add custom validation logic easily with Pydantic.
	    @validator("setup")
	    def question_ends_with_question_mark(cls, field):
	        if field[-1] != "?":
	            raise ValueError("Badly formed question!")
	        return field


	Example 2.

	# Here's another example, but with a compound typed field.
	class Actor(BaseModel):
	    name: str = Field(description="name of an actor")
	    film_names: List[str] = Field(description="list of names of films they starred in")
	"""

	import json, re, logging


	# Prompt template asking the model to reply with a JSON instance of a schema,
	# including one worked example of a well-formed and a badly-formed instance.
	# The template is filled in with str.format(schema=...), which is why every
	# literal brace in the example is doubled ("{{" / "}}") and only "{schema}"
	# is a real placeholder.
	PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

	As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
	the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

	Here is the output schema:
	```
	{schema}
	```"""


	# Shorter variant of the prompt template: same opening sentence and fenced
	# "{schema}" placeholder (filled in with str.format(schema=...)), but without
	# the worked example.
	PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
	```
	{schema}
	```"""

	class JsonStringError(Exception):
	    """Raised when a reply cannot be parsed as JSON even after a repair attempt."""

	class GptJsonIO():
	    """Build JSON-format prompts from a schema model and parse model replies.

	    The schema object must provide ``schema()`` (returning a JSON-schema dict)
	    and ``parse_obj(obj)`` (validating a decoded JSON object); presumably it is
	    a pydantic ``BaseModel`` subclass -- confirm against callers.

	    Attributes:
	        pydantic_object: the schema object passed to the constructor.
	        example_instruction: whether the prompt includes the worked example.
	        format_instructions: the rendered prompt text for this schema.
	    """

	    def __init__(self, schema, example_instruction=True):
	        """Store the schema and pre-render the format instructions.

	        Args:
	            schema: object exposing ``schema()`` and ``parse_obj()``.
	            example_instruction: if true, use the long template that contains
	                a worked example; otherwise use the short template.
	        """
	        self.pydantic_object = schema
	        self.example_instruction = example_instruction
	        self.format_instructions = self.generate_format_instructions()

	    def generate_format_instructions(self):
	        """Return the prompt text describing the expected JSON output.

	        The top-level ``title`` and ``type`` keys are dropped from the schema
	        before it is serialized into the template.
	        """
	        # Work on a shallow copy: only top-level keys are removed, and the dict
	        # handed back by schema() may be shared or cached by its owner.
	        reduced_schema = dict(self.pydantic_object.schema())
	        reduced_schema.pop("title", None)
	        reduced_schema.pop("type", None)

	        # json.dumps guarantees well-formed JSON with double quotes. It is
	        # computed before branching so both templates can use it.
	        schema_str = json.dumps(reduced_schema)
	        if self.example_instruction:
	            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
	        return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

	    def generate_output(self, text):
	        """Extract the JSON object embedded in ``text`` and validate it.

	        Args:
	            text: a string expected to contain a JSON object somewhere inside.

	        Returns:
	            Whatever ``self.pydantic_object.parse_obj`` returns for the
	            decoded JSON.

	        Raises:
	            json.JSONDecodeError: if no ``{...}`` span is found or the span is
	                not valid JSON.
	            Exception: anything raised by ``parse_obj``.
	        """
	        # Greedy match: from the first "{" to the last "}"; DOTALL lets the
	        # span cross line breaks.
	        match = re.search(
	            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
	        )
	        # With no candidate, the empty string makes json.loads raise, which is
	        # how "no JSON found" is reported to the caller.
	        json_str = match.group() if match else ""
	        # strict=False tolerates raw control characters (e.g. newlines) inside
	        # JSON strings.
	        json_object = json.loads(json_str, strict=False)
	        return self.pydantic_object.parse_obj(json_object)

	    def generate_repair_prompt(self, broken_json, error):
	        """Return a prompt asking a model to fix ``broken_json``.

	        Args:
	            broken_json: the text that failed to parse (must be a ``str``).
	            error: a description of the failure (must be a ``str``).
	        """
	        prompt = (
	            "Fix a broken json string.\n\n"
	            "(1) The broken json string need to fix is: \n\n"
	            "```" "\n"
	            + broken_json + "\n"
	            "```" "\n\n"
	            "(2) The error message is: \n\n"
	            + error + "\n\n"
	            "Now, fix this json string. \n\n"
	        )
	        return prompt

	    def generate_output_auto_repair(self, response, gpt_gen_fn):
	        """Parse ``response``; on failure, ask the model once to repair it.

	        Args:
	            response: string containing candidate json.
	            gpt_gen_fn: callable invoked as ``gpt_gen_fn(inputs, sys_prompt)``
	                that returns the model's reply as a string.

	        Returns:
	            The parsed object from the original response, or from the
	            repaired reply if the first parse failed.

	        Raises:
	            JsonStringError: if the repair attempt also fails; the underlying
	                error is attached as ``__cause__``.
	        """
	        try:
	            return self.generate_output(response)
	        except Exception as first_error:
	            try:
	                logging.info(f'Repairing json：{response}')
	                repair_prompt = self.generate_repair_prompt(
	                    broken_json=response, error=repr(first_error)
	                )
	                result = self.generate_output(
	                    gpt_gen_fn(repair_prompt, self.format_instructions)
	                )
	                logging.info('Repaire json success.')
	            except Exception as repair_error:
	                # Out of options: a single repair round is all we attempt.
	                logging.info('Repaire json fail.')
	                raise JsonStringError(
	                    'Cannot repair json.', str(repair_error)
	                ) from repair_error
	        return result