Commit 67f7ed6: Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, and consistent JSON parsing.
import requests

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


class Transcription(BaseModel):
    Transcription_Printed_Text: str = Field(description="The transcription of all printed text in the image.")
    Transcription_Handwritten_Text: str = Field(description="The transcription of all handwritten text in the image.")


class OCRGPT4VisionPreview:
    def __init__(self, logger, api_key, endpoint_url="https://gpt-4-vision-preview-api.com/ocr"):
        self.logger = logger
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        # Parse the model response into the Transcription schema for consistent JSON output.
        self.parser = JsonOutputParser(pydantic_object=Transcription)

    def transcribe_image(self, image_file):
        self.logger.start_monitoring_usage()
        headers = {"Authorization": f"Bearer {self.api_key}"}
        # Use a context manager so the image file handle is always closed.
        with open(image_file, 'rb') as f:
            files = {'image': f}
            response = requests.post(self.endpoint_url, headers=headers, files=files)
        if response.status_code == 200:
            # JsonOutputParser.parse expects raw text, not an already-decoded dict.
            transcription = self.parser.parse(response.text)
        else:
            self.logger.log_error(f"Failed to transcribe image (HTTP {response.status_code})")
            transcription = {"Transcription_Printed_Text": "Error", "Transcription_Handwritten_Text": "Error"}
        usage_report = self.logger.stop_monitoring_report_usage()
        return transcription, usage_report
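The class above assumes a logger object exposing start_monitoring_usage, log_error, and stop_monitoring_report_usage. A minimal usage sketch follows; the StubLogger class, the API key, and the image filename are illustrative assumptions, not part of the project code.

# Minimal sketch: a stand-in logger and a single transcription call.
class StubLogger:
    def start_monitoring_usage(self):
        print("usage monitoring started")

    def log_error(self, message):
        print(f"ERROR: {message}")

    def stop_monitoring_report_usage(self):
        print("usage monitoring stopped")
        return {"tokens": None}  # placeholder usage report


ocr = OCRGPT4VisionPreview(logger=StubLogger(), api_key="YOUR_API_KEY")
transcription, usage_report = ocr.transcribe_image("specimen_label.jpg")
print(transcription)   # e.g. {"Transcription_Printed_Text": "...", "Transcription_Handwritten_Text": "..."}
print(usage_report)

The PromptTemplate import is not used in the snippet itself; presumably the parser's format instructions are embedded in the prompt sent to the vision model. A hedged sketch of that wiring, under the assumption that a prompt is built elsewhere in the codebase, might look like:

# Sketch only: embed the JSON schema instructions into a prompt template.
parser = JsonOutputParser(pydantic_object=Transcription)
prompt = PromptTemplate(
    template="Transcribe all printed and handwritten text in the image.\n{format_instructions}",
    input_variables=[],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)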