import requests

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

class Transcription(BaseModel):
    Transcription_Printed_Text: str = Field(description="The transcription of all printed text in the image.")
    Transcription_Handwritten_Text: str = Field(description="The transcription of all handwritten text in the image.")
    
class OCRGPT4VisionPreview:
    def __init__(self, logger, api_key, endpoint_url="https://gpt-4-vision-preview-api.com/ocr"):
        self.logger = logger
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        # Output parser configured with the Transcription schema.
        self.parser = JsonOutputParser(pydantic_object=Transcription)

    def transcribe_image(self, image_file):
        # Track API usage for the duration of this request.
        self.logger.start_monitoring_usage()

        headers = {"Authorization": f"Bearer {self.api_key}"}
        with open(image_file, 'rb') as f:
            response = requests.post(self.endpoint_url, headers=headers, files={'image': f})

        if response.status_code == 200:
            # JsonOutputParser.parse expects the raw response text and returns a dict.
            transcription = self.parser.parse(response.text)
        else:
            self.logger.log_error(f"Failed to transcribe image: HTTP {response.status_code}")
            transcription = {"Transcription": "Error"}

        usage_report = self.logger.stop_monitoring_report_usage()

        return transcription, usage_report
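

# Usage sketch: a minimal example of exercising OCRGPT4VisionPreview, assuming
# a simple logger object. `UsageLogger` below is a hypothetical stand-in for
# whatever logger the project actually injects; it only needs the three methods
# called above. The API key and image path are placeholders, and the default
# endpoint URL is not a real service, so running this end-to-end requires a
# reachable endpoint and a valid key.
if __name__ == "__main__":
    class UsageLogger:
        def start_monitoring_usage(self):
            print("usage monitoring started")

        def log_error(self, message):
            print(f"ERROR: {message}")

        def stop_monitoring_report_usage(self):
            return {"requests": 1}

    ocr = OCRGPT4VisionPreview(logger=UsageLogger(), api_key="YOUR_API_KEY")
    transcription, usage = ocr.transcribe_image("sample_page.png")
    print(transcription)
    print(usage)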