# input : data_path
import os

from pdfparser_hq import encode_image
from config import openai_api
from openai import OpenAI


def _find_png_paths(data_path):
    """Return the path of every ``.png`` file under *data_path*, in ``os.walk`` order."""
    paths = []
    for root, _dirs, files in os.walk(data_path):
        for file in files:
            if file.endswith('.png'):
                path = os.path.join(root, file)
                print(path)
                paths.append(path)
    return paths


def transcribe_all(data_path, name_of_raw_transcripts="transcript_raw.txt"):
    """Transcribe every PNG receipt under *data_path* with GPT-4o and save the results.

    The directory tree rooted at ``data_path`` is walked recursively. Each
    ``.png`` file is base64-encoded with ``encode_image`` and sent to the
    OpenAI chat-completions endpoint together with a system prompt asking for
    a JSON transcription (supplier name, amount, currency, date). The raw
    message content of each response is collected and the whole list is
    written to ``<data_path>/<name_of_raw_transcripts>``.

    Args:
        data_path: Directory searched for ``.png`` files; the output file is
            also written here.
        name_of_raw_transcripts: File name of the output file.

    Returns:
        None. The result is written to disk as ``str(list_of_dicts)`` (a
        Python literal, not JSON). Each dict has the keys ``"path"`` (the
        image path), ``"filename"`` (``"P<k>.png"`` where ``k`` is the 1-based
        processing position) and ``"content"`` (the model's reply text).
    """
    client = OpenAI(api_key=openai_api)
    transcripts = []
    system_prompt = """
    You will be given a reciept that could be handwritten or properly formated.
    Your goal is to transcribe what is written in JSON following this format:
    {
        "name_of_supplier" : X,
        "amount" : X,
        "currency": X,
        "date" : DD/MM/YYYY
    }
    Make sure you provide the total amount and the correct dates, handwritten ones might be tricky.
    This will be used to reconcile with banking transactions.
    """

    image_paths = _find_png_paths(data_path)
    total = len(image_paths)

    # NOTE(review): "filename" is derived from the processing position, not
    # from the real file name, and os.walk order is filesystem-dependent --
    # confirm that downstream consumers do not rely on a particular mapping.
    for position, image_path in enumerate(image_paths, start=1):
        base64_image = encode_image(image_path)
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": system_prompt
                        }
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                # NOTE(review): the URL declares image/jpeg
                                # although the inputs are .png files; left
                                # unchanged because encode_image's output
                                # format is not visible here -- confirm.
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            }
                        }
                    ]
                }
            ],
            temperature=1,
            max_tokens=1877,
            top_p=1,
            response_format={"type": "json_object"},
            frequency_penalty=0,
            presence_penalty=0
        )
        response = completion.choices[0].message.content
        transcripts.append({
            "path": image_path,
            "filename": f"P{position}.png",
            "content": response,
        })
        print(f"done transcribing transcript: {position}/{total}")

    # Explicit UTF-8 so that non-ASCII characters in a transcript (currency
    # symbols, accented supplier names) cannot raise UnicodeEncodeError on
    # platforms whose default encoding is narrower, which would discard the
    # results of every API call made above.
    output_path = os.path.join(data_path, name_of_raw_transcripts)
    with open(output_path, 'w', encoding="utf-8") as file:
        file.write(str(transcripts))