# Input: data_path — directory searched recursively for .png receipt images to transcribe.
import os 
from pdfparser_hq import encode_image
from config import openai_api
from openai import OpenAI

def _find_png_paths(data_path):
    """Return the paths of every ``.png`` file found under *data_path*.

    The tree is walked recursively with ``os.walk``. Paths are returned in
    the order the walk yields them, and each one is printed when found.
    """
    paths = []
    for root, _dirs, files in os.walk(data_path):
        for file in files:
            if file.endswith('.png'):
                path = os.path.join(root, file)
                print(path)
                paths.append(path)
    return paths


def transcribe_all(data_path,
                   name_of_raw_transcripts="transcript_raw.txt"):
    """Transcribe every PNG receipt under *data_path* with the OpenAI API.

    Each ``.png`` file found (recursively) under ``data_path`` is encoded with
    ``encode_image`` and sent to the ``gpt-4o`` chat completions endpoint
    together with a system prompt asking for a JSON transcription of the
    receipt. The collected results are written, as the ``str()`` of a list of
    dicts, to ``{data_path}/{name_of_raw_transcripts}``.

    Args:
        data_path: Directory to search for ``.png`` images; also the
            directory the transcript file is written to.
        name_of_raw_transcripts: File name of the output transcript file.

    Returns:
        The list of transcript dicts, one per image, each with the keys
        ``"path"`` (image path), ``"filename"`` (``P<n>.png``, where ``n`` is
        the 1-based position in discovery order) and ``"content"`` (the raw
        message content returned by the model).
    """
    client = OpenAI(api_key=openai_api)

    system_prompt = """
    You will be given a reciept that could be handwritten or properly formated. Your goal is to transcribe what is written in JSON following this format:

    {
    "name_of_supplier" : X,
    "amount" : X,
    "currency": X,
    "date" : DD/MM/YYYY
    }

    Make sure you provide the total amount and the correct dates, handwritten ones might be tricky. This will be used to reconcile with banking transactions.

    """

    image_paths = _find_png_paths(data_path)
    total = len(image_paths)
    transcripts = []

    for index, image_path in enumerate(image_paths, start=1):
        # encode_image is a project helper; presumably it returns the file's
        # bytes as a base64 string suitable for a data URL — verify.
        base64_image = encode_image(image_path)

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": [{"type": "text", "text": system_prompt}],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                # Only .png files are collected, so label the
                                # payload as PNG rather than JPEG.
                                "url": f"data:image/png;base64,{base64_image}",
                            },
                        }
                    ],
                },
            ],
            temperature=1,
            max_tokens=1877,
            top_p=1,
            response_format={"type": "json_object"},
            frequency_penalty=0,
            presence_penalty=0,
        )
        content = completion.choices[0].message.content

        transcripts.append(
            {"path": image_path, "filename": f"P{index}.png", "content": content}
        )
        print(f"done transcribing transcript: {index}/{total}")

    # Explicit UTF-8 so non-ASCII supplier names or currency symbols in the
    # model output cannot raise UnicodeEncodeError on platforms whose default
    # encoding is narrower (which would lose all results after the API calls).
    with open(f"{data_path}/{name_of_raw_transcripts}", 'w',
              encoding="utf-8") as out_file:
        out_file.write(str(transcripts))

    return transcripts