|
|
|
import os |
|
from pdfparser_hq import encode_image |
|
from config import openai_api |
|
from openai import OpenAI |
|
|
|
# System prompt sent with every receipt image. It is runtime text consumed by
# the model, so its wording is kept as-is.
_RECEIPT_SYSTEM_PROMPT = """
You will be given a reciept that could be handwritten or properly formated. Your goal is to transcribe what is written in JSON following this format:

{
    "name_of_supplier" : X,
    "amount" : X,
    "currency": X,
    "date" : DD/MM/YYYY
}

Make sure you provide the total amount and the correct dates, handwritten ones might be tricky. This will be used to reconcile with banking transactions.

"""


def _find_png_paths(data_path):
    """Return the path of every ``.png`` file under *data_path*, in ``os.walk`` order.

    Each path is printed as it is discovered.
    """
    png_paths = []
    for root, _dirs, files in os.walk(data_path):
        for file_name in files:
            if file_name.endswith('.png'):
                path = os.path.join(root, file_name)
                print(path)
                png_paths.append(path)
    return png_paths


def transcribe_all(data_path,
                   name_of_raw_transcripts="transcript_raw.txt"):
    """Transcribe every PNG receipt under *data_path* with GPT-4o.

    Walks ``data_path`` recursively, sends each ``.png`` file to the OpenAI
    chat completions API together with the receipt system prompt, and collects
    the JSON text the model returns. The collected list is then written to
    ``<data_path>/<name_of_raw_transcripts>`` as the ``str()`` of a list of
    dicts with the keys ``"path"``, ``"filename"`` and ``"content"``.

    Args:
        data_path: Directory searched for ``.png`` files; the transcript file
            is also written into this directory.
        name_of_raw_transcripts: File name of the transcript file.

    Returns:
        None. The result is the file written to disk.
    """
    client = OpenAI(api_key=openai_api)
    image_paths = _find_png_paths(data_path)
    transcripts = []

    for i, image_path in enumerate(image_paths):
        base64_image = encode_image(image_path)

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": _RECEIPT_SYSTEM_PROMPT,
                        }
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                # Only .png files are collected, so the data
                                # URL is labelled image/png (it used to claim
                                # image/jpeg).
                                # NOTE(review): assumes encode_image returns
                                # the base64 of the file's raw bytes - confirm.
                                "url": f"data:image/png;base64,{base64_image}",
                            },
                        }
                    ],
                },
            ],
            temperature=1,
            max_tokens=1877,
            top_p=1,
            response_format={"type": "json_object"},
            frequency_penalty=0,
            presence_penalty=0,
        )
        content = response.choices[0].message.content

        # "filename" is a positional label (P1.png, P2.png, ...) that follows
        # discovery order; "path" is the real location of the image on disk.
        transcripts.append({
            "path": image_path,
            "filename": f"P{i+1}.png",
            "content": content,
        })
        print(f"done transcribing transcript: {i+1}/{len(image_paths)}")

    output_path = os.path.join(data_path, name_of_raw_transcripts)
    # Explicit UTF-8 so non-ASCII supplier names or currency symbols cannot
    # raise UnicodeEncodeError on platforms whose default encoding is narrower.
    with open(output_path, 'w', encoding="utf-8") as out_file:
        # Kept as the str() of the list (not JSON) so existing readers of this
        # file continue to see the same format.
        out_file.write(str(transcripts))