Shami96 committed on
Commit
543101d
·
verified ·
1 Parent(s): 47ac43f

Update update_docx_with_pdf.py

Browse files
Files changed (1) hide show
  1. update_docx_with_pdf.py +8 -1
update_docx_with_pdf.py CHANGED
@@ -25,9 +25,11 @@ def update_json_with_pdf(word_json_file, pdf_txt_file, output_file):
25
 
26
  # --- Build prompt ---
27
  user_prompt = f"""Here is a JSON template. It contains only the fields that need updating:
 
28
  {word_json}
29
 
30
  Here is the extracted text from a PDF:
 
31
  {pdf_txt}
32
 
33
  Instructions:
@@ -35,9 +37,14 @@ Instructions:
35
  - DO NOT add any extra fields, and do not change the JSON structure.
36
  - Update ALL nested sections properly (like "Operator Declaration" with its "Print Name" and "Position Title")
37
  - Make sure to update both the main sections AND the flattened keys (like "Operator Declaration.Print Name")
 
 
 
 
 
38
  - Output ONLY the updated JSON, as raw JSON (no markdown, no extra text, no greetings).
39
  - Make sure the JSON is valid and ready to use.
40
- - Pay special attention to updating operator names, auditor names, and all personal details consistently throughout all sections."""
41
 
42
  # --- Call OpenAI API ---
43
  api_key = os.environ.get("OPENAI_API_KEY")
 
25
 
26
  # --- Build prompt ---
27
  user_prompt = f"""Here is a JSON template. It contains only the fields that need updating:
28
+
29
  {word_json}
30
 
31
  Here is the extracted text from a PDF:
32
+
33
  {pdf_txt}
34
 
35
  Instructions:
 
37
  - DO NOT add any extra fields, and do not change the JSON structure.
38
  - Update ALL nested sections properly (like "Operator Declaration" with its "Print Name" and "Position Title")
39
  - Make sure to update both the main sections AND the flattened keys (like "Operator Declaration.Print Name")
40
+ - For Operator Declaration specifically:
41
+ * Print Name should be the actual person's name (e.g., "Jeff Nitschke")
42
+ * Position Title should be their job role (e.g., "Director", "Manager", "Owner")
43
+ - Pay special attention to signatures and declarations - extract the person's name and their position/title
44
+ - Look for patterns like "Name - Position" or "Name, Position" in signature areas
45
  - Output ONLY the updated JSON, as raw JSON (no markdown, no extra text, no greetings).
46
  - Make sure the JSON is valid and ready to use.
47
+ - Update operator names, auditor names, and all personal details consistently throughout all sections."""
48
 
49
  # --- Call OpenAI API ---
50
  api_key = os.environ.get("OPENAI_API_KEY")