Shami96 committed on
Commit
f9fae18
·
verified ·
1 Parent(s): ac57db8

Update update_docx_with_pdf.py

Browse files
Files changed (1) hide show
  1. update_docx_with_pdf.py +15 -18
update_docx_with_pdf.py CHANGED
@@ -9,15 +9,12 @@ def update_json_with_pdf(word_json_file, pdf_txt_file, output_file):
9
  with open(pdf_txt_file, "r", encoding="utf-8") as f:
10
  pdf_txt = f.read()
11
 
12
-
13
- # --- Build prompt ---
14
- user_prompt = f"""
15
  Here is a JSON template. It contains only the fields that need updating:
16
  {word_json}
17
-
18
  Here is the extracted text from a PDF:
19
  {pdf_txt}
20
-
21
  Instructions:
22
  - ONLY update the fields present in the JSON template, using information from the PDF text.
23
  - DO NOT add any extra fields, and do not change the JSON structure.
@@ -25,7 +22,7 @@ Instructions:
25
  - Make sure the JSON is valid and ready to use.
26
  """
27
 
28
- # --- Call OpenAI API ---
29
  api_key = os.environ.get("OPENAI_API_KEY")
30
  if not api_key:
31
  raise RuntimeError("OPENAI_API_KEY not found in environment variables!")
@@ -40,18 +37,18 @@ Instructions:
40
  temperature=0
41
  )
42
 
43
- updated_json_str = response.choices[0].message.content.strip()
44
-
45
- # --- Try to parse as JSON ---
46
- try:
47
- parsed = json.loads(updated_json_str)
48
- with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
49
- json.dump(parsed, f, indent=2, ensure_ascii=False)
50
- print("✅ JSON updated and saved to", OUTPUT_FILE)
51
- except Exception as e:
52
- print("⚠️ Model did not return valid JSON. Raw output below:\n")
53
- print(updated_json_str)
54
- print("\n❌ Failed to parse updated JSON:", e)
55
 
56
  if __name__ == "__main__":
57
  import sys
 
9
  with open(pdf_txt_file, "r", encoding="utf-8") as f:
10
  pdf_txt = f.read()
11
 
12
+ # --- Build prompt ---
13
+ user_prompt = f"""
 
14
  Here is a JSON template. It contains only the fields that need updating:
15
  {word_json}
 
16
  Here is the extracted text from a PDF:
17
  {pdf_txt}
 
18
  Instructions:
19
  - ONLY update the fields present in the JSON template, using information from the PDF text.
20
  - DO NOT add any extra fields, and do not change the JSON structure.
 
22
  - Make sure the JSON is valid and ready to use.
23
  """
24
 
25
+ # --- Call OpenAI API ---
26
  api_key = os.environ.get("OPENAI_API_KEY")
27
  if not api_key:
28
  raise RuntimeError("OPENAI_API_KEY not found in environment variables!")
 
37
  temperature=0
38
  )
39
 
40
+ updated_json_str = response.choices[0].message.content.strip()
41
+
42
+ # --- Try to parse as JSON ---
43
+ try:
44
+ parsed = json.loads(updated_json_str)
45
+ with open(output_file, "w", encoding="utf-8") as f:
46
+ json.dump(parsed, f, indent=2, ensure_ascii=False)
47
+ print("✅ JSON updated and saved to", output_file)
48
+ except Exception as e:
49
+ print("⚠️ Model did not return valid JSON. Raw output below:\n")
50
+ print(updated_json_str)
51
+ print("\n❌ Failed to parse updated JSON:", e)
52
 
53
  if __name__ == "__main__":
54
  import sys