Spaces:
Running
Running
Update update_docx_with_pdf.py
Browse files- update_docx_with_pdf.py +8 -1
update_docx_with_pdf.py
CHANGED
|
@@ -25,9 +25,11 @@ def update_json_with_pdf(word_json_file, pdf_txt_file, output_file):
|
|
| 25 |
|
| 26 |
# --- Build prompt ---
|
| 27 |
user_prompt = f"""Here is a JSON template. It contains only the fields that need updating:
|
|
|
|
| 28 |
{word_json}
|
| 29 |
|
| 30 |
Here is the extracted text from a PDF:
|
|
|
|
| 31 |
{pdf_txt}
|
| 32 |
|
| 33 |
Instructions:
|
|
@@ -35,9 +37,14 @@ Instructions:
|
|
| 35 |
- DO NOT add any extra fields, and do not change the JSON structure.
|
| 36 |
- Update ALL nested sections properly (like "Operator Declaration" with its "Print Name" and "Position Title")
|
| 37 |
- Make sure to update both the main sections AND the flattened keys (like "Operator Declaration.Print Name")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
- Output ONLY the updated JSON, as raw JSON (no markdown, no extra text, no greetings).
|
| 39 |
- Make sure the JSON is valid and ready to use.
|
| 40 |
-
-
|
| 41 |
|
| 42 |
# --- Call OpenAI API ---
|
| 43 |
api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
| 25 |
|
| 26 |
# --- Build prompt ---
|
| 27 |
user_prompt = f"""Here is a JSON template. It contains only the fields that need updating:
|
| 28 |
+
|
| 29 |
{word_json}
|
| 30 |
|
| 31 |
Here is the extracted text from a PDF:
|
| 32 |
+
|
| 33 |
{pdf_txt}
|
| 34 |
|
| 35 |
Instructions:
|
|
|
|
| 37 |
- DO NOT add any extra fields, and do not change the JSON structure.
|
| 38 |
- Update ALL nested sections properly (like "Operator Declaration" with its "Print Name" and "Position Title")
|
| 39 |
- Make sure to update both the main sections AND the flattened keys (like "Operator Declaration.Print Name")
|
| 40 |
+
- For Operator Declaration specifically:
|
| 41 |
+
* Print Name should be the actual person's name (e.g., "Jeff Nitschke")
|
| 42 |
+
* Position Title should be their job role (e.g., "Director", "Manager", "Owner")
|
| 43 |
+
- Pay special attention to signatures and declarations - extract the person's name and their position/title
|
| 44 |
+
- Look for patterns like "Name - Position" or "Name, Position" in signature areas
|
| 45 |
- Output ONLY the updated JSON, as raw JSON (no markdown, no extra text, no greetings).
|
| 46 |
- Make sure the JSON is valid and ready to use.
|
| 47 |
+
- Update operator names, auditor names, and all personal details consistently throughout all sections."""
|
| 48 |
|
| 49 |
# --- Call OpenAI API ---
|
| 50 |
api_key = os.environ.get("OPENAI_API_KEY")
|