kmuthudurai committed on
Commit
f95c2fe
·
verified ·
1 Parent(s): a9a5999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -61
app.py CHANGED
@@ -62,67 +62,7 @@ def fetch_file_from_s3_file(file_key):
62
 
63
  # Function to summarize text using OpenAI GPT
64
  def summarize_text(text):
65
- system_prompt = """You are tasked with extracting and structuring all relevant information from an invoice in a standardized JSON format for storing invoice headers and line items. The invoice headers should include the following details:
66
-
67
- Vendor Information:
68
-
69
- Vendor Name
70
- Vendor Address
71
- Vendor GST No.
72
- Invoice Details:
73
-
74
- Invoice No./Bill No./Consecutive Serial No./Serial No. of Invoice/INVOICE → Considered as InvoiceNo.
75
- Invoice Date/Date/Date of Supply/Bill Date/Issuing Date/Dated → Considered as InvoiceDate (formatted as dd-MMM-yyyy).
76
- Invoice Currency/Currency
77
- Base Amount/Amount
78
- Tax Amount
79
- Total Invoice Amount
80
- Type of Invoice (e.g., "Tax Invoice", "Proforma Invoice", etc.)
81
- Billing Party Information:
82
-
83
- Invoice Party/Bill To Name/Sold-to-Party/Taxpayer Name/M/s./CB No./Buyer (Bill to)/Billing Party/Customer Name & Address/Name → Considered as BillToName.
84
- Invoice Party to / Bill To Address
85
- Invoice Party to / Bill To GST No.
86
- Shipping and References:
87
-
88
- MBL No./HBL No./Container No./Shipping Bill No./Shipper Invoice No./Manifest No./MAWB/HAWB/OBL No./Bill of Lading Number/REF/Ocean Bill of Lading/House Bill of Lading/BL No./Job No. → Considered as RefNo.
89
- Shipping Order
90
- You should extract this data and structure it into a table-like format in the following JSON format:
91
- {
92
- "invoice_headers": {
93
- "VendorName": "",
94
- "VendorAddress": "",
95
- "VendorGSTNo": "",
96
- "InvoiceNo": "",
97
- "InvoiceDate": "",
98
- "InvoiceCurrency": "",
99
- "BaseAmount": "",
100
- "TaxAmount": "",
101
- "TotalInvoiceAmt": "",
102
- "TypeofInvoice": "",
103
- "BillToName": "",
104
- "BillToAddress": "",
105
- "BillToGSTNO": "",
106
- "RefNo": "",
107
- "ShippingOrder": ""
108
- },
109
- "line_items": [
110
- {
111
- "Description": "",
112
- "TaxPercentage": "",
113
- "TaxAmount": "",
114
- "Amount": 0
115
- }
116
- ]
117
- }
118
- Guidelines for Processing:
119
-
120
- Ensure accurate extraction of data from the invoice by recognizing alternative naming conventions (e.g., Bill to, Taxpayer Name, etc.).
121
- Convert the Invoice Date to the specified dd-MMM-yyyy format.
122
- Use the correct currency and amounts for each invoice field.
123
- For each line item, provide the Description, Tax Percentage, Tax Amount, and Amount.
124
- If certain values are missing or not applicable, leave them empty or set them as null where necessary.
125
- This JSON format will be used to store and manage invoices in a structured and uniform way."""
126
  try:
127
  response = openai.ChatCompletion.create(
128
  model="gpt-4o-mini",
 
62
 
63
  # Function to summarize text using OpenAI GPT
64
  def summarize_text(text):
65
+ system_prompt = "You are a helpful assistant that summarizes extracted Invoice OCR text into JSON format always."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  try:
67
  response = openai.ChatCompletion.create(
68
  model="gpt-4o-mini",