POExtraction_UC3 / parse_perfect_metaprint.py
DSatishchandra's picture
Create parse_perfect_metaprint.py
0be1cdb verified
raw
history blame
623 Bytes
import pdfplumber
import pandas as pd
def parse_perfect_metaprint(pdf_path) -> pd.DataFrame:
    """Collect purchase-order table rows from every page of a PDF.

    Each page is passed through ``page.extract_table()``. The first row of
    every extracted table is treated as a header and dropped; the remaining
    rows are gathered in page order and labelled with a fixed list of
    eleven column names.

    Args:
        pdf_path: Value handed unchanged to ``pdfplumber.open``.

    Returns:
        A DataFrame with the eleven fixed columns and one row per extracted
        table row. It has the same columns but no rows when no page yields
        a table.

    Raises:
        ValueError: If an extracted row has more cells than there are
            column names, since the extra cells cannot be labelled.
    """
    columns = [
        "Purchase Order No", "Purchase Order Date", "S. No", "Material No",
        "Material Description", "Qty", "Price", "Delivery Date", "Total Value",
        "Vat%", "Amount Incl. VAT",
    ]
    width = len(columns)

    data = []
    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            if not table:
                # Nothing was extracted from this page.
                continue
            for row in table[1:]:  # Skip header row
                cells = list(row)
                if len(cells) > width:
                    # Fail with a message that points at the offending page
                    # instead of a bare column-count error from pandas.
                    raise ValueError(
                        f"Row on page {page_number} has {len(cells)} cells, "
                        f"but only {width} columns are defined"
                    )
                # Pad short rows so that a table narrower than the column
                # list still builds a frame instead of failing on the
                # column-count check.
                cells.extend([None] * (width - len(cells)))
                data.append(cells)

    # Create a DataFrame
    return pd.DataFrame(data, columns=columns)