DSatishchandra committed on
Commit
0be1cdb
1 Parent(s): dfc33ee

Create parse_perfect_metaprint.py

Browse files
Files changed (1) hide show
  1. parse_perfect_metaprint.py +21 -0
parse_perfect_metaprint.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import pandas as pd
3
+
4
def parse_perfect_metaprint(pdf_path) -> pd.DataFrame:
    """Collect the table rows of every page of a PDF into one DataFrame.

    For each page, the table returned by ``page.extract_table()`` is read.
    Its first row is treated as a header and skipped; the remaining rows are
    gathered in page order. Rows whose cells are all ``None`` or whitespace
    are dropped.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        A DataFrame with the eleven fixed column labels defined below,
        assigned to cells by position. It has no rows when no page yields
        a table.

    Raises:
        ValueError: If a row holds more cells than there are column labels.
            The DataFrame constructor may also raise when the collected rows
            do not match the column count.
    """
    # Labels are applied positionally to the extracted cells.
    columns = [
        "Purchase Order No", "Purchase Order Date", "S. No", "Material No",
        "Material Description", "Qty", "Price", "Delivery Date", "Total Value",
        "Vat%", "Amount Incl. VAT",
    ]
    data = []

    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            if not table:
                # No table detected on this page.
                continue
            for row in table[1:]:  # Skip header row
                # Drop rows that carry no text at all.
                if all(cell is None or not str(cell).strip() for cell in row):
                    continue
                # Fail here, with the page number, rather than later inside
                # the DataFrame constructor where the location is lost.
                if len(row) > len(columns):
                    raise ValueError(
                        f"Page {page_number}: row has {len(row)} cells, "
                        f"expected at most {len(columns)}"
                    )
                data.append(row)

    return pd.DataFrame(data, columns=columns)