DSatishchandra committed on
Commit
ac7dc42
1 Parent(s): 331fc60

Create parse_bhel.py

Browse files
Files changed (1) hide show
  1. parse_bhel.py +20 -0
parse_bhel.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import pandas as pd
3
+
4
def parse_bhel_pdf(pdf_path):
    """Collect purchase-order table rows from a PDF into a DataFrame.

    Every page is read with ``page.extract_table()``. The first row of each
    extracted table is discarded as a header and the remaining rows are
    gathered in page order.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        A ``pandas.DataFrame`` with the ten fixed columns listed below, one
        row per extracted table row. The frame is empty (columns only) when
        no page yields a table.

    Raises:
        ValueError: If an extracted row has more cells than there are
            columns, since the extra cells cannot be mapped to a column.
    """
    columns = [
        "Purchase Order No", "Date", "Sl No", "Material Description",
        "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value",
    ]
    expected_width = len(columns)
    data = []

    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            if not table:
                continue

            # NOTE(review): assumes every page's table begins with a header
            # row; a continuation page without one would lose its first data
            # row here -- confirm against sample documents.
            for row in table[1:]:
                cells = list(row)
                if len(cells) > expected_width:
                    raise ValueError(
                        f"Row on page {page_number} has {len(cells)} cells, "
                        f"expected at most {expected_width}: {cells!r}"
                    )
                # Pad short rows so a table missing trailing columns does
                # not make the DataFrame constructor fail on a width
                # mismatch.
                cells.extend([None] * (expected_width - len(cells)))
                data.append(cells)

    return pd.DataFrame(data, columns=columns)