DSatishchandra committed on
Commit
dfc33ee
·
verified ·
1 Parent(s): ac7dc42

Create parse_toshiba.py

Browse files
Files changed (1) hide show
  1. parse_toshiba.py +21 -0
parse_toshiba.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import pandas as pd
3
+
4
def parse_toshiba_pdf(pdf_path) -> pd.DataFrame:
    """Collect table rows from every page of a PDF into one DataFrame.

    For each page, the table returned by ``page.extract_table()`` is read,
    its first row is discarded as a header, and the remaining rows are
    appended in page order. Pages on which no table is found are skipped.

    Rows with fewer cells than there are column names are padded with
    ``None`` on the right, so a uniformly narrow table no longer aborts the
    whole parse. pandas already pads short rows this way when at least one
    other row has the full width.

    NOTE(review): the column names below are presumably the layout of a
    Toshiba purchase-order table -- confirm against a sample document.

    Args:
        pdf_path: Path (or file object) passed straight to
            ``pdfplumber.open``.

    Returns:
        A DataFrame with the fixed column names listed below and one row
        per extracted table row. It is empty when no table rows are found.

    Raises:
        ValueError: If an extracted row has more cells than there are
            column names, since the extra cells cannot be mapped to a column.
    """
    columns = [
        "Purchase Order", "Order Date", "Pos", "Item Code",
        "Description", "Unit", "Delivery Date",
        "Quantity", "Basic Price", "Discount", "Cur", "Amount", "Sub Total",
    ]
    width = len(columns)
    data = []

    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            if not table:
                continue
            for row in table[1:]:  # Skip header row
                cells = list(row)
                if len(cells) > width:
                    # Fail with the page number rather than pandas' generic
                    # "N columns passed" message, which hides the location.
                    raise ValueError(
                        f"Row on page {page_number} has {len(cells)} cells, "
                        f"expected at most {width}: {cells!r}"
                    )
                # Pad short rows so DataFrame construction cannot fail on a
                # column-count mismatch.
                cells.extend([None] * (width - len(cells)))
                data.append(cells)

    return pd.DataFrame(data, columns=columns)