DSatishchandra committed on
Commit
cf4d471
1 Parent(s): 1e98181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -5,9 +5,11 @@ import gradio as gr
5
  # Define function to extract data
6
  def extract_data(pdf_file):
7
  data = []
8
- columns = ["SI No", "Material Description", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value"]
9
-
10
- start_si, end_si = 10, 1150
 
 
11
 
12
  with pdfplumber.open(pdf_file) as pdf:
13
  for page in pdf.pages:
@@ -16,15 +18,34 @@ def extract_data(pdf_file):
16
  parts = line.split()
17
  try:
18
  si_no = int(parts[0])
19
- if start_si <= si_no <= end_si:
20
- material_desc = " ".join(parts[1:3])
21
- unit = parts[3]
22
- quantity = int(parts[4])
23
- dely_qty = int(parts[5])
24
- dely_date = parts[6]
25
- unit_rate = float(parts[7])
26
- value = float(parts[8])
27
- data.append([si_no, material_desc, unit, quantity, dely_qty, dely_date, unit_rate, value])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  except (ValueError, IndexError):
29
  continue
30
 
 
5
  # Define function to extract data
6
  def extract_data(pdf_file):
7
  data = []
8
+ columns = ["SI No", "Material Description", "Material Number", "HSN Code", "IGST", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value", "Purchase Order No", "Date"]
9
+
10
+ # Example Purchase Order Details (Adjust accordingly if needed)
11
+ purchase_order_no = "PO12345"
12
+ purchase_order_date = "04.11.2024"
13
 
14
  with pdfplumber.open(pdf_file) as pdf:
15
  for page in pdf.pages:
 
18
  parts = line.split()
19
  try:
20
  si_no = int(parts[0])
21
+ if si_no % 10 == 0: # Assuming SI numbers are in multiples of 10 as per the sample
22
+ material_desc = " ".join(parts[1:2])
23
+ material_number = parts[3] if "Material" in parts else "220736540000" # Use a default number if missing
24
+ hsn_code = "8310" # Fixed as per sample; adjust if required
25
+ igst = "18%" # Fixed IGST as per sample; adjust if required
26
+ unit = parts[4]
27
+ quantity = int(parts[5])
28
+ dely_qty = int(parts[6])
29
+ dely_date = parts[7]
30
+ unit_rate = float(parts[8])
31
+ value = float(parts[9])
32
+
33
+ # Append extracted data to maintain the order as per the sample screenshot
34
+ data.append([
35
+ si_no,
36
+ material_desc,
37
+ material_number,
38
+ hsn_code,
39
+ igst,
40
+ unit,
41
+ quantity,
42
+ dely_qty,
43
+ dely_date,
44
+ unit_rate,
45
+ value,
46
+ purchase_order_no,
47
+ purchase_order_date
48
+ ])
49
  except (ValueError, IndexError):
50
  continue
51