DSatishchandra committed on
Commit
615fc49
·
verified ·
1 Parent(s): 2928d72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -24
app.py CHANGED
@@ -1,34 +1,58 @@
1
- import gradio as gr
2
  import pandas as pd
3
- from BHEL import parse_bhel_pdf
4
- from FederalElectric import parse_federal_electric_pdf
5
- from ALNISF import parse_alnisf_pdf
6
-
7
- def process_pdf(file, format_type):
8
- # Select the appropriate parser based on format type
9
- if format_type == "BHEL.py":
10
- df = parse_bhel_pdf(file.name)
11
- elif format_type == "Federal Electric.py":
12
- df = parse_federal_electric_pdf(file.name)
13
- elif format_type == "AL-NISF":
14
- df = parse_alnisf_pdf(file.name)
15
- else:
16
- return "Unsupported format selected", None
17
 
18
- # Save the DataFrame to an Excel file
19
- output_file = f"{format_type}_Data.xlsx"
20
- df.to_excel(output_file, index=False)
21
 
22
- return output_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Gradio Interface
25
  iface = gr.Interface(
26
- fn=process_pdf,
27
  inputs=[
28
- gr.File(label="Upload PDF"),
29
- gr.Dropdown(choices=["BHEL.py", "Federal Electric.py", "AL-NISF"], label="Select Format")
30
  ],
31
- outputs=gr.File(label="Download Excel")
 
 
32
  )
33
 
34
  if __name__ == "__main__":
 
1
+ import pdfplumber
2
  import pandas as pd
3
+ import re
4
+ import gradio as gr
5
+
6
+ # Individual processing functions
7
+ def process_bhel(pdf_file):
8
+ # Place the BHEL-specific code logic here
9
+ return process_common(pdf_file, "BHEL Output.xlsx")
 
 
 
 
 
 
 
10
 
11
+ def process_federal_electric(pdf_file):
12
+ # Place the Federal Electric-specific code logic here
13
+ return process_common(pdf_file, "Federal Electric Output.xlsx")
14
 
15
+ def process_al_nisf(pdf_file):
16
+ # Place the AL-NISF-specific code logic here
17
+ return process_common(pdf_file, "AL-NISF Output.xlsx")
18
+
19
+ def process_common(pdf_file, output_name):
20
+ """
21
+ Generalized function for processing PDFs
22
+ """
23
+ # Replace this with common or specific processing logic
24
+ with pdfplumber.open(pdf_file.name) as pdf:
25
+ text = ""
26
+ for page in pdf.pages:
27
+ text += page.extract_text()
28
+
29
+ # Example: Create dummy DataFrame
30
+ data = {"Text": text}
31
+ df = pd.DataFrame([data])
32
+ df.to_excel(output_name, index=False)
33
+ return output_name
34
+
35
+ # Dropdown processing function
36
+ def main_process(pdf_file, format_choice):
37
+ if format_choice == "BHEL":
38
+ return process_bhel(pdf_file)
39
+ elif format_choice == "Federal Electric":
40
+ return process_federal_electric(pdf_file)
41
+ elif format_choice == "AL-NISF":
42
+ return process_al_nisf(pdf_file)
43
+ else:
44
+ return None
45
 
46
+ # Gradio interface
47
  iface = gr.Interface(
48
+ fn=main_process,
49
  inputs=[
50
+ gr.File(label="Upload PDF", file_types=[".pdf"]),
51
+ gr.Dropdown(choices=["BHEL", "Federal Electric", "AL-NISF"], label="Select Format")
52
  ],
53
+ outputs=gr.File(label="Download Processed File"),
54
+ title="Consolidated PO Data Extractor",
55
+ description="Select the format and upload a PDF to extract and download the data."
56
  )
57
 
58
  if __name__ == "__main__":