Spaces:
Runtime error
Runtime error
DSatishchandra
committed on
Update parse_bhel.py
Browse files- parse_bhel.py +5 -5
parse_bhel.py
CHANGED
@@ -2,9 +2,9 @@ import gradio as gr
|
|
2 |
import pdfplumber
|
3 |
import pandas as pd
|
4 |
|
5 |
-
def parse_bhel_pdf(
|
6 |
# Open the uploaded PDF file
|
7 |
-
with pdfplumber.open(
|
8 |
data = []
|
9 |
for page in pdf.pages:
|
10 |
text = page.extract_text()
|
@@ -29,15 +29,15 @@ def parse_bhel_pdf(pdf_file_path):
|
|
29 |
df = pd.DataFrame(data)
|
30 |
return df
|
31 |
|
32 |
-
def gradio_interface(
|
33 |
# Parse the PDF file and return the extracted table as an HTML table
|
34 |
-
df = parse_bhel_pdf(
|
35 |
return df.to_html()
|
36 |
|
37 |
# Gradio interface
|
38 |
gr.Interface(
|
39 |
fn=gradio_interface,
|
40 |
-
inputs=gr.File(type="
|
41 |
outputs="html",
|
42 |
title="BHEL PDF Data Extractor",
|
43 |
description="Upload a BHEL PDF file to extract structured data in a tabular format."
|
|
|
2 |
import pdfplumber
|
3 |
import pandas as pd
|
4 |
|
5 |
+
def parse_bhel_pdf(pdf_file):
|
6 |
# Open the uploaded PDF file
|
7 |
+
with pdfplumber.open(pdf_file) as pdf:
|
8 |
data = []
|
9 |
for page in pdf.pages:
|
10 |
text = page.extract_text()
|
|
|
29 |
df = pd.DataFrame(data)
|
30 |
return df
|
31 |
|
32 |
+
def gradio_interface(pdf_file):
    """Parse an uploaded PDF and return the extracted table as HTML.

    Args:
        pdf_file: The value Gradio hands over for the file input. It may be
            an object exposing the file path through a ``name`` attribute,
            a plain path string, or ``None`` when nothing was uploaded.

    Returns:
        An HTML string: the table rendered by ``DataFrame.to_html()``, or a
        short notice when no file was provided.
    """
    # Submitting the form without a file yields None; report that instead
    # of failing with an AttributeError on `.name`.
    if pdf_file is None:
        return "<p>No PDF file was uploaded.</p>"

    # Use `.name` when the upload is a file-like wrapper; otherwise treat the
    # value itself as the path (e.g. when the component passes a string).
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Parse the PDF file and return the extracted table as an HTML table
    df = parse_bhel_pdf(pdf_path)
    return df.to_html()
|
36 |
|
37 |
# Gradio interface
|
38 |
gr.Interface(
|
39 |
fn=gradio_interface,
|
40 |
+
inputs=gr.File(type="file", label="Upload PDF File"),
|
41 |
outputs="html",
|
42 |
title="BHEL PDF Data Extractor",
|
43 |
description="Upload a BHEL PDF file to extract structured data in a tabular format."
|