Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,16 +20,28 @@ def extract_tables_with_pdfplumber(file):
|
|
| 20 |
return tables
|
| 21 |
|
| 22 |
def process_tables(tables):
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
table1_df = pd.concat(table1_data, ignore_index=True)
|
| 26 |
table1_filename = "table1.csv"
|
| 27 |
table1_df.to_csv(table1_filename, index=False)
|
| 28 |
|
| 29 |
-
# Extract the last table assumed to be Table 2
|
| 30 |
-
|
| 31 |
table2_filename = "table2.csv"
|
| 32 |
-
|
| 33 |
|
| 34 |
# Prepare context JSON with detailed context
|
| 35 |
context = {
|
|
@@ -45,8 +57,8 @@ def process_tables(tables):
|
|
| 45 |
'description': 'Table 2 extracted from the last page',
|
| 46 |
'pages': tables[-1]['page_number'],
|
| 47 |
'csv_path': table2_filename,
|
| 48 |
-
'rows':
|
| 49 |
-
'columns':
|
| 50 |
}
|
| 51 |
}
|
| 52 |
|
|
|
|
| 20 |
return tables
|
| 21 |
|
| 22 |
def process_tables(tables):
|
| 23 |
+
def create_dataframe(table_data):
    """Build a DataFrame from a raw table whose first row holds the headers.

    Duplicate header names are made unique by appending ``_1``, ``_2``, ...
    to the later occurrences, so the resulting columns never repeat.

    Args:
        table_data: Sequence of rows; ``table_data[0]`` is the header row
            and the remaining rows are the data. Presumably the ``'data'``
            entry of an extracted table -- confirm against the caller.

    Returns:
        A ``pd.DataFrame`` with one column per header. An empty
        ``table_data`` yields an empty DataFrame instead of raising
        ``IndexError``.
    """
    if not table_data:
        # Nothing was extracted: there is no header row to index.
        return pd.DataFrame()

    headers = table_data[0]
    unique_headers = []
    for header in headers:
        candidate = header
        suffix = 0
        # Keep bumping the suffix until the name is really unused. Counting
        # only exact matches of the bare header would hand the same "_1"
        # name to a third occurrence, and could also collide with a header
        # that is literally called e.g. "A_1".
        while candidate in unique_headers:
            suffix += 1
            candidate = f"{header}_{suffix}"
        unique_headers.append(candidate)
    return pd.DataFrame(table_data[1:], columns=unique_headers)
|
| 34 |
+
|
| 35 |
+
# Extract and process the tables into DataFrames
|
| 36 |
+
table1_data = [create_dataframe(t['data']) for t in tables[:3]]
|
| 37 |
table1_df = pd.concat(table1_data, ignore_index=True)
|
| 38 |
table1_filename = "table1.csv"
|
| 39 |
table1_df.to_csv(table1_filename, index=False)
|
| 40 |
|
| 41 |
+
# Extract and process the last table assumed to be Table 2
|
| 42 |
+
table2_df = create_dataframe(tables[-1]['data'])
|
| 43 |
table2_filename = "table2.csv"
|
| 44 |
+
table2_df.to_csv(table2_filename, index=False)
|
| 45 |
|
| 46 |
# Prepare context JSON with detailed context
|
| 47 |
context = {
|
|
|
|
| 57 |
'description': 'Table 2 extracted from the last page',
|
| 58 |
'pages': tables[-1]['page_number'],
|
| 59 |
'csv_path': table2_filename,
|
| 60 |
+
'rows': table2_df.shape[0],
|
| 61 |
+
'columns': table2_df.shape[1]
|
| 62 |
}
|
| 63 |
}
|
| 64 |
|