omdivyatej's picture
change
d3e5614
# app.py
import gradio as gr
import pandas as pd # Import pandas
from ocr_request import ocr_request
import io
# Column order of the flattened prediction table; doubles as the CSV header.
_PREDICTION_COLUMNS = [
    'invoice_number',
    'product_description',
    'predicted_material',
    'confidence',
]


def _flatten_predictions(responses):
    """Flatten per-invoice OCR responses into one row dict per product.

    Each response is read as a mapping with an 'invoice_number' key and a
    list of product dicts under 'predictions' (falling back to
    'product_description').  NOTE(review): the response schema comes from
    ocr_request, which is not visible here -- confirm against that module.

    Raises:
        KeyError: if a response lacks 'invoice_number' or a product lacks
            'predicted_material' / 'confidence'.
    """
    rows = []
    for item in responses:
        invoice_number = item['invoice_number']
        # A missing, empty, or None product list simply contributes no rows.
        products = (
            item.get('predictions')
            or item.get('product_description')
            or []
        )
        for product in products:
            rows.append({
                'invoice_number': invoice_number,
                # Products may carry their text under 'description'; expose it
                # as 'product_description' so every row has the same keys.
                'product_description': product.get(
                    'product_description', product.get('description')
                ),
                'predicted_material': product['predicted_material'],
                'confidence': product['confidence'],
            })
    return rows


def process_file(files):
    """Run OCR on each uploaded file and tabulate the predicted materials.

    Args:
        files: iterable of uploaded-file objects exposing a ``name`` attribute
            (the path handed to ``ocr_request``).  ``None`` is treated as
            "nothing uploaded".

    Returns:
        A ``(df, csv_filename)`` tuple: the flattened predictions as a
        DataFrame (shown as a table by the interface) and the path of the CSV
        file written to the working directory for download.
    """
    # Send every uploaded file to the function from ocr_request.py.
    response_arr = [ocr_request(file.name) for file in files or []]
    print("Main file :", response_arr)

    # Pinning the columns keeps the table/CSV header stable even when no
    # products were extracted.
    df = pd.DataFrame(_flatten_predictions(response_arr), columns=_PREDICTION_COLUMNS)
    print("Df final : ", df)

    # Let pandas write the file itself: it handles newline translation and
    # encoding (UTF-8), which a plain text-mode open() + write() of the
    # rendered string does not.
    csv_filename = "categories.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
# Web UI wiring: a multi-file upload is passed to process_file, and its two
# return values are rendered as a table and offered as a downloadable file.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.inputs.File(file_count='multiple', label="Upload a File"),
    outputs=[
        "dataframe",  # table view of the returned DataFrame
        gr.outputs.File(label="Download CSV"),  # path of the written CSV
    ],
)

# Start the app; share=True is forwarded to Gradio's launch() unchanged.
interface.launch(share=True)