Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pdfplumber | |
| import pandas as pd | |
| import os | |
# Input workbook to filter and the destination for the filtered copy.
input_file_path = '/mnt/data/extracted_data (1).xlsx'
output_file_path = '/mnt/data/filtered_positions_10_to_450.xlsx'

# Directory that holds the input and output workbooks.
directory_path = "/mnt/data/"

# Create the directory if it is missing. exist_ok=True replaces the separate
# os.path.exists() check, which could race with another process creating it.
os.makedirs(directory_path, exist_ok=True)

# Load the input workbook when it is present; otherwise only report that it
# is missing so the rest of the script can still run.
if os.path.exists(input_file_path):
    df = pd.read_excel(input_file_path)
    print("File loaded successfully.")
else:
    print(f"File not found: {input_file_path}. Please ensure the file is in the directory.")

# The directory is guaranteed to exist at this point, so list its contents
# directly (the former "create it if missing" fallback could never run).
files = os.listdir(directory_path)
print("Files in /mnt/data/:", files)
# Define the function to extract data from PDF
def extract_data(pdf_file_path, start_pos, end_pos):
    """Read a PDF and write the rows in the requested position range to Excel.

    The PDF is opened with pdfplumber and the text of every page is printed
    for debugging. The tabular data is still the hard-coded example below
    (the extracted text is not parsed yet). It is filtered to the inclusive
    range ``start_pos``..``end_pos`` on the "Pos" column and written to
    ``/mnt/data/extracted_data.xlsx``.

    Args:
        pdf_file_path: Path of the PDF file to read.
        start_pos: Lowest "Pos" value to keep; ``None`` means no lower bound.
        end_pos: Highest "Pos" value to keep; ``None`` means no upper bound.

    Returns:
        The path of the written Excel file on success, otherwise a string
        starting with ``"Error:"`` that describes the failure.
    """
    # NOTE(review): the interface's output component is a gr.File, so the
    # "Error: ..." strings returned here are probably interpreted by Gradio
    # as file paths. Consider raising gr.Error instead -- confirm against the
    # Gradio version in use before changing the return contract.
    try:
        # Reject an inverted range up front rather than silently producing an
        # empty workbook.
        if start_pos is not None and end_pos is not None and start_pos > end_pos:
            return "Error: Start Position must not be greater than End Position."

        # Load and process the PDF
        with pdfplumber.open(pdf_file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text is None:
                    return "Error: Could not extract text from the PDF. Please check the file format."
                print("Extracted Text:", text)  # Debugging line

        # Example extracted data structure
        extracted_data = {
            "Pos": [10, 20, 30],
            "Item Code": ["155569003011", "155569003012", "155569003013"],
            "Quantity": [10, 10, 10],
            "Basic Price": [57.66, 57.66, 57.66],
            "Sub Total": [576.60, 576.60, 576.60],
        }
        df = pd.DataFrame(extracted_data)

        # Honour the requested position range (both bounds inclusive). These
        # parameters were previously accepted but never applied.
        if start_pos is not None:
            df = df[df["Pos"] >= start_pos]
        if end_pos is not None:
            df = df[df["Pos"] <= end_pos]

        # Save to Excel and confirm the file actually landed on disk.
        output_path = "/mnt/data/extracted_data.xlsx"
        df.to_excel(output_path, index=False)
        if not os.path.exists(output_path):
            return "Error: Failed to save the Excel file."
        print("File saved successfully:", output_path)
        return output_path
    except Exception as e:
        # Top-level boundary for the UI callback: report any failure as text
        # instead of letting the exception propagate.
        print("Error encountered:", str(e))
        return f"Error: {e}"
# Build the Gradio UI: a PDF upload and the two position bounds go in,
# a downloadable Excel file comes out.
_pdf_upload = gr.File(type="filepath", label="Upload PDF File")
_start_position = gr.Number(value=10, label="Start Position")
_end_position = gr.Number(value=450, label="End Position")
_excel_download = gr.File(label="Download Extracted Excel")

# extract_data receives the input values positionally, in the order listed.
interface = gr.Interface(
    fn=extract_data,
    inputs=[_pdf_upload, _start_position, _end_position],
    outputs=_excel_download,
)
# Additional Excel filtering logic: runs once at startup, before the UI is
# launched, and writes a filtered copy of the input workbook when it exists.
if os.path.exists(input_file_path):
    df = pd.read_excel(input_file_path)
    if 'Pos' in df.columns:
        # Filter for positions between 10 and 450 (both bounds inclusive)
        filtered_df = df[(df['Pos'] >= 10) & (df['Pos'] <= 450)]
        filtered_df.to_excel(output_file_path, index=False)
        print(f"Filtered data saved to: {output_file_path}")
    else:
        # A workbook without a 'Pos' column would otherwise raise KeyError
        # here and stop the script before the interface is launched.
        print(f"Column 'Pos' not found in: {input_file_path}. Skipping filtering.")
else:
    print(f"Input file not found: {input_file_path}. Skipping filtering.")

# Start the Gradio app.
interface.launch()