Update app.py
Browse files
app.py
CHANGED
|
@@ -36,6 +36,35 @@ def clean_description(description, item_number=None):
|
|
| 36 |
|
| 37 |
return description.strip()
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def parse_po_items_with_filters(text):
|
| 40 |
"""
|
| 41 |
Parses purchase order items from the extracted text using regex with filters.
|
|
@@ -60,6 +89,7 @@ def parse_po_items_with_filters(text):
|
|
| 60 |
" ".join(description_accumulator).strip(),
|
| 61 |
item_number=int(current_item["Item"]),
|
| 62 |
)
|
|
|
|
| 63 |
data.append(current_item)
|
| 64 |
description_accumulator = []
|
| 65 |
|
|
@@ -94,27 +124,9 @@ def parse_po_items_with_filters(text):
|
|
| 94 |
" ".join(description_accumulator).strip(),
|
| 95 |
item_number=int(current_item["Item"]),
|
| 96 |
)
|
|
|
|
| 97 |
data.append(current_item)
|
| 98 |
|
| 99 |
-
# Handle item 3 split from item 2
|
| 100 |
-
for i, row in enumerate(data):
|
| 101 |
-
if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
|
| 102 |
-
item_3_description = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
|
| 103 |
-
if item_3_description:
|
| 104 |
-
data.insert(
|
| 105 |
-
i + 1,
|
| 106 |
-
{
|
| 107 |
-
"Item": "3",
|
| 108 |
-
"Description": item_3_description.group(),
|
| 109 |
-
"Qty": "12",
|
| 110 |
-
"Unit": "Nos.",
|
| 111 |
-
"Unit Price": "3.80",
|
| 112 |
-
"Total Price": "45.60",
|
| 113 |
-
},
|
| 114 |
-
)
|
| 115 |
-
# Remove the extracted portion from item 2's description
|
| 116 |
-
row["Description"] = row["Description"].replace(item_3_description.group(), "").strip()
|
| 117 |
-
|
| 118 |
# Clean specific patterns from item 7
|
| 119 |
for item in data:
|
| 120 |
if item["Item"] == "7":
|
|
@@ -139,8 +151,6 @@ def parse_po_items_with_filters(text):
|
|
| 139 |
return df, "Data extracted successfully."
|
| 140 |
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
# Function: Save to Excel
|
| 145 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
| 146 |
df.to_excel(output_path, index=False)
|
|
|
|
| 36 |
|
| 37 |
return description.strip()
|
| 38 |
|
| 39 |
+
def format_description(description):
    """
    Formats the description into multiple lines based on predefined patterns.

    The recognised parts are emitted one per line, always in this order:
    plate title, drawing number, size, serial-number text.

    Args:
        description (str): Raw description string.

    Returns:
        str: Formatted description with line breaks. When none of the
        predefined patterns is found, the stripped input is returned
        unchanged so the description text is not discarded. An empty or
        None input yields an empty string.
    """
    # Nothing to format; also avoids passing None to re.search.
    if not description:
        return ""

    # Order matters: it fixes the order of the output lines.
    part_patterns = (
        r"Stainless Steel RATING AND DIAGRAM PLATE",
        r"As per Drg\.No\..*?\d+",
        r"SIZE\s*:\s*\d+mm\s*X\s*\d+mm\s*X\s*[\d.]+mm\s*Thick",
        r"With Serial No:.*",
    )

    # Keep only the parts that are actually present in the description
    found = (re.search(pattern, description) for pattern in part_patterns)
    formatted_description = [match.group() for match in found if match]

    # Descriptions that do not follow the expected structure are passed
    # through instead of being replaced by an empty string.
    if not formatted_description:
        return description.strip()

    # Join the lines with a newline character
    return "\n".join(formatted_description)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
def parse_po_items_with_filters(text):
|
| 69 |
"""
|
| 70 |
Parses purchase order items from the extracted text using regex with filters.
|
|
|
|
| 89 |
" ".join(description_accumulator).strip(),
|
| 90 |
item_number=int(current_item["Item"]),
|
| 91 |
)
|
| 92 |
+
current_item["Description"] = format_description(current_item["Description"])
|
| 93 |
data.append(current_item)
|
| 94 |
description_accumulator = []
|
| 95 |
|
|
|
|
| 124 |
" ".join(description_accumulator).strip(),
|
| 125 |
item_number=int(current_item["Item"]),
|
| 126 |
)
|
| 127 |
+
current_item["Description"] = format_description(current_item["Description"])
|
| 128 |
data.append(current_item)
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
# Clean specific patterns from item 7
|
| 131 |
for item in data:
|
| 132 |
if item["Item"] == "7":
|
|
|
|
| 151 |
return df, "Data extracted successfully."
|
| 152 |
|
| 153 |
|
|
|
|
|
|
|
| 154 |
# Function: Save to Excel
|
| 155 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
| 156 |
df.to_excel(output_path, index=False)
|