dschandra committed on
Commit
5224ad4
·
verified ·
1 Parent(s): b9789b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -46,7 +46,7 @@ def format_description(description):
46
  """
47
  # Extract parts of the description based on the expected structure
48
  line1_match = re.search(r"Stainless Steel RATING AND DIAGRAM PLATE", description)
49
- line2_match = re.search(r"As per Drg\.No\..*?\d+", description)
50
  line3_match = re.search(r"SIZE\s*:\s*\d+mm\s*X\s*\d+mm\s*X\s*[\d.]+mm\s*Thick", description)
51
  line4_match = re.search(r"With Serial No:.*", description)
52
 
@@ -55,11 +55,11 @@ def format_description(description):
55
  if line1_match:
56
  formatted_description.append(line1_match.group())
57
  if line2_match:
58
- formatted_description.append(line2_match.group())
59
  if line3_match:
60
- formatted_description.append(line3_match.group())
61
  if line4_match:
62
- formatted_description.append(line4_match.group())
63
 
64
  # Join the lines with a newline character
65
  return "\n".join(formatted_description)
@@ -151,6 +151,7 @@ def parse_po_items_with_filters(text):
151
  return df, "Data extracted successfully."
152
 
153
 
 
154
  # Function: Save to Excel
155
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
156
  df.to_excel(output_path, index=False)
 
46
  """
47
  # Extract parts of the description based on the expected structure
48
  line1_match = re.search(r"Stainless Steel RATING AND DIAGRAM PLATE", description)
49
+ line2_match = re.search(r"As per Drg\.No\..*?[A-Z0-9]+\s", description)
50
  line3_match = re.search(r"SIZE\s*:\s*\d+mm\s*X\s*\d+mm\s*X\s*[\d.]+mm\s*Thick", description)
51
  line4_match = re.search(r"With Serial No:.*", description)
52
 
 
55
  if line1_match:
56
  formatted_description.append(line1_match.group())
57
  if line2_match:
58
+ formatted_description.append(line2_match.group().strip())
59
  if line3_match:
60
+ formatted_description.append(line3_match.group().strip())
61
  if line4_match:
62
+ formatted_description.append(line4_match.group().strip())
63
 
64
  # Join the lines with a newline character
65
  return "\n".join(formatted_description)
 
151
  return df, "Data extracted successfully."
152
 
153
 
154
+
155
  # Function: Save to Excel
156
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
157
  df.to_excel(output_path, index=False)