Spaces:

Alealejandrooo
/

MindBody_VS_Medserv

Running

App Files Files Community

Alealejandrooo committed on May 16

Commit

605feb9

•

1 Parent(s): 7bbe80f

updated main function

Browse files

Files changed (1) hide show

process.py +58 -72

process.py CHANGED Viewed

@@ -4,88 +4,74 @@ import re
 from datetime import timedelta
-def process_data(files_mindbody, files_medserv, tolerance, progress=gr.Progress()):
-    try:
-        mindbody = load_data(files_mindbody)
-        medserv = load_data(files_medserv)
-    except Exception as e:
-        print(f"An error occurred while loading data: {e}")
-        return None
-    try:
-        # Remove multiple commas from the 'Client' column
-        medserv['Client'] = medserv['Client'].str.replace(r',+', ',', regex=True)
-        mindbody['Client'] = mindbody['Client'].str.replace(r',+', ',', regex=True)
-        # Split 'Client' names into first name and last name components for both DataFrames
-        medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
-        mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
-    except Exception as e:
-        print(f"An error occurred while processing client names: {e}")
-    try:
-        # Split dates if they contain commas in the 'DOS' column of medserv
-        medserv['DOS'] = medserv['DOS'].astype(str)
-        medserv['DOS'] = medserv['DOS'].str.split(',')
-        medserv = medserv.explode('DOS')
-        # Attempt to convert dates using multiple formats
-        formats_to_try = ['%d/%m/%Y', '%Y-%m-%d']  # Add more formats as needed
-        for format_to_try in formats_to_try:
-            try:
-                medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
-                break  # Break out of loop if conversion succeeds
-            except ValueError:
-                continue  # Continue to next format if conversion fails
-    except Exception as e:
-        print(f"An error occurred while processing dates in medserv: {e}")
     unmatched_rows = []
-    try:
-        rows = len(mindbody)
-        # Iterate through each row in the mindbody DataFrame
-        for idx in progress.tdqm(range(rows), desc='Analyzing files...'):
-            # Extract relevant information from the current row
-            date = mindbody.iloc[idx]['DOS']
-            first_name = mindbody.iloc[idx]['First Name']
-            last_name = mindbody.iloc[idx]['Last Name']
-            # Define the range of dates to search for a match in medserv
-            date_range = [date - timedelta(days=i) for i in range(tolerance, -tolerance-1, -1)]
-            # Remove the time component from the dates in date_range
-            date_range = [d.date() for d in date_range]
-            # Filter medserv based on the date range and name criteria
-            matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
-                               ((medserv['First Name'].str.lower() == first_name.lower()) |
-                                (medserv['Last Name'].str.lower() == last_name.lower())))]
-            # If no match is found, append the row to the unmatched_rows list
-            if matches.empty:
-                unmatched_rows.append(mindbody.iloc[idx])
-    except Exception as e:
-        print(f"An error occurred while analyzing files: {e}")
-    try:
-        # Create a DataFrame from the unmatched_rows list
-        unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
-        # Specify the columns to include in the output Excel file
-        columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
-        # Format the 'DOS' column to remove time part
-        unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
-        output_file_path = 'Comparison Results.xlsx'
-        unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
-        return output_file_path
-    except Exception as e:
-        print(f"An error occurred while creating the output file: {e}")
-        return None

 from datetime import timedelta
+def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
+    mindbody = load_data(files_mindbody)
+    medserv = load_data(files_medserv)
+    medserv['Client'] = medserv['Client'].str.replace(r',+', ',', regex=True)
+    mindbody['Client'] = mindbody['Client'].str.replace(r',+', ',', regex=True)
+    # Split 'Client' names into first name and last name components for both DataFrames
+    medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
+    mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
+    mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')
+      # Split dates if they contain commas in the 'DOS' column of medserv
+    medserv['DOS'] = medserv['DOS'].astype(str)
+    medserv['DOS'] = medserv['DOS'].str.split(',')
+    medserv = medserv.explode('DOS')
+    # Attempt to convert dates using multiple formats
+    formats_to_try = ['%d/%m/%Y', '%Y-%m-%d']  # Add more formats as needed
+    for format_to_try in formats_to_try:
+        try:
+            medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
+            break  # Break out of loop if conversion succeeds
+        except ValueError:
+            continue  # Continue to next format if conversion fails
+    # Initialize an empty list to store unmatched rows
     unmatched_rows = []
+    rows = len(mindbody)
+    # Iterate through each row in the mindbody DataFrame
+    for idx in progress.tqdm(range(rows), desc='Analyzing files...'):
+        # Extract relevant information from the current row
+        date = mindbody.iloc[idx]['DOS']
+        first_name = mindbody.iloc[idx]['First Name']
+        last_name = mindbody.iloc[idx]['Last Name']
+        # Define the range of dates to search for a match in medserv
+        date_range = [date - timedelta(days=i) for i in range(tollerance, -tollerance-1, -1)]
+        # Remove the time component from the dates in date_range
+        date_range = [d.date() for d in date_range]
+        # Filter medserv based on the date range and name criteria
+        matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
+                        ((medserv['First Name'].str.lower() == first_name.lower()) |
+                            (medserv['Last Name'].str.lower() == last_name.lower())))]
+        # If no match is found, append the row to the unmatched_rows list
+        if matches.empty:
+            unmatched_rows.append(mindbody.iloc[idx])
+    # Create a DataFrame from the unmatched_rows list
+    unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
+    # Specify the columns to include in the output Excel file
+    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
+    # Format the 'DOS' column to remove time part
+    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
+    output_file_path = 'Comparison Results.xlsx'
+    unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
+    return output_file_path