import gradio as gr
import pandas as pd

# UK Government postcode regex: the special "GIR 0AA" code, or a standard
# outward code followed by an optional whitespace and the inward code.
_UK_POSTCODE_PATTERN = (
    r"([Gg][Ii][Rr] 0[Aa]{2})"
    r"|((([A-Za-z][0-9]{1,2})"
    r"|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})"
    r"|(([A-Za-z][0-9][A-Za-z])"
    r"|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))"
    r"\s?[0-9][A-Za-z]{2})"
)


def _normalize_postcodes(series):
    """Return *series* lower-cased with all space characters removed."""
    # regex=False: the space is a literal, independent of the pandas default.
    return series.str.lower().str.replace(' ', '', regex=False)


def find_postcode_column(df):
    """Return the label of the column holding the most UK postcodes.

    Every column is converted to strings and each value is tested against
    the UK postcode pattern, anchored at the start of the value only.

    Args:
        df: DataFrame whose columns are scanned.

    Returns:
        The label of the column with the highest number of matching values
        (the first such column on ties), or ``None`` when no column
        contains any match.
    """
    best_column = None
    best_count = 0

    for column in df.columns:
        # astype(str) lets non-string columns be tested without raising.
        matches = df[column].astype(str).str.match(_UK_POSTCODE_PATTERN)
        valid_count = matches.sum()

        # Strict comparison keeps the earliest column when counts are equal.
        if valid_count > best_count:
            best_count = valid_count
            best_column = column

    return best_column


def get_lat_lon(postcodes_df, postcode_mapping):
    """Count postcode occurrences and attach coordinates from a mapping.

    The postcode column of ``postcodes_df`` is detected with
    ``find_postcode_column``. Postcodes on both sides are lower-cased and
    stripped of spaces before being counted and left-merged with
    ``postcode_mapping``. Neither input DataFrame is modified.

    Args:
        postcodes_df: DataFrame containing a column of postcodes.
        postcode_mapping: DataFrame with a ``'postcode'`` (or
            ``'Postal code'``) column plus ``'latitude'`` and
            ``'longitude'`` columns.

    Returns:
        A list of dicts, one per row of the merged result, holding
        ``'Postal code'``, ``'count'`` and the mapping's remaining columns.
        ``'latitude'`` and ``'longitude'`` are ``''`` for postcodes that
        are absent from the mapping.

    Raises:
        gr.Error: On any failure, with the message prefixed by
            ``"Error processing postal codes: "``. This includes the case
            where no postcode column could be identified.
    """
    try:
        postcode_column = find_postcode_column(postcodes_df)
        # Compare with None explicitly: a valid column label may be falsy
        # (e.g. the integer label 0 of a header-less DataFrame).
        if postcode_column is None:
            raise gr.Error("No valid postcode column found")

        # Normalise into a new Series so the caller's DataFrame is untouched.
        normalized = _normalize_postcodes(postcodes_df[postcode_column])
        postcode_counts = normalized.value_counts().reset_index()
        postcode_counts.columns = ['Postal code', 'count']

        # rename() without inplace returns a new DataFrame, so the caller's
        # mapping keeps its original column name and values.
        mapping = postcode_mapping.rename(columns={'postcode': 'Postal code'})
        mapping['Postal code'] = _normalize_postcodes(mapping['Postal code'])

        # Left merge keeps every counted postcode, even without coordinates.
        result_df = pd.merge(
            postcode_counts, mapping, on='Postal code', how='left'
        )

        # Blank out coordinates for postcodes missing from the mapping.
        result_df['latitude'] = result_df['latitude'].fillna('')
        result_df['longitude'] = result_df['longitude'].fillna('')

        results = result_df.to_dict(orient='records')
    except Exception as e:
        # Surface every failure as a Gradio error; keep the cause chained.
        raise gr.Error("Error processing postal codes: " + str(e)) from e

    return results