"""Source, clean and auto-label the MMR dataset.

Run as a script, this module reads the previously sourced raw CSV from the
data folder, cleans it, generates labels and embeddings, and writes both
results back to the data folder as CSV files.
"""
######################################## IMPORTING REQUIRED LIBRARIES ####################################
import os
import sys

import pandas as pd

# Parent of the directory that contains this file. It is appended to the import
# path before the `utilities` import below, and it also hosts the `data` folder.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(_PROJECT_ROOT)
data_folder = os.path.join(_PROJECT_ROOT, 'data')

from utilities import get_data, input_filter, clean_data, autogenerate_labels  # noqa: E402

################################################## INPUTS ################################################
# Example inputs for data_sourcing():
# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35


def _write_csv(df, filename):
    """Write *df* (without its index) to *filename* inside ``data_folder``.

    The data folder is created first if it does not exist, so a fresh
    checkout without a ``data`` directory does not fail on the first write.
    """
    os.makedirs(data_folder, exist_ok=True)
    df.to_csv(os.path.join(data_folder, filename), index=False)


def data_sourcing(left_lat, left_lon, dist):
    """Fetch the raw dataset and save it as ``MMR_DATA.csv``.

    Args:
        left_lat: Latitude passed to ``input_filter`` as ``lat``.
        left_lon: Longitude passed to ``input_filter`` as ``lon``.
        dist: Forwarded unchanged to ``get_data``
            (presumably a distance; unit not visible here - TODO confirm).

    Returns:
        The object returned by ``get_data`` (it must support ``to_csv``).
    """
    lat, lon = input_filter(lat=left_lat, lon=left_lon)
    df = get_data(lat, lon, dist)
    _write_csv(df, 'MMR_DATA.csv')
    return df


def data_clean_for_training(df):
    """Clean *df* with ``clean_data`` and save it as ``MMR_DATA_CLEAN.csv``.

    Args:
        df: Raw data, e.g. the return value of ``data_sourcing``.

    Returns:
        The cleaned data as returned by ``clean_data``.
    """
    df = clean_data(df)
    _write_csv(df, 'MMR_DATA_CLEAN.csv')
    return df


def main():
    """Clean and label the stored raw data, then write labels and embeddings.

    Reads ``MMR_DATA.csv`` from the data folder, so ``data_sourcing`` must
    have been run at least once beforehand.
    """
    # To test the data sourcing endpoint instead of reading the cached CSV:
    #     df = data_sourcing(left_lat, left_lon, dist)
    #     if not df.empty:  # `if df:` raises ValueError for a DataFrame
    #         print("Data loaded successfully !!")
    #     clean_df = data_clean_for_training(df)
    df = pd.read_csv(os.path.join(data_folder, 'MMR_DATA.csv'))
    df = clean_data(df)
    labelled_df, embeddings_df = autogenerate_labels(df)
    _write_csv(labelled_df, 'MMR_DATA_CLEAN_LABELLED.csv')
    _write_csv(embeddings_df, 'MMR_CLEAN_EMBEDDINGS.csv')


if __name__ == '__main__':
    main()