Spaces:
Sleeping
Sleeping
File size: 1,411 Bytes
c161b3b 81ebdf3 c161b3b 81ebdf3 c161b3b 1662e26 c161b3b 1662e26 c161b3b 81ebdf3 c161b3b 81ebdf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
######################################## IMPORTING REQUIRED LIBRARIES ####################################
import os
import sys
import pandas as pd
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
from utilities import get_data, input_filter, clean_data, autogenerate_labels
################################################## INPUTS ################################################
# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35
def data_sourcing(left_lat, left_lon, dist):
lat, lon = input_filter(lat = left_lat, lon=left_lon)
df = get_data(lat, lon, dist)
df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
return df
def data_clean_for_training(df):
df = clean_data(df)
df.to_csv(f'{data_folder}/MMR_DATA_CLEAN.csv', index=False)
return df
if __name__ == '__main__':
# df = data_sourcing() ## testing the data sourcing endpoint
# if df:
# print("Data loaded successfully !!")
# clean_df = data_clean_for_training(df)
df = pd.read_csv(f'{data_folder}/MMR_DATA.csv')
df = clean_data(df)
labelled_df, embeddings_df = autogenerate_labels(df)
labelled_df.to_csv(f'{data_folder}/MMR_DATA_CLEAN_LABELLED.csv', index=False)
embeddings_df.to_csv(f'{data_folder}/MMR_CLEAN_EMBEDDINGS.csv', index=False) |