File size: 1,411 Bytes
c161b3b
 
 
81ebdf3
c161b3b
 
81ebdf3
c161b3b
 
 
 
1662e26
 
 
c161b3b
1662e26
c161b3b
 
 
 
 
 
 
 
 
 
 
 
 
81ebdf3
 
 
c161b3b
81ebdf3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
######################################## IMPORTING REQUIRED LIBRARIES ####################################
import os
import sys
import pandas as pd
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
from utilities import get_data, input_filter, clean_data, autogenerate_labels


################################################## INPUTS ################################################

# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35

def data_sourcing(left_lat, left_lon, dist):
    lat, lon = input_filter(lat = left_lat, lon=left_lon)
    df = get_data(lat, lon, dist)
    df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
    return df

def data_clean_for_training(df):
    df = clean_data(df)
    df.to_csv(f'{data_folder}/MMR_DATA_CLEAN.csv', index=False)
    return df


if __name__ == '__main__':

    # df = data_sourcing() ## testing the data sourcing endpoint
    # if df:
    #     print("Data loaded successfully !!")

    # clean_df = data_clean_for_training(df)

    df = pd.read_csv(f'{data_folder}/MMR_DATA.csv')
    df = clean_data(df)
    labelled_df, embeddings_df = autogenerate_labels(df)

    labelled_df.to_csv(f'{data_folder}/MMR_DATA_CLEAN_LABELLED.csv', index=False)
    embeddings_df.to_csv(f'{data_folder}/MMR_CLEAN_EMBEDDINGS.csv', index=False)