File size: 2,132 Bytes
3d75a04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import pickle
import pandas as pd
import datetime
import sys

from multiprocessing import Pool

def _first_matching_mode(timestamps, intervals):
    """Return, per timestamp, the mode of the first interval that contains it.

    Args:
        timestamps: Series of datetimes (one per trajectory point).
        intervals: DataFrame with ``starttime``, ``endtime`` and ``mode``
            columns; rows are tried in their existing order.

    Returns:
        An object Series aligned with ``timestamps``. Points that fall in no
        interval keep ``None``. Both interval bounds are inclusive.
    """
    modes = pd.Series([None] * len(timestamps), index=timestamps.index, dtype=object)
    # Tracks points that have not been claimed yet so that, when intervals
    # overlap, the earliest listed interval wins.
    pending = pd.Series(True, index=timestamps.index, dtype=bool)

    for start, end, mode in zip(intervals["starttime"],
                                intervals["endtime"],
                                intervals["mode"]):
        hit = pending & (timestamps >= start) & (timestamps <= end)
        modes[hit] = mode
        pending &= ~hit
    return modes


def get_labeled_data_as_df(path):
    """Load all trajectory files of one user directory and label their points.

    ``path`` must contain a tab-separated ``labels.txt`` (one header line,
    then start time, end time, mode) and a ``Trajectory`` sub-directory of
    comma-separated files that each begin with six header lines and whose
    names start with the date as ``YYYYMMDD``.

    Args:
        path: Directory of a single user.

    Returns:
        A list with one DataFrame per trajectory file, in ``os.listdir``
        order, with columns ``lat``, ``lon``, ``altitude``, ``date``,
        ``time`` and ``datetime``. Files whose file-name date equals the
        start date of at least one label also get a ``label`` column holding
        the mode of the first label interval containing each point, or
        ``None`` when no interval matches. Other files have no ``label``
        column.
    """
    trajectory_frames = []

    labelfile = os.path.join(path, "labels.txt")
    _label_df = pd.read_csv(labelfile, sep="\t", header=0,
                            names=["starttime", "endtime", "mode"],
                            parse_dates=[0, 1])
    _label_df["startdate"] = _label_df["starttime"].dt.date
    _label_startdate_set = set(_label_df["startdate"])

    datapath = os.path.join(path, "Trajectory")
    for file in os.listdir(datapath):
        df = pd.read_csv(os.path.join(datapath, file),
                         sep=",",
                         header=None,
                         skiprows=6,
                         usecols=[0, 1, 3, 5, 6],
                         names=["lat", "lon", "altitude", "date", "time"])

        df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"])
        date_of_traj = datetime.datetime.strptime(file[:8], "%Y%m%d").date()

        # NOTE(review): only labels that *start* on the file-name date are
        # considered, so points recorded after midnight in a file begun the
        # previous day cannot match a label starting on the new day.
        if date_of_traj in _label_startdate_set:
            labels_for_date = _label_df[_label_df["startdate"] == date_of_traj]
            # Vectorised comparison per label instead of a Python-level
            # row-by-row apply with a nested iterrows loop.
            df["label"] = _first_matching_mode(df["datetime"], labels_for_date)

        trajectory_frames.append(df)
        print("added", datapath, file)
    return trajectory_frames

if __name__ == '__main__':
    # Usage: raw_data_loader.py [/path/to/geolife/Data/]
    #
    # The __main__ guard must stay: under the "spawn" start method (the
    # default on Windows) multiprocessing re-imports this module in every
    # worker, and unguarded code would start pools recursively.

    # Raw string: the backslashes are path separators, not escape sequences.
    default_path = r'D:\Geolife Trajectories 1.3\Geolife Trajectories 1.3\Data'
    # An optional first argument overrides the historical hard-coded location.
    path = sys.argv[1] if len(sys.argv) > 1 else default_path

    # Keep only user directories that ship a labels.txt; sorted so that the
    # order of the pickled result does not depend on the file system.
    traj_with_labels_paths = [
        os.path.join(path, name)
        for name in sorted(os.listdir(path))
        if os.path.isdir(os.path.join(path, name))
        and "labels.txt" in os.listdir(os.path.join(path, name))
    ]

    # One task per user directory; each returns a list of DataFrames.
    with Pool(3) as p:
        traj_frames = p.map(get_labeled_data_as_df, traj_with_labels_paths)

    out_path = "data/raw_labeled.pkl"
    # Create the output directory on first run instead of failing in open().
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(out_path, "wb") as out_file:
        pickle.dump(traj_frames, out_file)