File size: 7,056 Bytes
3d75a04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import os
import pickle
from math import cos, sin, atan2

import numpy as np
from geopy import distance

class DataEnrich:

    def __init__(self):
        pass

    def _load_raw_pickle(self):
        return pickle.load(open("data/raw_labeled.pkl","rb"))

    def consolidate_trajectories(self):
        raw_dfs = self._load_raw_pickle()
        trajectories = []
        for traj_of_person in raw_dfs:
            dfs_with_label = []
            for traj in traj_of_person:
                if "label" in traj.columns:
                    traj = traj.replace(to_replace='None', value=np.nan).dropna()
                    traj.reset_index(inplace=True)
                    dfs_with_label.append(traj)
            if dfs_with_label:
                trajectories.extend(dfs_with_label)
        return trajectories

    def _calc_speed(self, distance, ts_a, ts_b):
        time_delta = ts_b - ts_a
        if time_delta.total_seconds() == 0:
            return 0
        return distance / time_delta.total_seconds()  # m/s

    def _calc_accel(self, speed_a, speed_b, ts_a, ts_b):
        time_delta = ts_b - ts_a
        speed_delta = speed_b - speed_a
        if time_delta.total_seconds() == 0:
            return 0
        return speed_delta / time_delta.total_seconds()  # m/s^2

    def _calc_jerk(self, acc_a, acc_b, ts_a, ts_b):
        time_delta = ts_b - ts_a
        acc_delta = acc_b - acc_a
        if time_delta.total_seconds() == 0:
            return 0
        return acc_delta / time_delta.total_seconds()

    def _calc_bearing_rate(self, bearing_a, bearing_b, ts_a, ts_b):
        time_delta = ts_b - ts_a
        bear_delta = bearing_b - bearing_a
        if time_delta.total_seconds() == 0:
            return 0
        return bear_delta / time_delta.total_seconds()

    def calc_dist_for_row(self, trajectory_frame, i):
        lat_1 = trajectory_frame["lat"][i-1]
        lat_2 = trajectory_frame["lat"][i]
        if lat_1 > 90:
            print("Faulty", lat_1)
            lat_1 /= 10
        if lat_2 > 90:
            print("Faulty", lat_2)
            lat_2 /= 10

        point_a = (lat_1, trajectory_frame["lon"][i-1])
        point_b = (lat_2, trajectory_frame["lon"][i])
        if point_a[0] == point_b[0] and point_a[1] == point_b[1]:
            trajectory_frame["dist"][i] = 0
        else:
            trajectory_frame["dist"][i] = distance.distance((point_a[0], point_a[1]), (point_b[0], point_b[1])).m

    def calc_speed_for_row(self, trajectory_frame, i):
        trajectory_frame["speed"][i] = self._calc_speed(trajectory_frame["dist"][i],
                                                        trajectory_frame["datetime"][i-1],
                                                        trajectory_frame["datetime"][i]
                                                        )

    def calc_accel_for_row(self, trajectory_frame, i):
        trajectory_frame["accel"][i] = self._calc_accel(trajectory_frame["speed"][i-1],
                                                        trajectory_frame["speed"][i],
                                                        trajectory_frame["datetime"][i - 1],
                                                        trajectory_frame["datetime"][i]
                                                        )

    def set_sample_rate(self, trajectory_frame, min_sec_distance_between_points):
        i = 1
        indices_to_del = []
        deleted = 1
        while i < len(trajectory_frame)-deleted:
            ts1 = trajectory_frame["datetime"][i]
            ts2 = trajectory_frame["datetime"][i+deleted]
            delta = ts2-ts1
            if delta.seconds < min_sec_distance_between_points:
                deleted+=1
                indices_to_del.append(i)
                continue
            i+=deleted
            deleted = 1
        if indices_to_del:
            trajectory_frame.drop(trajectory_frame.index[indices_to_del],inplace=True)
            trajectory_frame.reset_index(inplace=True)

    def set_time_between_points(self, trajectory_frame, i):
        trajectory_frame["timedelta"][i] = (trajectory_frame["datetime"][i]-trajectory_frame["datetime"][i-1]).total_seconds()

    def calc_jerk_for_row(self, trajectory_frame, i):
        trajectory_frame["jerk"][i] = self._calc_jerk(trajectory_frame["accel"][i - 1],
                                                        trajectory_frame["accel"][i],
                                                        trajectory_frame["datetime"][i - 1],
                                                        trajectory_frame["datetime"][i]
                                                        )

    def calc_bearing_for_row(self, trajectory_frame, i):
        a_lat = trajectory_frame["lat"][i - 1]
        a_lon = trajectory_frame["lon"][i - 1]
        b_lat = trajectory_frame["lat"][i]
        b_lon = trajectory_frame["lon"][i]
        x = cos(b_lat) * sin(b_lon-a_lon)
        y = cos(a_lat) * sin(b_lat) - sin(a_lat) * cos(b_lat) * cos(b_lon-a_lon)
        trajectory_frame["bearing"][i] = atan2(x, y)

    def calc_bearing_rate_for_row(self, trajectory_frame, i):
        trajectory_frame["bearing_rate"][i] = self._calc_bearing_rate(trajectory_frame["bearing"][i - 1],
                                                        trajectory_frame["bearing"][i],
                                                        trajectory_frame["datetime"][i - 1],
                                                        trajectory_frame["datetime"][i]
                                                        )

    def calc_features_for_frame(self, traj_frame):
        traj_frame["dist"] = 0
        traj_frame["timedelta"] = 0
        traj_frame["speed"] = 0
        traj_frame["accel"] = 0
        traj_frame["jerk"] = 0
        traj_frame["bearing"] = 0
        traj_frame["bearing_rate"] = 0

        for i, elem in traj_frame.iterrows():
            if i == 0:
                continue
            self.set_time_between_points(traj_frame, i)
            self.calc_dist_for_row(traj_frame, i)
            self.calc_speed_for_row(traj_frame, i)
            self.calc_accel_for_row(traj_frame, i)
            self.calc_jerk_for_row(traj_frame, i)
            self.calc_bearing_for_row(traj_frame, i)
            self.calc_bearing_rate_for_row(traj_frame, i)

    def get_enriched_data(self, from_pickle):
        if from_pickle:
            if os.path.isfile("data/raw_enriched.pkl"):
                print("Reading raw_enriched.pkl")
                return pickle.load(open("data/raw_enriched.pkl", "rb"))
            else:
                print("No pickled enriched dataset, creating. This will take a while.")
        traj = self.consolidate_trajectories()
        for elem in traj:
            self.set_sample_rate(elem, 5)
            self.calc_features_for_frame(elem)
        print("Done, dumping")
        pickle.dump(traj, open("data/raw_enriched.pkl", "wb"))

        return traj


if __name__ == '__main__':
    a=DataEnrich()
    z=a.get_enriched_data(False)
    print(z)
    print("DOneP")