|
import os |
|
import pickle |
|
from math import cos, sin, atan2 |
|
|
|
import numpy as np |
|
from geopy import distance |
|
|
|
class DataEnrich: |
|
|
|
def __init__(self): |
|
pass |
|
|
|
def _load_raw_pickle(self): |
|
return pickle.load(open("data/raw_labeled.pkl","rb")) |
|
|
|
def consolidate_trajectories(self): |
|
raw_dfs = self._load_raw_pickle() |
|
trajectories = [] |
|
for traj_of_person in raw_dfs: |
|
dfs_with_label = [] |
|
for traj in traj_of_person: |
|
if "label" in traj.columns: |
|
traj = traj.replace(to_replace='None', value=np.nan).dropna() |
|
traj.reset_index(inplace=True) |
|
dfs_with_label.append(traj) |
|
if dfs_with_label: |
|
trajectories.extend(dfs_with_label) |
|
return trajectories |
|
|
|
def _calc_speed(self, distance, ts_a, ts_b): |
|
time_delta = ts_b - ts_a |
|
if time_delta.total_seconds() == 0: |
|
return 0 |
|
return distance / time_delta.total_seconds() |
|
|
|
def _calc_accel(self, speed_a, speed_b, ts_a, ts_b): |
|
time_delta = ts_b - ts_a |
|
speed_delta = speed_b - speed_a |
|
if time_delta.total_seconds() == 0: |
|
return 0 |
|
return speed_delta / time_delta.total_seconds() |
|
|
|
def _calc_jerk(self, acc_a, acc_b, ts_a, ts_b): |
|
time_delta = ts_b - ts_a |
|
acc_delta = acc_b - acc_a |
|
if time_delta.total_seconds() == 0: |
|
return 0 |
|
return acc_delta / time_delta.total_seconds() |
|
|
|
def _calc_bearing_rate(self, bearing_a, bearing_b, ts_a, ts_b): |
|
time_delta = ts_b - ts_a |
|
bear_delta = bearing_b - bearing_a |
|
if time_delta.total_seconds() == 0: |
|
return 0 |
|
return bear_delta / time_delta.total_seconds() |
|
|
|
def calc_dist_for_row(self, trajectory_frame, i): |
|
lat_1 = trajectory_frame["lat"][i-1] |
|
lat_2 = trajectory_frame["lat"][i] |
|
if lat_1 > 90: |
|
print("Faulty", lat_1) |
|
lat_1 /= 10 |
|
if lat_2 > 90: |
|
print("Faulty", lat_2) |
|
lat_2 /= 10 |
|
|
|
point_a = (lat_1, trajectory_frame["lon"][i-1]) |
|
point_b = (lat_2, trajectory_frame["lon"][i]) |
|
if point_a[0] == point_b[0] and point_a[1] == point_b[1]: |
|
trajectory_frame["dist"][i] = 0 |
|
else: |
|
trajectory_frame["dist"][i] = distance.distance((point_a[0], point_a[1]), (point_b[0], point_b[1])).m |
|
|
|
def calc_speed_for_row(self, trajectory_frame, i): |
|
trajectory_frame["speed"][i] = self._calc_speed(trajectory_frame["dist"][i], |
|
trajectory_frame["datetime"][i-1], |
|
trajectory_frame["datetime"][i] |
|
) |
|
|
|
def calc_accel_for_row(self, trajectory_frame, i): |
|
trajectory_frame["accel"][i] = self._calc_accel(trajectory_frame["speed"][i-1], |
|
trajectory_frame["speed"][i], |
|
trajectory_frame["datetime"][i - 1], |
|
trajectory_frame["datetime"][i] |
|
) |
|
|
|
def set_sample_rate(self, trajectory_frame, min_sec_distance_between_points): |
|
i = 1 |
|
indices_to_del = [] |
|
deleted = 1 |
|
while i < len(trajectory_frame)-deleted: |
|
ts1 = trajectory_frame["datetime"][i] |
|
ts2 = trajectory_frame["datetime"][i+deleted] |
|
delta = ts2-ts1 |
|
if delta.seconds < min_sec_distance_between_points: |
|
deleted+=1 |
|
indices_to_del.append(i) |
|
continue |
|
i+=deleted |
|
deleted = 1 |
|
if indices_to_del: |
|
trajectory_frame.drop(trajectory_frame.index[indices_to_del],inplace=True) |
|
trajectory_frame.reset_index(inplace=True) |
|
|
|
def set_time_between_points(self, trajectory_frame, i): |
|
trajectory_frame["timedelta"][i] = (trajectory_frame["datetime"][i]-trajectory_frame["datetime"][i-1]).total_seconds() |
|
|
|
def calc_jerk_for_row(self, trajectory_frame, i): |
|
trajectory_frame["jerk"][i] = self._calc_jerk(trajectory_frame["accel"][i - 1], |
|
trajectory_frame["accel"][i], |
|
trajectory_frame["datetime"][i - 1], |
|
trajectory_frame["datetime"][i] |
|
) |
|
|
|
def calc_bearing_for_row(self, trajectory_frame, i): |
|
a_lat = trajectory_frame["lat"][i - 1] |
|
a_lon = trajectory_frame["lon"][i - 1] |
|
b_lat = trajectory_frame["lat"][i] |
|
b_lon = trajectory_frame["lon"][i] |
|
x = cos(b_lat) * sin(b_lon-a_lon) |
|
y = cos(a_lat) * sin(b_lat) - sin(a_lat) * cos(b_lat) * cos(b_lon-a_lon) |
|
trajectory_frame["bearing"][i] = atan2(x, y) |
|
|
|
def calc_bearing_rate_for_row(self, trajectory_frame, i): |
|
trajectory_frame["bearing_rate"][i] = self._calc_bearing_rate(trajectory_frame["bearing"][i - 1], |
|
trajectory_frame["bearing"][i], |
|
trajectory_frame["datetime"][i - 1], |
|
trajectory_frame["datetime"][i] |
|
) |
|
|
|
def calc_features_for_frame(self, traj_frame): |
|
traj_frame["dist"] = 0 |
|
traj_frame["timedelta"] = 0 |
|
traj_frame["speed"] = 0 |
|
traj_frame["accel"] = 0 |
|
traj_frame["jerk"] = 0 |
|
traj_frame["bearing"] = 0 |
|
traj_frame["bearing_rate"] = 0 |
|
|
|
for i, elem in traj_frame.iterrows(): |
|
if i == 0: |
|
continue |
|
self.set_time_between_points(traj_frame, i) |
|
self.calc_dist_for_row(traj_frame, i) |
|
self.calc_speed_for_row(traj_frame, i) |
|
self.calc_accel_for_row(traj_frame, i) |
|
self.calc_jerk_for_row(traj_frame, i) |
|
self.calc_bearing_for_row(traj_frame, i) |
|
self.calc_bearing_rate_for_row(traj_frame, i) |
|
|
|
def get_enriched_data(self, from_pickle): |
|
if from_pickle: |
|
if os.path.isfile("data/raw_enriched.pkl"): |
|
print("Reading raw_enriched.pkl") |
|
return pickle.load(open("data/raw_enriched.pkl", "rb")) |
|
else: |
|
print("No pickled enriched dataset, creating. This will take a while.") |
|
traj = self.consolidate_trajectories() |
|
for elem in traj: |
|
self.set_sample_rate(elem, 5) |
|
self.calc_features_for_frame(elem) |
|
print("Done, dumping") |
|
pickle.dump(traj, open("data/raw_enriched.pkl", "wb")) |
|
|
|
return traj |
|
|
|
|
|
if __name__ == '__main__': |
|
a=DataEnrich() |
|
z=a.get_enriched_data(False) |
|
print(z) |
|
print("DOneP") |
|
|
|
|
|
|
|
|