Transport_Mode_Detector / data_enrich.py
agueroooooooooo's picture
First Commit
3d75a04
raw history blame
No virus
7.06 kB
import os
import pickle
from math import cos, sin, atan2
import numpy as np
from geopy import distance
class DataEnrich:
def __init__(self):
pass
def _load_raw_pickle(self):
return pickle.load(open("data/raw_labeled.pkl","rb"))
def consolidate_trajectories(self):
raw_dfs = self._load_raw_pickle()
trajectories = []
for traj_of_person in raw_dfs:
dfs_with_label = []
for traj in traj_of_person:
if "label" in traj.columns:
traj = traj.replace(to_replace='None', value=np.nan).dropna()
traj.reset_index(inplace=True)
dfs_with_label.append(traj)
if dfs_with_label:
trajectories.extend(dfs_with_label)
return trajectories
def _calc_speed(self, distance, ts_a, ts_b):
time_delta = ts_b - ts_a
if time_delta.total_seconds() == 0:
return 0
return distance / time_delta.total_seconds() # m/s
def _calc_accel(self, speed_a, speed_b, ts_a, ts_b):
time_delta = ts_b - ts_a
speed_delta = speed_b - speed_a
if time_delta.total_seconds() == 0:
return 0
return speed_delta / time_delta.total_seconds() # m/s^2
def _calc_jerk(self, acc_a, acc_b, ts_a, ts_b):
time_delta = ts_b - ts_a
acc_delta = acc_b - acc_a
if time_delta.total_seconds() == 0:
return 0
return acc_delta / time_delta.total_seconds()
def _calc_bearing_rate(self, bearing_a, bearing_b, ts_a, ts_b):
time_delta = ts_b - ts_a
bear_delta = bearing_b - bearing_a
if time_delta.total_seconds() == 0:
return 0
return bear_delta / time_delta.total_seconds()
def calc_dist_for_row(self, trajectory_frame, i):
lat_1 = trajectory_frame["lat"][i-1]
lat_2 = trajectory_frame["lat"][i]
if lat_1 > 90:
print("Faulty", lat_1)
lat_1 /= 10
if lat_2 > 90:
print("Faulty", lat_2)
lat_2 /= 10
point_a = (lat_1, trajectory_frame["lon"][i-1])
point_b = (lat_2, trajectory_frame["lon"][i])
if point_a[0] == point_b[0] and point_a[1] == point_b[1]:
trajectory_frame["dist"][i] = 0
else:
trajectory_frame["dist"][i] = distance.distance((point_a[0], point_a[1]), (point_b[0], point_b[1])).m
def calc_speed_for_row(self, trajectory_frame, i):
trajectory_frame["speed"][i] = self._calc_speed(trajectory_frame["dist"][i],
trajectory_frame["datetime"][i-1],
trajectory_frame["datetime"][i]
)
def calc_accel_for_row(self, trajectory_frame, i):
trajectory_frame["accel"][i] = self._calc_accel(trajectory_frame["speed"][i-1],
trajectory_frame["speed"][i],
trajectory_frame["datetime"][i - 1],
trajectory_frame["datetime"][i]
)
def set_sample_rate(self, trajectory_frame, min_sec_distance_between_points):
i = 1
indices_to_del = []
deleted = 1
while i < len(trajectory_frame)-deleted:
ts1 = trajectory_frame["datetime"][i]
ts2 = trajectory_frame["datetime"][i+deleted]
delta = ts2-ts1
if delta.seconds < min_sec_distance_between_points:
deleted+=1
indices_to_del.append(i)
continue
i+=deleted
deleted = 1
if indices_to_del:
trajectory_frame.drop(trajectory_frame.index[indices_to_del],inplace=True)
trajectory_frame.reset_index(inplace=True)
def set_time_between_points(self, trajectory_frame, i):
trajectory_frame["timedelta"][i] = (trajectory_frame["datetime"][i]-trajectory_frame["datetime"][i-1]).total_seconds()
def calc_jerk_for_row(self, trajectory_frame, i):
trajectory_frame["jerk"][i] = self._calc_jerk(trajectory_frame["accel"][i - 1],
trajectory_frame["accel"][i],
trajectory_frame["datetime"][i - 1],
trajectory_frame["datetime"][i]
)
def calc_bearing_for_row(self, trajectory_frame, i):
a_lat = trajectory_frame["lat"][i - 1]
a_lon = trajectory_frame["lon"][i - 1]
b_lat = trajectory_frame["lat"][i]
b_lon = trajectory_frame["lon"][i]
x = cos(b_lat) * sin(b_lon-a_lon)
y = cos(a_lat) * sin(b_lat) - sin(a_lat) * cos(b_lat) * cos(b_lon-a_lon)
trajectory_frame["bearing"][i] = atan2(x, y)
def calc_bearing_rate_for_row(self, trajectory_frame, i):
trajectory_frame["bearing_rate"][i] = self._calc_bearing_rate(trajectory_frame["bearing"][i - 1],
trajectory_frame["bearing"][i],
trajectory_frame["datetime"][i - 1],
trajectory_frame["datetime"][i]
)
def calc_features_for_frame(self, traj_frame):
traj_frame["dist"] = 0
traj_frame["timedelta"] = 0
traj_frame["speed"] = 0
traj_frame["accel"] = 0
traj_frame["jerk"] = 0
traj_frame["bearing"] = 0
traj_frame["bearing_rate"] = 0
for i, elem in traj_frame.iterrows():
if i == 0:
continue
self.set_time_between_points(traj_frame, i)
self.calc_dist_for_row(traj_frame, i)
self.calc_speed_for_row(traj_frame, i)
self.calc_accel_for_row(traj_frame, i)
self.calc_jerk_for_row(traj_frame, i)
self.calc_bearing_for_row(traj_frame, i)
self.calc_bearing_rate_for_row(traj_frame, i)
def get_enriched_data(self, from_pickle):
if from_pickle:
if os.path.isfile("data/raw_enriched.pkl"):
print("Reading raw_enriched.pkl")
return pickle.load(open("data/raw_enriched.pkl", "rb"))
else:
print("No pickled enriched dataset, creating. This will take a while.")
traj = self.consolidate_trajectories()
for elem in traj:
self.set_sample_rate(elem, 5)
self.calc_features_for_frame(elem)
print("Done, dumping")
pickle.dump(traj, open("data/raw_enriched.pkl", "wb"))
return traj
if __name__ == '__main__':
a=DataEnrich()
z=a.get_enriched_data(False)
print(z)
print("DOneP")