Spaces:
Runtime error
Runtime error
import os
import pickle
from math import atan2, cos, radians, sin

import numpy as np
from geopy import distance
class DataEnrich:
    """Enrich labelled GPS trajectories with per-point motion features.

    Each trajectory is a pandas DataFrame that this class reads the
    columns ``lat``, ``lon`` and ``datetime`` from (``label`` must be present
    for a trajectory to be kept).  The enrichment adds the columns ``dist``,
    ``timedelta``, ``speed``, ``accel``, ``jerk``, ``bearing`` and
    ``bearing_rate``; every value describes the step from row ``i - 1`` to
    row ``i``.

    The row-level methods address rows by index *label* and expect the
    labels to be consecutive integers starting at 0.
    """

    # Locations of the input pickle and of the cached, enriched output.
    RAW_LABELED_PATH = "data/raw_labeled.pkl"
    ENRICHED_PATH = "data/raw_enriched.pkl"

    def __init__(self):
        """Create an enricher; it holds no per-instance state."""
        pass

    def _load_raw_pickle(self):
        """Return the unpickled content of ``RAW_LABELED_PATH``.

        NOTE: unpickling executes arbitrary code embedded in the file, so
        only load pickles that come from a trusted source.
        """
        with open(self.RAW_LABELED_PATH, "rb") as raw_file:
            return pickle.load(raw_file)

    def consolidate_trajectories(self):
        """Flatten the per-person trajectory lists into one list of frames.

        Only frames that have a ``label`` column are kept.  In those frames
        the string ``'None'`` is treated as missing, rows with any missing
        value are dropped and the index is reset.

        NOTE(review): ``reset_index`` is called without ``drop=True``, so the
        previous index is kept as an extra column; this is preserved because
        downstream consumers may rely on the resulting schema.

        Returns:
            list of cleaned DataFrames, in input order.
        """
        trajectories = []
        for traj_of_person in self._load_raw_pickle():
            for traj in traj_of_person:
                if "label" not in traj.columns:
                    continue
                traj = traj.replace(to_replace='None', value=np.nan).dropna()
                traj.reset_index(inplace=True)
                trajectories.append(traj)
        return trajectories

    @staticmethod
    def _rate_of_change(delta, ts_a, ts_b):
        """Return ``delta`` divided by the seconds elapsed from ts_a to ts_b.

        Returns 0 when both timestamps are identical, to avoid dividing by
        zero.
        """
        elapsed = (ts_b - ts_a).total_seconds()
        if elapsed == 0:
            return 0
        return delta / elapsed

    def _calc_speed(self, distance, ts_a, ts_b):
        """Return ``distance`` per second between two timestamps (m/s)."""
        return self._rate_of_change(distance, ts_a, ts_b)

    def _calc_accel(self, speed_a, speed_b, ts_a, ts_b):
        """Return the change in speed per second (m/s^2)."""
        return self._rate_of_change(speed_b - speed_a, ts_a, ts_b)

    def _calc_jerk(self, acc_a, acc_b, ts_a, ts_b):
        """Return the change in acceleration per second."""
        return self._rate_of_change(acc_b - acc_a, ts_a, ts_b)

    def _calc_bearing_rate(self, bearing_a, bearing_b, ts_a, ts_b):
        """Return the change in bearing per second.

        NOTE(review): the difference is not wrapped around +/-pi, so a turn
        across the wrap point shows up as a large jump - confirm whether
        that is intended.
        """
        return self._rate_of_change(bearing_b - bearing_a, ts_a, ts_b)

    def calc_dist_for_row(self, trajectory_frame, i):
        """Store the distance in metres from row ``i - 1`` to row ``i``.

        The value is written to ``trajectory_frame.at[i, "dist"]``.
        Latitudes above 90 are reported as faulty and divided by 10 before
        use.  Identical points short-circuit to 0 without calling geopy.
        """
        lat_1 = trajectory_frame.at[i - 1, "lat"]
        lat_2 = trajectory_frame.at[i, "lat"]
        if lat_1 > 90:
            print("Faulty", lat_1)
            lat_1 /= 10
        if lat_2 > 90:
            print("Faulty", lat_2)
            lat_2 /= 10

        point_a = (lat_1, trajectory_frame.at[i - 1, "lon"])
        point_b = (lat_2, trajectory_frame.at[i, "lon"])
        if point_a == point_b:
            trajectory_frame.at[i, "dist"] = 0
        else:
            trajectory_frame.at[i, "dist"] = distance.distance(point_a, point_b).m

    def calc_speed_for_row(self, trajectory_frame, i):
        """Store the speed for row ``i`` from its ``dist`` and timestamps."""
        trajectory_frame.at[i, "speed"] = self._calc_speed(
            trajectory_frame.at[i, "dist"],
            trajectory_frame.at[i - 1, "datetime"],
            trajectory_frame.at[i, "datetime"],
        )

    def calc_accel_for_row(self, trajectory_frame, i):
        """Store the acceleration for row ``i`` from consecutive speeds."""
        trajectory_frame.at[i, "accel"] = self._calc_accel(
            trajectory_frame.at[i - 1, "speed"],
            trajectory_frame.at[i, "speed"],
            trajectory_frame.at[i - 1, "datetime"],
            trajectory_frame.at[i, "datetime"],
        )

    def set_sample_rate(self, trajectory_frame, min_sec_distance_between_points):
        """Thin the frame in place so kept points are spaced far enough apart.

        Starting with row 1 as the anchor, every following row that is less
        than ``min_sec_distance_between_points`` seconds after the anchor is
        dropped; the first row that is far enough away becomes the next
        anchor.  Rows 0 and 1 are always kept.  Rows are addressed by
        position.  If anything was dropped, the index is reset (the old
        index is kept as a column).

        Compared with the previous implementation this marks the too-close
        *candidate* row for deletion (not the anchor, which used to be
        recorded over and over) and measures gaps with ``total_seconds()``
        so that gaps of a day or more are not mistaken for short ones.
        """
        timestamps = trajectory_frame["datetime"]
        row_count = len(trajectory_frame)
        indices_to_del = []
        i = 1        # position of the current anchor row
        offset = 1   # distance from the anchor to the candidate row
        while i < row_count - offset:
            gap = timestamps.iloc[i + offset] - timestamps.iloc[i]
            if gap.total_seconds() < min_sec_distance_between_points:
                # Candidate is too close to the anchor: drop it and test
                # the next row against the same anchor.
                indices_to_del.append(i + offset)
                offset += 1
                continue
            # Candidate is far enough away: it becomes the new anchor.
            i += offset
            offset = 1

        if indices_to_del:
            trajectory_frame.drop(trajectory_frame.index[indices_to_del], inplace=True)
            trajectory_frame.reset_index(inplace=True)

    def set_time_between_points(self, trajectory_frame, i):
        """Store the seconds elapsed between row ``i - 1`` and row ``i``."""
        elapsed = trajectory_frame.at[i, "datetime"] - trajectory_frame.at[i - 1, "datetime"]
        trajectory_frame.at[i, "timedelta"] = elapsed.total_seconds()

    def calc_jerk_for_row(self, trajectory_frame, i):
        """Store the jerk for row ``i`` from consecutive accelerations."""
        trajectory_frame.at[i, "jerk"] = self._calc_jerk(
            trajectory_frame.at[i - 1, "accel"],
            trajectory_frame.at[i, "accel"],
            trajectory_frame.at[i - 1, "datetime"],
            trajectory_frame.at[i, "datetime"],
        )

    def calc_bearing_for_row(self, trajectory_frame, i):
        """Store the initial bearing (radians) from row ``i - 1`` to row ``i``.

        ``lat``/``lon`` are taken to be in degrees and are converted to
        radians before the trigonometric functions are applied; feeding
        degrees directly into ``sin``/``cos`` produced meaningless bearings.
        The result is ``atan2`` output, i.e. in the range [-pi, pi].
        """
        a_lat = radians(trajectory_frame.at[i - 1, "lat"])
        a_lon = radians(trajectory_frame.at[i - 1, "lon"])
        b_lat = radians(trajectory_frame.at[i, "lat"])
        b_lon = radians(trajectory_frame.at[i, "lon"])
        lon_delta = b_lon - a_lon
        x = cos(b_lat) * sin(lon_delta)
        y = cos(a_lat) * sin(b_lat) - sin(a_lat) * cos(b_lat) * cos(lon_delta)
        trajectory_frame.at[i, "bearing"] = atan2(x, y)

    def calc_bearing_rate_for_row(self, trajectory_frame, i):
        """Store the bearing rate for row ``i`` from consecutive bearings."""
        trajectory_frame.at[i, "bearing_rate"] = self._calc_bearing_rate(
            trajectory_frame.at[i - 1, "bearing"],
            trajectory_frame.at[i, "bearing"],
            trajectory_frame.at[i - 1, "datetime"],
            trajectory_frame.at[i, "datetime"],
        )

    def calc_features_for_frame(self, traj_frame):
        """Add all feature columns to ``traj_frame`` in place.

        The columns are created as floats (0.0) so that computed values can
        be stored without changing the column dtype.  The row labelled 0 has
        no predecessor and keeps 0.0 everywhere.  The per-row order matters:
        speed needs dist, accel needs speed, jerk needs accel and
        bearing_rate needs bearing.
        """
        for column in ("dist", "timedelta", "speed", "accel",
                       "jerk", "bearing", "bearing_rate"):
            traj_frame[column] = 0.0

        for i in traj_frame.index:
            if i == 0:
                continue
            self.set_time_between_points(traj_frame, i)
            self.calc_dist_for_row(traj_frame, i)
            self.calc_speed_for_row(traj_frame, i)
            self.calc_accel_for_row(traj_frame, i)
            self.calc_jerk_for_row(traj_frame, i)
            self.calc_bearing_for_row(traj_frame, i)
            self.calc_bearing_rate_for_row(traj_frame, i)

    def get_enriched_data(self, from_pickle, min_sec_between_points=5):
        """Return the list of enriched trajectory frames.

        Args:
            from_pickle: when true and ``ENRICHED_PATH`` exists, return the
                cached result instead of recomputing it.
            min_sec_between_points: minimum spacing in seconds passed to
                ``set_sample_rate`` when the data is (re)computed.  The
                cache does not record this value, so a cached result may
                have been produced with a different spacing.

        Returns:
            list of DataFrames with the feature columns added.  A freshly
            computed result is also written to ``ENRICHED_PATH``.

        NOTE: the cache is a pickle; only load files from a trusted source.
        """
        if from_pickle:
            if os.path.isfile(self.ENRICHED_PATH):
                print("Reading raw_enriched.pkl")
                with open(self.ENRICHED_PATH, "rb") as cached_file:
                    return pickle.load(cached_file)
            print("No pickled enriched dataset, creating. This will take a while.")

        traj = self.consolidate_trajectories()
        for elem in traj:
            self.set_sample_rate(elem, min_sec_between_points)
            self.calc_features_for_frame(elem)
        print("Done, dumping")
        with open(self.ENRICHED_PATH, "wb") as cache_file:
            pickle.dump(traj, cache_file)
        return traj
if __name__ == '__main__':
    # Script entry point: rebuild the enriched dataset from the raw pickle
    # (ignoring any cached result) and print it.
    enricher = DataEnrich()
    enriched_trajectories = enricher.get_enriched_data(False)
    print(enriched_trajectories)
    print("DOneP")