Spaces:
Runtime error
Runtime error
import os | |
import pickle | |
import pandas as pd | |
import datetime | |
import sys | |
from multiprocessing import Pool | |
def get_labeled_data_as_df(path):
    """Load one user's trajectory files and tag every GPS fix with its mode.

    ``path`` must contain a tab-separated ``labels.txt`` (one header line,
    then start time, end time and mode columns) and a ``Trajectory``
    directory whose file names begin with the recording date as ``YYYYMMDD``.

    Args:
        path: Directory holding ``labels.txt`` and ``Trajectory``.

    Returns:
        A list of DataFrames, one per trajectory file whose date matches the
        start date of at least one label, in sorted file-name order.  Columns
        are ``lat``, ``lon``, ``altitude``, ``date``, ``time``, ``datetime``
        and ``label``.  ``label`` is the mode of the first label interval
        (in ``labels.txt`` order) that contains the fix's ``datetime``, both
        bounds inclusive, or ``None`` when no interval contains it.

    Raises:
        ValueError: If a file name in ``Trajectory`` does not start with a
            ``YYYYMMDD`` date.
    """
    label_df = pd.read_csv(
        os.path.join(path, "labels.txt"),
        sep="\t",
        header=0,
        names=["starttime", "endtime", "mode"],
        parse_dates=[0, 1],
    )
    label_df["startdate"] = label_df["starttime"].dt.date
    labeled_dates = set(label_df["startdate"])

    datapath = os.path.join(path, "Trajectory")
    trajectory_frames = []
    # Sorted so the order of the returned frames is reproducible;
    # os.listdir() makes no ordering guarantee.
    for filename in sorted(os.listdir(datapath)):
        # Check the date (taken from the file name) before parsing the CSV so
        # files from unlabeled days cost nothing.
        date_of_traj = datetime.datetime.strptime(filename[:8], "%Y%m%d").date()
        if date_of_traj not in labeled_dates:
            continue

        df = pd.read_csv(
            os.path.join(datapath, filename),
            sep=",",
            header=None,
            skiprows=6,  # the first six lines of each file are not data rows
            usecols=[0, 1, 3, 5, 6],
            names=["lat", "lon", "altitude", "date", "time"],
        )
        df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"])

        day_labels = label_df[label_df["startdate"] == date_of_traj]

        # One vectorized interval test per label instead of a Python-level
        # scan of every label for every GPS row.  `pending` keeps the
        # first-match-wins rule when label intervals overlap.
        labels = pd.Series([None] * len(df), index=df.index, dtype=object)
        pending = pd.Series(True, index=df.index, dtype=bool)
        for start, end, mode in zip(
            day_labels["starttime"], day_labels["endtime"], day_labels["mode"]
        ):
            hit = pending & df["datetime"].between(start, end)
            labels.loc[hit] = mode
            pending &= ~hit
        df["label"] = labels

        # NOTE(review): the indentation of this file was lost; keeping only
        # frames from labeled dates is assumed from the function's name and
        # the "raw_labeled" output file -- confirm against the original.
        trajectory_frames.append(df)
        print("added", datapath, filename)
    return trajectory_frames
if __name__ == '__main__':
    # Usage: raw_data_loader.py [/path/to/geolife/Data/]
    # The data directory may be given as the first argument; without one the
    # hard-coded default location is used.  A raw string keeps the Windows
    # backslashes literal instead of relying on unrecognized escape sequences.
    if len(sys.argv) > 1:
        path = sys.argv[1]
    else:
        path = r'D:\Geolife Trajectories 1.3\Geolife Trajectories 1.3\Data'

    # Keep only the sub-directories that ship a labels.txt file.
    traj_with_labels_paths = []
    for entry in os.listdir(path):
        currfile = os.path.join(path, entry)
        if os.path.isdir(currfile) and "labels.txt" in os.listdir(currfile):
            traj_with_labels_paths.append(currfile)

    # Create the output directory up front so a missing "data" folder fails
    # (or is fixed) before the processing work rather than after it.
    os.makedirs("data", exist_ok=True)

    # One task per directory, spread over three worker processes.
    with Pool(3) as p:
        traj_frames = p.map(get_labeled_data_as_df, traj_with_labels_paths)

    # The context manager closes (and flushes) the pickle file deterministically.
    with open("data/raw_labeled.pkl", "wb") as out:
        pickle.dump(traj_frames, out)