Transport_Mode_Detector / raw_data_loader.py
agueroooooooooo's picture
First Commit
3d75a04
raw
history blame contribute delete
No virus
2.13 kB
import os
import pickle
import pandas as pd
import datetime
import sys
from multiprocessing import Pool
def _assign_labels(timestamps, intervals):
    """Return one label per timestamp, taken from the first interval containing it.

    ``intervals`` must provide "starttime", "endtime" and "mode" columns.
    Interval bounds are inclusive. Timestamps that fall in no interval get
    ``None``.
    """
    labels = pd.Series([None] * len(timestamps), index=timestamps.index, dtype=object)
    # Tracks rows not yet claimed by an earlier interval, so that the first
    # matching interval wins when intervals overlap.
    unassigned = pd.Series(True, index=timestamps.index, dtype=bool)
    for start, end, mode in zip(intervals["starttime"],
                                intervals["endtime"],
                                intervals["mode"]):
        hit = unassigned & timestamps.between(start, end)
        labels.loc[hit] = mode
        unassigned &= ~hit
    return labels


def get_labeled_data_as_df(path):
    """Load all trajectories of one user directory and attach transport-mode labels.

    ``path`` must contain a tab-separated ``labels.txt`` (one header line, then
    start time, end time, mode) and a ``Trajectory`` sub-directory whose files
    are named ``YYYYMMDD...`` and carry six header lines before the
    comma-separated point records.

    Returns a list with one DataFrame per trajectory file, in sorted file-name
    order, with the columns "lat", "lon", "altitude", "date", "time" and
    "datetime". Trajectories whose file-name date equals the start date of at
    least one label additionally get a "label" column: the mode of the first
    label interval containing the point's timestamp, or a missing value when
    no interval matches. Trajectories on other dates have no "label" column.
    """
    trajectory_frames = []

    label_file = os.path.join(path, "labels.txt")
    label_df = pd.read_csv(
        label_file,
        sep="\t",
        header=0,
        names=["starttime", "endtime", "mode"],
        parse_dates=[0, 1],
    )
    label_df["startdate"] = label_df["starttime"].dt.date
    labeled_dates = set(label_df["startdate"])

    datapath = os.path.join(path, "Trajectory")
    # Sorted so the order of the returned frames does not depend on the OS.
    for filename in sorted(os.listdir(datapath)):
        df = pd.read_csv(
            os.path.join(datapath, filename),
            sep=",",
            header=None,
            skiprows=6,
            usecols=[0, 1, 3, 5, 6],
            names=["lat", "lon", "altitude", "date", "time"],
        )
        df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"])

        # The first eight characters of the file name encode the trajectory date.
        date_of_traj = datetime.datetime.strptime(filename[:8], "%Y%m%d").date()
        if date_of_traj in labeled_dates:
            labels_for_date = label_df[label_df["startdate"] == date_of_traj]
            # One vectorized comparison per label interval instead of a
            # Python-level loop over every (point, interval) pair.
            df["label"] = _assign_labels(df["datetime"], labels_for_date)

        trajectory_frames.append(df)
        print("added", datapath, filename)
    return trajectory_frames
if __name__ == '__main__':
    # Usage: raw_data_loader.py [/path/to/geolife/Data/]
    # The Data directory may be given as the first command-line argument;
    # without one the original hard-coded location is used. The default is a
    # raw string so the backslashes of the Windows path are not parsed as
    # escape sequences.
    default_path = r'D:\Geolife Trajectories 1.3\Geolife Trajectories 1.3\Data'
    path = sys.argv[1] if len(sys.argv) > 1 else default_path

    # Create the output directory up front so a missing folder fails the run
    # immediately rather than after all trajectories have been processed.
    os.makedirs("data", exist_ok=True)

    # Keep only the user directories that ship a labels.txt file.
    traj_with_labels_paths = []
    for entry in sorted(os.listdir(path)):
        currfile = os.path.join(path, entry)
        if not os.path.isdir(currfile):
            continue
        if not os.path.isfile(os.path.join(currfile, "labels.txt")):
            continue
        traj_with_labels_paths.append(currfile)

    # Each user directory is loaded in its own worker process.
    with Pool(3) as p:
        traj_frames = p.map(get_labeled_data_as_df, traj_with_labels_paths)

    # Context manager guarantees the pickle file is flushed and closed.
    with open("data/raw_labeled.pkl", "wb") as out_file:
        pickle.dump(traj_frames, out_file)