agueroooooooooo committed
Commit 3d75a04
1 Parent(s): 5f49e05

First Commit

.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
.idea/Transport_Mode_Detector.iml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="jdk" jdkName="Python 3.9 (pytorchbook) (2)" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <profile version="1.0">
+     <option name="myName" value="Project Default" />
+     <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
+   </profile>
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (pytorchbook) (2)" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/Transport_Mode_Detector.iml" filepath="$PROJECT_DIR$/.idea/Transport_Mode_Detector.iml" />
+     </modules>
+   </component>
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
+   </component>
+ </project>
Model_Wieghts ADDED
Binary file (2.15 MB).
__pycache__/helper.cpython-39.pyc ADDED
Binary file (2.62 kB).
__pycache__/modality_lstm.cpython-39.pyc ADDED
Binary file (1.84 kB).
app.py ADDED
@@ -0,0 +1,74 @@
+ import gradio as gr
+ import numpy as np
+ import torch
+ from modality_lstm import ModalityLSTM
+ import torch.nn as nn
+ from helper import score_to_modality
+ from PIL import Image
+
+ label_mapping = {
+     'car': [0, 'images/Cars.jpg'],
+     'walk': [1, 'images/walk.jpg'],
+     'bus': [2, 'images/bus.jpg'],
+     'train': [3, 'images/train.jpg'],
+     'subway': [4, 'images/subway.jpg'],
+     'bike': [5, 'images/bike.jpg'],
+     'run': [6, 'images/walk.jpg'],
+     'boat': [7, 'images/walk.jpg'],
+     'airplane': [8, 'images/walk.jpg'],
+     'motorcycle': [9, 'images/walk.jpg'],
+     'taxi': [10, 'images/taxi.jpg']
+ }
+
+ def pred(dist, speed, accel, timedelta, jerk, bearing, bearing_rate):
+     batch_size = 1
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+     train_on_gpu = False
+     output_size = 5
+     hidden_dim = 128
+     trip_dim = 7
+     n_layers = 2
+     drop_prob = 0.2
+     net = ModalityLSTM(trip_dim, output_size, batch_size, hidden_dim, n_layers, train_on_gpu, drop_prob, lstm_drop_prob=0.2)
+     net.load_state_dict(torch.load("Model_Wieghts"))
+     net.eval()
+
+     a = torch.tensor([[dist, speed, accel, timedelta, jerk, bearing, bearing_rate]])
+     a = a.float()
+     a = a.unsqueeze(0)
+     l = torch.tensor([1]).long()
+     b, c = net(a, l)
+     b = b.squeeze(0)
+     b = score_to_modality(b)
+     b = b[0]
+     print(b)
+     for k, v in label_mapping.items():
+         if b == v[0]:
+             return (str(k), Image.open(v[1]))
+
+
+ def greet(name):
+     return "Hello " + name + "!!"
+
+ iface = gr.Interface(fn=pred, inputs=['number', "number", "number", 'number', "number", "number", "number"], outputs=["text", gr.outputs.Image(type="pil")])
+ iface.launch()
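
A minimal smoke-test sketch (not part of the commit) for calling pred directly, assuming its definition plus the Model_Wieghts file and images/ folder from this commit are available; the seven feature values are made-up placeholders.

    # Hypothetical direct call; arguments are dist, speed, accel, timedelta, jerk, bearing, bearing_rate.
    label, image = pred(12.5, 1.4, 0.05, 10.0, 0.001, 0.8, 0.01)
    print(label)   # one of "car", "walk", "bus", "train", "subway"
    image.show()   # opens the mapped illustration via PIL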
classifier.py ADDED
@@ -0,0 +1,99 @@
+ import torch
+ import numpy as np
+ from torch import nn
+
+ from data_loader import DataLoader
+ from helper import ValTest
+ from modality_lstm import ModalityLSTM
+
+ batch_size = 32
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ train_on_gpu = True
+ output_size = 5
+ hidden_dim = 128
+ trip_dim = 7
+ n_layers = 2
+ drop_prob = 0.2
+ net = ModalityLSTM(trip_dim, output_size, batch_size, hidden_dim, n_layers, train_on_gpu, drop_prob, lstm_drop_prob=0.2)
+ lr = 0.001
+ loss_function = nn.CrossEntropyLoss(ignore_index=-1)
+ optimizer = torch.optim.Adam(net.parameters(), lr=lr)
+ epochs = 6
+ print_every = 5
+ log_every = 1
+ evaluate_every = 100
+
+ clip = 0.2  # gradient clipping
+
+ if train_on_gpu:
+     net.cuda()
+ net.train()
+
+ dl = DataLoader(batchsize=batch_size, read_from_pickle=True)
+ dl.prepare_data()
+
+ def pad_trajs(trajs, lengths):
+     for w, elem in enumerate(trajs):
+         while len(elem) < lengths[0]:
+             elem.append([-1] * trip_dim)
+     return trajs
+
+
+ losses, avg_losses = [], []
+
+ validator = ValTest(dl.val_batches, net, trip_dim, batch_size, device, loss_function, output_size, dl.get_val_size())
+ test = ValTest(dl.test_batches, net, trip_dim, batch_size, device, loss_function, output_size, dl.get_test_size())
+
+ for e in range(1, epochs+1):
+     print("epoch ", e)
+     hidden = net.init_hidden()
+     counter = 0
+     torch.cuda.empty_cache()
+     for train_sorted, labels_sorted in dl.batches():
+
+         counter += 1
+         lengths = [len(x) for x in train_sorted]
+         print("Lengths are ", lengths)
+         print("Sum of lengths", sum(lengths))
+         train_sorted = pad_trajs(train_sorted, lengths)
+
+         X = np.asarray(train_sorted, dtype=np.float)
+         input_tensor = torch.from_numpy(X)
+         print("Input tensor is ", input_tensor.shape)
+         input_tensor = input_tensor.to(device)
+
+         net.zero_grad()
+         output, max_padding_for_this_batch = net(input_tensor, lengths)
+         print("Output is", output.shape)
+
+         for labelz in labels_sorted:
+             while len(labelz) < max_padding_for_this_batch:
+                 labelz.append(-1)
+
+         labels_for_loss = torch.tensor(labels_sorted).view(max_padding_for_this_batch * batch_size, -1).squeeze(
+             1).long().to(device)
+
+         print("Labels for loss is", len(labels_for_loss))
+
+         loss = loss_function(output.view(
+             max_padding_for_this_batch * batch_size, -1),
+             labels_for_loss)
+         loss.backward()
+         nn.utils.clip_grad_norm_(net.parameters(), clip)
+         optimizer.step()
+
+         if counter % log_every == 0:
+             losses.append(loss.item())
+         if counter % print_every == 0:
+             avg_losses.append(sum(losses[-50:]) / 50)
+             print(
+                 f'Epoch: {e:2d}. {counter:d} of {int(dl.get_train_size() / batch_size):d} {avg_losses[len(avg_losses) - 1]:f} Loss: {loss.item():.4f}')
+         if counter % evaluate_every == 0:
+             validator.run()
+
+     torch.save(net.state_dict(), "Model_Wieghts")
+ print("Testing")
+
+ test.run()
+
+ torch.save(net.state_dict(), "Model_Wieghts")
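
The padding scheme above relies on CrossEntropyLoss(ignore_index=-1): timesteps added by pad_trajs carry label -1 and contribute nothing to the loss. A self-contained sketch of just that behaviour:

    import torch
    from torch import nn

    loss_fn = nn.CrossEntropyLoss(ignore_index=-1)
    logits = torch.randn(6, 5)                    # 6 timesteps, 5 classes
    labels = torch.tensor([2, 0, 1, -1, -1, -1])  # last three are padding
    print(loss_fn(logits, labels))                # averaged over the first three only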
data_enrich.py ADDED
@@ -0,0 +1,175 @@
+ import os
+ import pickle
+ from math import cos, sin, atan2
+
+ import numpy as np
+ from geopy import distance
+
+ class DataEnrich:
+
+     def __init__(self):
+         pass
+
+     def _load_raw_pickle(self):
+         return pickle.load(open("data/raw_labeled.pkl", "rb"))
+
+     def consolidate_trajectories(self):
+         raw_dfs = self._load_raw_pickle()
+         trajectories = []
+         for traj_of_person in raw_dfs:
+             dfs_with_label = []
+             for traj in traj_of_person:
+                 if "label" in traj.columns:
+                     traj = traj.replace(to_replace='None', value=np.nan).dropna()
+                     traj.reset_index(inplace=True)
+                     dfs_with_label.append(traj)
+             if dfs_with_label:
+                 trajectories.extend(dfs_with_label)
+         return trajectories
+
+     def _calc_speed(self, distance, ts_a, ts_b):
+         time_delta = ts_b - ts_a
+         if time_delta.total_seconds() == 0:
+             return 0
+         return distance / time_delta.total_seconds()  # m/s
+
+     def _calc_accel(self, speed_a, speed_b, ts_a, ts_b):
+         time_delta = ts_b - ts_a
+         speed_delta = speed_b - speed_a
+         if time_delta.total_seconds() == 0:
+             return 0
+         return speed_delta / time_delta.total_seconds()  # m/s^2
+
+     def _calc_jerk(self, acc_a, acc_b, ts_a, ts_b):
+         time_delta = ts_b - ts_a
+         acc_delta = acc_b - acc_a
+         if time_delta.total_seconds() == 0:
+             return 0
+         return acc_delta / time_delta.total_seconds()
+
+     def _calc_bearing_rate(self, bearing_a, bearing_b, ts_a, ts_b):
+         time_delta = ts_b - ts_a
+         bear_delta = bearing_b - bearing_a
+         if time_delta.total_seconds() == 0:
+             return 0
+         return bear_delta / time_delta.total_seconds()
+
+     def calc_dist_for_row(self, trajectory_frame, i):
+         lat_1 = trajectory_frame["lat"][i-1]
+         lat_2 = trajectory_frame["lat"][i]
+         if lat_1 > 90:
+             print("Faulty", lat_1)
+             lat_1 /= 10
+         if lat_2 > 90:
+             print("Faulty", lat_2)
+             lat_2 /= 10
+
+         point_a = (lat_1, trajectory_frame["lon"][i-1])
+         point_b = (lat_2, trajectory_frame["lon"][i])
+         if point_a[0] == point_b[0] and point_a[1] == point_b[1]:
+             trajectory_frame["dist"][i] = 0
+         else:
+             trajectory_frame["dist"][i] = distance.distance((point_a[0], point_a[1]), (point_b[0], point_b[1])).m
+
+     def calc_speed_for_row(self, trajectory_frame, i):
+         trajectory_frame["speed"][i] = self._calc_speed(trajectory_frame["dist"][i],
+                                                         trajectory_frame["datetime"][i-1],
+                                                         trajectory_frame["datetime"][i]
+                                                         )
+
+     def calc_accel_for_row(self, trajectory_frame, i):
+         trajectory_frame["accel"][i] = self._calc_accel(trajectory_frame["speed"][i-1],
+                                                         trajectory_frame["speed"][i],
+                                                         trajectory_frame["datetime"][i - 1],
+                                                         trajectory_frame["datetime"][i]
+                                                         )
+
+     def set_sample_rate(self, trajectory_frame, min_sec_distance_between_points):
+         i = 1
+         indices_to_del = []
+         deleted = 1
+         while i < len(trajectory_frame)-deleted:
+             ts1 = trajectory_frame["datetime"][i]
+             ts2 = trajectory_frame["datetime"][i+deleted]
+             delta = ts2-ts1
+             if delta.seconds < min_sec_distance_between_points:
+                 deleted += 1
+                 indices_to_del.append(i)
+                 continue
+             i += deleted
+             deleted = 1
+         if indices_to_del:
+             trajectory_frame.drop(trajectory_frame.index[indices_to_del], inplace=True)
+             trajectory_frame.reset_index(inplace=True)
+
+     def set_time_between_points(self, trajectory_frame, i):
+         trajectory_frame["timedelta"][i] = (trajectory_frame["datetime"][i]-trajectory_frame["datetime"][i-1]).total_seconds()
+
+     def calc_jerk_for_row(self, trajectory_frame, i):
+         trajectory_frame["jerk"][i] = self._calc_jerk(trajectory_frame["accel"][i - 1],
+                                                       trajectory_frame["accel"][i],
+                                                       trajectory_frame["datetime"][i - 1],
+                                                       trajectory_frame["datetime"][i]
+                                                       )
+
+     def calc_bearing_for_row(self, trajectory_frame, i):
+         a_lat = trajectory_frame["lat"][i - 1]
+         a_lon = trajectory_frame["lon"][i - 1]
+         b_lat = trajectory_frame["lat"][i]
+         b_lon = trajectory_frame["lon"][i]
+         x = cos(b_lat) * sin(b_lon-a_lon)
+         y = cos(a_lat) * sin(b_lat) - sin(a_lat) * cos(b_lat) * cos(b_lon-a_lon)
+         trajectory_frame["bearing"][i] = atan2(x, y)
+
+     def calc_bearing_rate_for_row(self, trajectory_frame, i):
+         trajectory_frame["bearing_rate"][i] = self._calc_bearing_rate(trajectory_frame["bearing"][i - 1],
+                                                                       trajectory_frame["bearing"][i],
+                                                                       trajectory_frame["datetime"][i - 1],
+                                                                       trajectory_frame["datetime"][i]
+                                                                       )
+
+     def calc_features_for_frame(self, traj_frame):
+         traj_frame["dist"] = 0
+         traj_frame["timedelta"] = 0
+         traj_frame["speed"] = 0
+         traj_frame["accel"] = 0
+         traj_frame["jerk"] = 0
+         traj_frame["bearing"] = 0
+         traj_frame["bearing_rate"] = 0
+
+         for i, elem in traj_frame.iterrows():
+             if i == 0:
+                 continue
+             self.set_time_between_points(traj_frame, i)
+             self.calc_dist_for_row(traj_frame, i)
+             self.calc_speed_for_row(traj_frame, i)
+             self.calc_accel_for_row(traj_frame, i)
+             self.calc_jerk_for_row(traj_frame, i)
+             self.calc_bearing_for_row(traj_frame, i)
+             self.calc_bearing_rate_for_row(traj_frame, i)
+
+     def get_enriched_data(self, from_pickle):
+         if from_pickle:
+             if os.path.isfile("data/raw_enriched.pkl"):
+                 print("Reading raw_enriched.pkl")
+                 return pickle.load(open("data/raw_enriched.pkl", "rb"))
+             else:
+                 print("No pickled enriched dataset, creating. This will take a while.")
+         traj = self.consolidate_trajectories()
+         for elem in traj:
+             self.set_sample_rate(elem, 5)
+             self.calc_features_for_frame(elem)
+         print("Done, dumping")
+         pickle.dump(traj, open("data/raw_enriched.pkl", "wb"))
+
+         return traj
+
+
+ if __name__ == '__main__':
+     a = DataEnrich()
+     z = a.get_enriched_data(False)
+     print(z)
+     print("DOneP")
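
For intuition, a standalone sketch (not part of the commit) of the arithmetic behind the speed and acceleration features: each is a delta divided by the timestamp gap in seconds, mirroring _calc_speed and _calc_accel; the two fixes below are hypothetical.

    from datetime import datetime, timedelta

    ts_a = datetime(2008, 5, 1, 8, 0, 0)
    ts_b = ts_a + timedelta(seconds=10)
    gap = (ts_b - ts_a).total_seconds()

    dist_m = 14.0                        # metres between the two fixes
    speed = dist_m / gap                 # 1.4 m/s, as in _calc_speed
    prev_speed = 1.0                     # assumed speed at the previous fix
    accel = (speed - prev_speed) / gap   # 0.04 m/s^2, as in _calc_accel
    print(speed, accel)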
data_loader.py ADDED
@@ -0,0 +1,155 @@
+ import random
+ from operator import itemgetter
+
+ from data_enrich import DataEnrich
+
+
+ class DataLoader:
+
+     label_mapping = {
+         'car': 0,
+         'walk': 1,
+         'bus': 2,
+         'train': 3,
+         'subway': 4,
+         'bike': 5,
+         'run': 6,
+         'boat': 7,
+         'airplane': 8,
+         'motorcycle': 9,
+         'taxi': 10
+     }
+
+     fields_to_feed = ["dist", "speed", "accel", "timedelta", "jerk", "bearing", "bearing_rate"]
+     labels_to_remove = ["boat", "motorcycle", "airplane", "run", "bike"]
+
+
+     def __init__(self, test_ratio=0.2, val_ratio=0.1, batchsize=4, read_from_pickle=True):
+         de = DataEnrich()
+         self._raw = de.get_enriched_data(read_from_pickle)
+         self._test_ratio = test_ratio
+         self._val_ratio = val_ratio
+         self._batchsize = batchsize
+
+     def _remove_traj_containing_labels(self):
+         cleaned = []
+         for elem in self._raw:
+             if len(elem) == 0:
+                 continue
+             if all(x not in list(elem["label"]) for x in self.labels_to_remove):
+                 cleaned.append(elem)
+         self._raw = cleaned
+
+     def _merge_labels(self, target_label, label_to_remove):
+         for elem in self._raw:
+             if label_to_remove in list(elem["label"]):
+                 elem["label"] = elem["label"].replace(to_replace=label_to_remove, value=target_label)
+
+     def _labels_to_int_repr(self):
+         for elem in self._raw:
+             elem["label"] = elem["label"].apply(lambda x: self.label_mapping[x])
+
+     def _get_split_indices(self, traj):
+         train_size = int((1 - self._test_ratio) * len(traj))
+         val_size = len(traj) - int((1 - self._val_ratio) * len(traj))
+
+         indices = [x for x in range(len(traj))]
+
+         indices_for_training = random.sample(indices, train_size)
+         indices_for_validation = random.sample(indices_for_training, val_size)
+         indices_for_training = set(indices_for_training) - set(indices_for_validation)
+         indices_for_testing = set(indices) - indices_for_training
+         indices_for_testing = list(indices_for_testing)
+
+         return list(indices_for_training), list(indices_for_testing), list(indices_for_validation)
+
+     def _set_splitted_data(self, traj, labels):
+
+         i_train, i_test, i_val = self._get_split_indices(traj)
+
+         random.shuffle(i_train)
+
+         self.test_data = list(itemgetter(*i_test)(traj))
+         self.val_data = list(itemgetter(*i_val)(traj))
+         self.train_data = list(itemgetter(*i_train)(traj))
+         self.test_labels = list(itemgetter(*i_test)(labels))
+         self.val_labels = list(itemgetter(*i_val)(labels))
+         self.train_labels = list(itemgetter(*i_train)(labels))
+
+     def _split_too_long_traj(self, traj, labels, max_points):
+         if len(traj) > max_points*2:
+             splitted_traj, splitted_labels = [], []
+             num_subsets = len(traj) // max_points
+             print("Splitting trajectory with length ", len(traj), "in ", num_subsets, "trajectories")
+             for i in range(num_subsets):
+                 end_pointer = len(traj)-1 if ((i+1)*max_points)+max_points > len(traj) else (i*max_points)+max_points
+                 traj_subset = traj[i*max_points:end_pointer]
+                 labels_subset = labels[i*max_points:end_pointer]
+                 assert len(traj_subset) == len(labels_subset)
+                 splitted_traj.append(traj_subset)
+                 splitted_labels.append(labels_subset)
+             return splitted_traj, splitted_labels
+         return [traj], [labels]
+
+     def prepare_data(self):
+         trajs = []
+         labels = []
+
+         self._remove_traj_containing_labels()
+         self._merge_labels("car", "taxi")
+         self._labels_to_int_repr()
+
+         for elem in self._raw:
+             assert len(elem) > 0
+             data_ = elem[self.fields_to_feed].values.tolist()
+             label_ = elem["label"].values.tolist()
+             data_, label_ = self._split_too_long_traj(data_, label_, 350)
+             trajs.extend(data_)
+             labels.extend(label_)
+
+         self._set_splitted_data(trajs, labels)
+
+     def batches(self):
+         for i in range(0, len(self.train_data), self._batchsize):
+
+             if len(self.train_data[i:i + self._batchsize]) < self._batchsize:
+                 break  # drop last incomplete batch
+
+             labels_sorted = sorted(self.train_labels[i:i + self._batchsize:], key=len, reverse=True)
+             train_sorted = sorted(self.train_data[i:i + self._batchsize:], key=len, reverse=True)
+             for p in range(len(labels_sorted)):
+                 assert len(labels_sorted[p]) == len(train_sorted[p])
+             yield train_sorted, labels_sorted
+
+     def val_batches(self):
+         for i in range(0, len(self.val_data), self._batchsize):
+
+             if len(self.val_data[i:i + self._batchsize]) < self._batchsize:
+                 break  # drop last incomplete batch
+
+             labels_sorted = sorted(self.val_labels[i:i + self._batchsize:], key=len, reverse=True)
+             val_sorted = sorted(self.val_data[i:i + self._batchsize:], key=len, reverse=True)
+             for p in range(len(labels_sorted)):
+                 assert len(labels_sorted[p]) == len(val_sorted[p])
+             yield val_sorted, labels_sorted
+
+     def test_batches(self):
+         for i in range(0, len(self.test_data), self._batchsize):
+
+             if len(self.test_data[i:i + self._batchsize]) < self._batchsize:
+                 break  # drop last incomplete batch
+
+             labels_sorted = sorted(self.test_labels[i:i + self._batchsize:], key=len, reverse=True)
+             test_sorted = sorted(self.test_data[i:i + self._batchsize:], key=len, reverse=True)
+             for p in range(len(labels_sorted)):
+                 assert len(labels_sorted[p]) == len(test_sorted[p])
+             yield test_sorted, labels_sorted
+
+     def get_train_size(self):
+         return len(self.train_data)
+
+     def get_val_size(self):
+         return len(self.val_data)
+
+     def get_test_size(self):
+         return len(self.test_data)
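
A usage sketch mirroring how classifier.py drives this loader; it assumes the enriched Geolife pickle under data/ already exists.

    from data_loader import DataLoader  # assumes the repo modules are on the path

    dl = DataLoader(batchsize=4, read_from_pickle=True)
    dl.prepare_data()

    for train_sorted, labels_sorted in dl.batches():
        # each batch is sorted longest-first so it can be padded and packed for the LSTM
        print(len(train_sorted), [len(t) for t in train_sorted])
        break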
helper.py ADDED
@@ -0,0 +1,93 @@
+ import torch
+ import numpy as np
+
+ def score_to_modality(scores: torch.Tensor):
+     tensor_list = scores.tolist()
+     modality = []
+     for row in tensor_list:
+         modality.append(row.index(max(row)))
+     return modality
+
+ class ValTest:
+     accuracy = []
+
+     def __init__(self, dl_generator, net, trip_dim, batch_size, device, loss_function, num_modes, datasize):
+         self.dl_generator = dl_generator
+         self.net = net
+         self.trip_dim = trip_dim
+         self.batch_size = batch_size
+         self.device = device
+         self.loss_function = loss_function
+         self.num_modes = num_modes
+         self.datasize = datasize
+
+     def run(self):
+
+         correct = 0
+         total = 0
+         val_losses = []
+         total_per_mode = [0] * self.num_modes
+         correct_per_mode = [0] * self.num_modes
+         journeys_eighty_percent_correct = 0
+
+         self.net.eval()  # put net in eval mode
+
+         for data, labels in self.dl_generator():
+
+             self.net.zero_grad()
+             lengths = [len(x) for x in data]
+             for i, elem in enumerate(data):
+                 while len(elem) < lengths[0]:
+                     elem.append([-1] * self.trip_dim)
+
+             X = np.asarray(data, dtype=np.float)
+             input_tensor = torch.from_numpy(X)
+             input_tensor = input_tensor.to(self.device)
+
+             output, max_padding_for_this_batch = self.net(input_tensor, lengths)
+
+             for labelz in labels:
+                 while len(labelz) < max_padding_for_this_batch:
+                     labelz.append(-1)
+
+             labels_for_loss = torch.tensor(labels) \
+                 .view(max_padding_for_this_batch * self.batch_size, -1).squeeze(1).long().to(self.device)
+
+             loss = self.loss_function(output.view(
+                 max_padding_for_this_batch * self.batch_size, -1),
+                 labels_for_loss)
+             val_losses.append(loss.item())
+
+             for k, journey in enumerate(output):
+                 journey_correct = 0
+                 predicted = score_to_modality(journey)
+
+                 o = 0
+                 for o, elem in enumerate(predicted):
+                     if labels[k][o] == -1:
+                         break
+                     total_per_mode[int(labels[k][o])] += 1
+                     if labels[k][o] == predicted[o]:
+                         correct_per_mode[predicted[o]] += 1
+                         correct += 1
+                         journey_correct += 1
+                     total += 1
+                 if journey_correct >= (o * 0.80):
+                     journeys_eighty_percent_correct += 1
+
+         mode_statistics = []
+         for k in range(len(correct_per_mode)):
+             if correct_per_mode[k] == 0 or total_per_mode[k] == 0:
+                 mode_statistics.append(0)
+                 continue
+             mode_statistics.append(1 / (total_per_mode[k] / correct_per_mode[k]))
+
+         print('Accuracy: %d %%' % (100 * correct / total))
+         print('%% of journeys at least 80%% correct: %d of %d, %d %%' % (
+             journeys_eighty_percent_correct, self.datasize, (100 * journeys_eighty_percent_correct / self.datasize)))
+         print("Loss: {:.6f}".format(np.mean(val_losses)))
+         print("Mode-correct:")
+         print(total_per_mode)
+         print(mode_statistics)
+
+         self.net.train()
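
A quick sketch of what score_to_modality does: an argmax per row of the score tensor (toy values below).

    import torch
    from helper import score_to_modality  # assumes the repo modules are on the path

    scores = torch.tensor([[0.1, 2.3, -0.5, 0.0, 0.7],
                           [1.9, 0.2, 0.4, 0.1, 0.3]])
    print(score_to_modality(scores))  # [1, 0] -- index of the highest score per row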
images/Cars.jpg ADDED
images/bike.jpg ADDED
images/bus.jpg ADDED
images/download.png ADDED
images/subway.jpg ADDED
images/taxi.jpg ADDED
images/train.jpg ADDED
images/walk.jpg ADDED
modality_lstm.py ADDED
@@ -0,0 +1,64 @@
+ import torch
+ from torch import nn
+ from torch.autograd import Variable
+
+
+ class ModalityLSTM(nn.Module):
+     def __init__(self, trip_dimension, output_size, batch_size, hidden_dim, n_layers, gpu, drop_prob, lstm_drop_prob=0.5):
+         super().__init__()
+         self.trip_dimension = trip_dimension
+         self.output_size = output_size
+         self.n_layers = n_layers
+         self.batch_size = batch_size
+         self.hidden_dim = hidden_dim
+         self.on_gpu = gpu
+         self.lstm_drop_prob = lstm_drop_prob
+         self.drop_prob = drop_prob
+
+         self.lstm = nn.LSTM(
+             input_size=self.trip_dimension,
+             hidden_size=self.hidden_dim,
+             num_layers=self.n_layers,
+             batch_first=True,
+             dropout=self.drop_prob,
+             bidirectional=True
+         )
+         self.dropout = nn.Dropout(drop_prob)
+         self.linear_fc = nn.Linear(self.hidden_dim * 2, self.output_size)
+
+
+     def init_hidden(self):
+         # the weights are of the form (nb_layers, batch_size, nb_lstm_units)
+         hidden_a = torch.randn(self.n_layers*2, self.batch_size, self.hidden_dim)
+         hidden_b = torch.randn(self.n_layers*2, self.batch_size, self.hidden_dim)
+
+         if self.on_gpu:
+             hidden_a = hidden_a.cuda()
+             hidden_b = hidden_b.cuda()
+
+         hidden_a = Variable(hidden_a)
+         hidden_b = Variable(hidden_b)
+
+         return (hidden_a, hidden_b)
+
+     def forward(self, input_tensor, lengths):
+         # shape of X: [batch_size, max_seq_len, feature_size]
+
+         # get unpadded sequence lengths (padding: -1)
+         self.hidden = self.init_hidden()
+
+         # pack the padded sequences, length contains unpadded lengths (eg., [43,46,67,121])
+         x_packed = torch.nn.utils.rnn.pack_padded_sequence(input_tensor, lengths, batch_first=True)
+
+         # feed to lstm
+         lstm_out, self.hidden = self.lstm(x_packed.float(), self.hidden)
+
+         # unpack
+         x_unpacked, seq_len = torch.nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True)
+
+         out = self.dropout(x_unpacked)
+
+         outs = []  # save all predictions
+         for point in out:
+             outs.append(self.linear_fc(point))
+         return torch.stack(outs, dim=0), max(lengths)
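
A minimal forward-pass sketch on random data, assuming the module is importable; the shapes are illustrative only, and lengths are given longest-first because pack_padded_sequence expects sorted sequences by default.

    import torch
    from modality_lstm import ModalityLSTM  # assumes the repo modules are on the path

    batch_size, seq_len, trip_dim = 2, 4, 7
    net = ModalityLSTM(trip_dim, output_size=5, batch_size=batch_size,
                       hidden_dim=128, n_layers=2, gpu=False, drop_prob=0.2)
    x = torch.randn(batch_size, seq_len, trip_dim)  # already padded
    lengths = [4, 3]                                # unpadded lengths, longest first
    scores, max_len = net(x, lengths)
    print(scores.shape, max_len)                    # torch.Size([2, 4, 5]) 4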
raw_data_loader.py ADDED
@@ -0,0 +1,59 @@
+ import os
+ import pickle
+ import pandas as pd
+ import datetime
+ import sys
+
+ from multiprocessing import Pool
+
+ def get_labeled_data_as_df(path):
+     trajectory_frames = []
+
+     labelfile = os.path.join(path, "labels.txt")
+     _label_df = pd.read_csv(labelfile, sep="\t", header=0, names=["starttime", "endtime", "mode"], parse_dates=[0, 1])
+     _label_df["startdate"] = _label_df["starttime"].dt.date
+     _label_startdate_set = set(_label_df["startdate"])
+
+     datapath = os.path.join(path, "Trajectory")
+     for file in os.listdir(datapath):
+         df = pd.read_csv(os.path.join(datapath, file),
+                          sep=",",
+                          header=None,
+                          skiprows=6,
+                          usecols=[0, 1, 3, 5, 6],
+                          names=["lat", "lon", "altitude", "date", "time"])
+
+         df["datetime"] = pd.to_datetime(df['date'] + ' ' + df['time'])
+         date_of_traj = datetime.datetime.strptime(file[:8], "%Y%m%d").date()
+
+         if date_of_traj in _label_startdate_set:
+             labels_for_date = _label_df[_label_df["startdate"] == date_of_traj]
+
+             def is_in(trajrow):
+                 for i, row in labels_for_date.iterrows():
+                     if row["starttime"] <= trajrow["datetime"] <= row["endtime"]:
+                         return row["mode"]
+
+             df["label"] = df.apply(is_in, axis=1)
+
+         trajectory_frames.append(df)
+         print("added", datapath, file)
+     return trajectory_frames
+
+ if __name__ == '__main__':
+     '''if len(sys.argv) < 2:
+         print("Usage: raw_data_loader.py /path/to/geolife/Data/")
+         exit(-1)'''
+     path = 'D:\Geolife Trajectories 1.3\Geolife Trajectories 1.3\Data'
+     traj_with_labels_paths = []
+     for file in os.listdir(path):
+         currfile = os.path.join(path, file)
+         if os.path.isdir(currfile):
+             if "labels.txt" not in os.listdir(currfile):
+                 continue
+             traj_with_labels_paths.append(currfile)
+
+     with Pool(3) as p:
+         traj_frames = p.map(get_labeled_data_as_df, traj_with_labels_paths)
+
+     pickle.dump(traj_frames, open("data/raw_labeled.pkl", "wb"))
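
For orientation, a self-contained sketch of the per-fix label lookup this loader performs, using a hypothetical two-row labels table in the same tab-separated start/end/mode layout rather than the real Geolife files.

    import io
    import pandas as pd

    labels = pd.read_csv(io.StringIO(
        "2008/05/01 08:00:00\t2008/05/01 08:30:00\twalk\n"
        "2008/05/01 08:30:01\t2008/05/01 09:00:00\tbus\n"),
        sep="\t", header=None, names=["starttime", "endtime", "mode"], parse_dates=[0, 1])

    fix_time = pd.Timestamp("2008-05-01 08:45:00")  # one GPS fix's timestamp
    mode = None
    for _, row in labels.iterrows():
        if row["starttime"] <= fix_time <= row["endtime"]:
            mode = row["mode"]
    print(mode)  # bus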
requirements.txt ADDED
Binary file (36.9 kB).