import os
import pickle
import numpy as np
# Data loader class that loads data from the datasets, treating each frame
# as a datapoint and a sequence of consecutive frames as one sample.
class SocialDataLoader():
def __init__(self, batch_size=50, seq_length=5, maxNumPeds=40, datasets=[0, 1, 2, 3, 4], forcePreProcess=False):
        '''
        Initialiser function for the SocialDataLoader class
        params:
        batch_size : Size of the mini-batch
        seq_length : Length of each sequence of frames
        maxNumPeds : Maximum number of pedestrians in a single frame (obtained by checking the datasets)
        datasets : Indices into self.data_dirs selecting which datasets to use
        forcePreProcess : Flag to forcefully preprocess the data again from csv files
        '''
# List of data directories where raw data resides
# self.data_dirs = ['./data/eth/univ', './data/eth/hotel',
# './data/ucy/zara/zara01', './data/ucy/zara/zara02',
# './data/ucy/univ']
self.data_dirs = ['./data/eth/univ', './data/eth/hotel']
self.used_data_dirs = [self.data_dirs[x] for x in datasets]
# Number of datasets
self.numDatasets = len(self.data_dirs)
# Data directory where the pre-processed pickle file resides
self.data_dir = './data'
# Maximum number of peds in a single frame (Number obtained by checking the datasets)
self.maxNumPeds = maxNumPeds
# Store the arguments
self.batch_size = batch_size
self.seq_length = seq_length
# Define the path in which the process data would be stored
data_file = os.path.join(self.data_dir, "social-trajectories.cpkl")
# If the file doesn't exist or forcePreProcess is true
if not(os.path.exists(data_file)) or forcePreProcess:
print("Creating pre-processed data from raw data")
# Preprocess the data from the csv files of the datasets
# Note that this data is processed in frames
self.frame_preprocess(self.used_data_dirs, data_file)
# Load the processed data from the pickle file
self.load_preprocessed(data_file)
# Reset all the data pointers of the dataloader object
self.reset_batch_pointer()
def frame_preprocess(self, data_dirs, data_file):
'''
Function that will pre-process the pixel_pos.csv files of each dataset
into data with occupancy grid that can be used
params:
data_dirs : List of directories where raw data resides
data_file : The file into which all the pre-processed data needs to be stored
'''
# all_frame_data would be a list of numpy arrays corresponding to each dataset
# Each numpy array would be of size (numFrames, maxNumPeds, 3) where each pedestrian's
# pedId, x, y , in each frame is stored
all_frame_data = []
# frameList_data would be a list of lists corresponding to each dataset
# Each list would contain the frameIds of all the frames in the dataset
frameList_data = []
# numPeds_data would be a list of lists corresponding to each dataset
        # Each list would contain the number of pedestrians in each frame in the dataset
numPeds_data = []
# Index of the current dataset
dataset_index = 0
# For each dataset
for directory in data_dirs:
# Define path of the csv file of the current dataset
file_path = os.path.join(directory, 'pixel_pos.csv')
# Load the data from the csv file
data = np.genfromtxt(file_path, delimiter=',')
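            # data is loaded as a 4 x numObservations array: row 0 holds the
            # frame IDs, row 1 the pedestrian IDs, and rows 3 and 2 are read
            # below as the x and y positions respectively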
# Frame IDs of the frames in the current dataset
frameList = np.unique(data[0, :]).tolist()
# Number of frames
numFrames = len(frameList)
# Add the list of frameIDs to the frameList_data
frameList_data.append(frameList)
# Initialize the list of numPeds for the current dataset
numPeds_data.append([])
# Initialize the numpy array for the current dataset
all_frame_data.append(np.zeros((numFrames, self.maxNumPeds, 3)))
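            # Since the array is zero-initialized, slots beyond the pedestrians
            # actually present in a frame remain zero-padded (pedID 0)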
# index to maintain the current frame
curr_frame = 0
for frame in frameList:
# Extract all pedestrians in current frame
pedsInFrame = data[:, data[0, :] == frame]
# Extract peds list
pedsList = pedsInFrame[1, :].tolist()
# Helper print statement to figure out the maximum number of peds in any frame in any dataset
# if len(pedsList) > 1:
# print len(pedsList)
# DEBUG
# continue
# Add number of peds in the current frame to the stored data
numPeds_data[dataset_index].append(len(pedsList))
# Initialize the row of the numpy array
pedsWithPos = []
# For each ped in the current frame
for ped in pedsList:
# Extract their x and y positions
current_x = pedsInFrame[3, pedsInFrame[1, :] == ped][0]
current_y = pedsInFrame[2, pedsInFrame[1, :] == ped][0]
# Add their pedID, x, y to the row of the numpy array
pedsWithPos.append([ped, current_x, current_y])
# Add the details of all the peds in the current frame to all_frame_data
all_frame_data[dataset_index][curr_frame, 0:len(pedsList), :] = np.array(pedsWithPos)
# Increment the frame index
curr_frame += 1
# Increment the dataset index
dataset_index += 1
# Save the tuple (all_frame_data, frameList_data, numPeds_data) in the pickle file
        with open(data_file, "wb") as f:
            pickle.dump((all_frame_data, frameList_data, numPeds_data), f, protocol=2)
def load_preprocessed(self, data_file):
'''
Function to load the pre-processed data into the DataLoader object
params:
data_file : the path to the pickled data file
'''
# Load data from the pickled file
        with open(data_file, 'rb') as f:
            self.raw_data = pickle.load(f)
# Get all the data from the pickle file
self.data = self.raw_data[0]
self.frameList = self.raw_data[1]
self.numPedsList = self.raw_data[2]
counter = 0
# For each dataset
for dataset in range(len(self.data)):
# get the frame data for the current dataset
all_frame_data = self.data[dataset]
            print(len(all_frame_data))
# Increment the counter with the number of sequences in the current dataset
counter += int(len(all_frame_data) / (self.seq_length+2))
# Calculate the number of batches
self.num_batches = int(counter/self.batch_size)
def next_batch(self):
'''
Function to get the next batch of points
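        Returns three lists, each of length batch_size:
        x_batch : source sequences of shape (seq_length, maxNumPeds, 3)
        y_batch : target sequences (the sources shifted forward by one frame), same shape
        d : the dataset index each sequence was drawn from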
'''
# Source data
x_batch = []
# Target data
y_batch = []
# Dataset data
d = []
# Iteration index
i = 0
while i < self.batch_size:
# Extract the frame data of the current dataset
frame_data = self.data[self.dataset_pointer]
# Get the frame pointer for the current dataset
idx = self.frame_pointer
            # If there are at least seq_length + 1 frames left in the current dataset
if idx + self.seq_length < frame_data.shape[0]:
# All the data in this sequence
seq_frame_data = frame_data[idx:idx+self.seq_length+1, :]
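                # Source frames are idx..idx+seq_length-1 and target frames are
                # shifted forward by one, so each frame's target holds the
                # pedestrians' positions in the following frame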
seq_source_frame_data = frame_data[idx:idx+self.seq_length, :]
seq_target_frame_data = frame_data[idx+1:idx+self.seq_length+1, :]
# Number of unique peds in this sequence of frames
pedID_list = np.unique(seq_frame_data[:, :, 0])
numUniquePeds = pedID_list.shape[0]
sourceData = np.zeros((self.seq_length, self.maxNumPeds, 3))
targetData = np.zeros((self.seq_length, self.maxNumPeds, 3))
for seq in range(self.seq_length):
sseq_frame_data = seq_source_frame_data[seq, :]
tseq_frame_data = seq_target_frame_data[seq, :]
for ped in range(numUniquePeds):
pedID = pedID_list[ped]
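                        # pedID 0 corresponds to the zero-padded empty slots in
                        # all_frame_data, not to a real pedestrian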
if pedID == 0:
continue
else:
sped = sseq_frame_data[sseq_frame_data[:, 0] == pedID, :]
tped = np.squeeze(tseq_frame_data[tseq_frame_data[:, 0] == pedID, :])
if sped.size != 0:
sourceData[seq, ped, :] = sped
if tped.size != 0:
targetData[seq, ped, :] = tped
x_batch.append(sourceData)
y_batch.append(targetData)
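                # Advance the frame pointer by seq_length so that consecutive
                # sequences drawn from this dataset do not overlap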
self.frame_pointer += self.seq_length
d.append(self.dataset_pointer)
i += 1
else:
# Not enough frames left
# Increment the dataset pointer and set the frame_pointer to zero
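                # Note that i is not incremented, so the loop retries with the
                # next dataset until a full batch has been collected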
self.tick_batch_pointer()
return x_batch, y_batch, d
def tick_batch_pointer(self):
'''
Advance the dataset pointer
'''
# Go to the next dataset
self.dataset_pointer += 1
# Set the frame pointer to zero for the current dataset
self.frame_pointer = 0
# If all datasets are done, then go to the first one again
if self.dataset_pointer >= len(self.data):
self.dataset_pointer = 0
def reset_batch_pointer(self):
'''
Reset all pointers
'''
# Go to the first frame of the first dataset
self.dataset_pointer = 0
self.frame_pointer = 0
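

# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes the pixel_pos.csv files referenced above exist under ./data, and
# uses datasets=[0, 1] to match the two directories currently configured in
# data_dirs.
if __name__ == '__main__':
    loader = SocialDataLoader(batch_size=8, seq_length=5, maxNumPeds=40,
                              datasets=[0, 1], forcePreProcess=True)
    print("Number of batches per epoch:", loader.num_batches)
    # Fetch one mini-batch and inspect its shape
    x_batch, y_batch, d = loader.next_batch()
    print(len(x_batch), x_batch[0].shape)  # batch_size arrays of (seq_length, maxNumPeds, 3)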