diff --git a/data_utils.py b/data_utils.py
index 7ce5742..86094ca 100644
--- a/data_utils.py
+++ b/data_utils.py
@@ -1,72 +1,120 @@
 import numpy as np
 import torch
 import random

 def computeVelocity(coords, timestep=4):
     '''
     Compute the velocity of a pedestrian at each timestep given its coordinates over the sequence
     timestep = 4 seconds by default (one observation every 10 frames at 2.5 fps)
     Returns the velocity array (vx, vy)
     '''
     npos = coords.size()[0] #number of observed positions
     velocity = np.zeros((npos,2))
     x = coords[:,0]
     y = coords[:,1]
     for i in range(1, npos):
         velocity[i,0] = (x[i]-x[i-1])/timestep
         velocity[i,1] = (y[i]-y[i-1])/timestep
     velocity = torch.from_numpy(velocity)
     return velocity

 def getTargets(coords):
     '''
     Returns the target array for the given coordinates, for supervised training
     '''
     targets = coords[1:]
     return targets

 def flipPosition(traindata, method):
     '''
     Flips the coordinates of every pedestrian in traindata
     method:
     1: change the sign of the x positions (vertical symmetry)
     2: change the sign of the y positions (horizontal symmetry)
     3: change the sign of both x and y positions
     '''
     for p in range(0, len(traindata)): #loop over each pedestrian
         if traindata[p].neighbors is None:
             continue
         if method == 1: #change the sign of the x positions
             traindata[p].coords[:,0] = - traindata[p].coords[:,0]
             traindata[p].neighbors[:,2] = - traindata[p].neighbors[:,2]
         elif method == 2: #change the sign of the y positions
             traindata[p].coords[:,1] = - traindata[p].coords[:,1]
             traindata[p].neighbors[:,3] = - traindata[p].neighbors[:,3]
         else: #change both signs
             traindata[p].coords = - traindata[p].coords
             traindata[p].neighbors[:,2:4] = - traindata[p].neighbors[:,2:4]
     return traindata

-def getCoords(velocity, timestep = 4):
+def getCoords(velocity, init_coords, timestep = 4):
     '''
-    Compute x and y coordinates given a velocity array
+    Compute x and y coordinates given a velocity array and the initial coordinates
     '''
-    x_pos = velocity[:,0]*timestep
-    y_pos = velocity[:,1]*timestep
+    x_old = init_coords[:,0]
+    y_old = init_coords[:,1]
+    x_pos = []
+    y_pos = []
+
+    for i in range(velocity.size()[0]):
+        #integrate the velocity forward from the previous position
+        x_new = velocity[i,0]*timestep + x_old
+        y_new = velocity[i,1]*timestep + y_old
+
+        x_pos += [x_new]
+        y_pos += [y_new]
+
+        x_old = x_new
+        y_old = y_new
+
+    x_pos = torch.cat(x_pos, 0)
+    y_pos = torch.cat(y_pos, 0)

     return x_pos, y_pos
+
+def getOccupancyTensor(pedestrian):
+    '''
+    Compute the occupancy map pooling (cf. Alahi et al., Social LSTM)
+    pedestrian: pedestrian object
+    '''
+    current_frames = pedestrian.frames #frames in which the current pedestrian appears
+    ped_coords = pedestrian.coords
+    neighbors = pedestrian.neighbors
+
+    if neighbors is None:
+        occupancy = None
+    else:
+        neigh_frames = neighbors[:,0] #frames in which the neighbors appear
+        neigh_coords = neighbors[:,2:4] #coordinates of the neighbors
+
+        occupancy = []
+        for i, frame in enumerate(current_frames):
+            #sum the offsets of all neighbors present in this frame
+            idx = (neigh_frames == frame).nonzero()
+            pooled_x = torch.sum(neigh_coords[idx,0] - ped_coords[i,0])
+            pooled_y = torch.sum(neigh_coords[idx,1] - ped_coords[i,1])
+            occupancy += [torch.stack((pooled_x, pooled_y))]
+
+        occupancy = torch.stack(occupancy, 0) #(npos, 2)
+
+    return occupancy
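+
+#Illustrative smoke test (a sketch, not part of the original training pipeline):
+#shows how computeVelocity and getCoords round-trip on a hypothetical straight-line trajectory.
+if __name__ == '__main__':
+    coords = torch.tensor([[0.0, 0.0], [4.0, 2.0], [8.0, 4.0]], dtype=torch.float64) #3 observed positions
+    vel = computeVelocity(coords) #(3, 2); the first row stays zero
+    #skip the zero first row and integrate forward from the first position
+    x, y = getCoords(vel[1:], coords[0:1])
+    print(x, y) #should recover coords[1:,0] and coords[1:,1]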
diff --git a/lstm.py b/lstm.py
index 39b2970..b3b3578 100644
--- a/lstm.py
+++ b/lstm.py
@@ -1,77 +1,67 @@
 import torch
 import torch.nn as nn
 from torch.autograd import Variable

 class LSTMmodel(nn.Module):
     '''
     Class representing the LSTM model
     '''
     def __init__(self):
         super(LSTMmodel,self).__init__()

         # Store required sizes
         self.hidden_size = 128
         #self.grid_size = args.grid_size
         self.embedding_size = 64
         #self.pooling_size = args.pooling_size #pooling window
         self.input_size = 2
         self.output_size = 2 #parameters of bivariate distribution
         #self.neighborhood_size = args.neighborhood_size

         # The LSTM cell (cf. Social LSTM), embedding size = 64
         self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, dropout = 0.2)

         # Linear layer to embed the input position for the LSTM
         self.input_embedding_layer = nn.Linear(self.input_size, self.embedding_size)

         # Linear layer to embed the social tensor
         #self.tensor_embedding_layer = nn.Linear(self.neighborhood_size*self.neighborhood_size*self.hidden_size, self.embedding_size)

         # Linear layer to map the hidden state of the LSTM to the output
         self.output_layer = nn.Linear(self.hidden_size, self.output_size)

         # ReLU and dropout units
         self.relu = nn.ReLU()
         #self.dropout = nn.Dropout(0.5)

     def forward(self, peds, future = 0):
         '''
         Forward pass for the model
         params:
         peds: coordinates of the pedestrian over the observed sequence
         future: number of future timesteps to predict
         returns:
         outputs: predicted coordinates for the observed and future timesteps
         '''
         outputs = []
         hidden_states = Variable(torch.zeros(1,1,self.hidden_size))
         cell_states = Variable(torch.zeros(1,1,self.hidden_size))

         #input = self.dropout(self.relu(self.input_embedding_layer(peds)))
         input = self.relu(self.input_embedding_layer(peds))
         lstm_out, (hidden_states, cell_states) = self.lstm(input, (hidden_states, cell_states))
         output = self.output_layer(lstm_out)
         outputs += [output]
-        print(output.size())
+
         for i in range(future): #predict future steps autoregressively, feeding back the last prediction
             new_out = self.relu(self.input_embedding_layer(output[-1:]))
             lstm_out, (hidden_states, cell_states) = self.lstm(new_out, (hidden_states, cell_states))
             output = self.output_layer(lstm_out)
-            print(output.size())
             outputs += [output]

         outputs = torch.cat(outputs, 0)
         return outputs
-
-
-
-
-
-
-        #weigths = torch.cat((weights,output),0)
-
-        return output, h_peds, c_peds
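+
+#Usage sketch (not part of the original file; shapes are assumptions):
+#the model expects a (seq_len, batch=1, 2) float tensor of positions.
+if __name__ == '__main__':
+    model = LSTMmodel()
+    obs = torch.randn(8, 1, 2) #8 observed (x, y) positions, batch of 1
+    pred = model(obs, future=12) #(8 + 12, 1, 2): one output per observed step plus 12 extrapolated steps
+    print(pred.size())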
diff --git a/lstm2.py b/lstm2.py
deleted file mode 100644
index 0c17a40..0000000
--- a/lstm2.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.autograd import Variable
-
-class LSTMmodel(nn.Module):
-    '''
-    Class representing the LSTM model
-    '''
-    def __init__(self):
-        super(LSTMmodel,self).__init__()
-
-        #self.inputlinear = nn.Linear(1, 64)
-        self.lstm1 = nn.LSTMCell(1, 128)
-        self.lstm2 = nn.LSTMCell(128, 128)
-        self.outputlinear = nn.Linear(128, 1)
-
-    def forward(self, input, future = 0):
-        outputs = []
-        h_t = torch.zeros(input.size(0), 128, dtype=torch.double)
-        c_t = torch.zeros(input.size(0), 128, dtype=torch.double)
-        h_t2 = torch.zeros(input.size(0), 128, dtype=torch.double)
-        c_t2 = torch.zeros(input.size(0), 128, dtype=torch.double)
-
-        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
-            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
-            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
-            output = self.linear(h_t2)
-            outputs += [output]
-        for i in range(future): # if we should predict the future
-            h_t, c_t = self.lstm1(output, (h_t, c_t))
-            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
-            output = self.linear(h_t2)
-            outputs += [output]
-        outputs = torch.stack(outputs, 1).squeeze(2)
-        return outputs
diff --git a/lstm.py b/sociallstm.py
similarity index 65%
copy from lstm.py
copy to sociallstm.py
index 39b2970..0346855 100644
--- a/lstm.py
+++ b/sociallstm.py
@@ -1,77 +1,68 @@
 import torch
 import torch.nn as nn
 from torch.autograd import Variable
+import project.data_utils as du

-class LSTMmodel(nn.Module):
+class SocialLSTM(nn.Module):
     '''
-    Class representing the LSTM model
+    Class representing the Social LSTM model
     '''
     def __init__(self):
-        super(LSTMmodel,self).__init__()
+        super(SocialLSTM,self).__init__()

         # Store required sizes
         self.hidden_size = 128
         #self.grid_size = args.grid_size
         self.embedding_size = 64
         #self.pooling_size = args.pooling_size #pooling window
         self.input_size = 2
         self.output_size = 2 #parameters of bivariate distribution
         #self.neighborhood_size = args.neighborhood_size

         # The LSTM cell (cf. Social LSTM), embedding size = 64
-        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, dropout = 0.2)
-
+        self.lstm = nn.LSTM(2*self.embedding_size, self.hidden_size, dropout = 0.2) #social branch: position + social embeddings
+        self.lstm2 = nn.LSTM(self.embedding_size, self.hidden_size, dropout = 0.2) #fallback branch: position embedding only

         # Linear layer to embed the input position for the LSTM
         self.input_embedding_layer = nn.Linear(self.input_size, self.embedding_size)

         # Linear layer to embed the social tensor
         #self.tensor_embedding_layer = nn.Linear(self.neighborhood_size*self.neighborhood_size*self.hidden_size, self.embedding_size)

         # Linear layer to map the hidden state of the LSTM to the output
         self.output_layer = nn.Linear(self.hidden_size, self.output_size)

         # ReLU and dropout units
         self.relu = nn.ReLU()
         #self.dropout = nn.Dropout(0.5)

-    def forward(self, peds, future = 0):
+    def forward(self, peds, social_tensor, future = 0):
         '''
         Forward pass for the model
         params:
-        peds: coordinates of the pedestrian over the observed sequence
-        future: number of future timesteps to predict
-        returns:
-        outputs: predicted coordinates for the observed and future timesteps
+        peds: pedestrian coordinates
+        social_tensor: pooled neighbor information (None if the pedestrian has no neighbors)
+        future: number of future timesteps to predict
         '''
         outputs = []
         hidden_states = Variable(torch.zeros(1,1,self.hidden_size))
         cell_states = Variable(torch.zeros(1,1,self.hidden_size))

-        #input = self.dropout(self.relu(self.input_embedding_layer(peds)))
         input = self.relu(self.input_embedding_layer(peds))
-        lstm_out, (hidden_states, cell_states) = self.lstm(input, (hidden_states, cell_states))
+
+        if social_tensor is None:
+            #no neighbors: use the plain (non-social) branch
+            lstm_out, (hidden_states, cell_states) = self.lstm2(input, (hidden_states, cell_states))
+        else:
+            social_embed = self.relu(self.input_embedding_layer(social_tensor))
+            #concatenate along the feature dimension to match the 2*embedding_size input of self.lstm
+            concat_embed = torch.cat((input, social_embed), 2)
+            lstm_out, (hidden_states, cell_states) = self.lstm(concat_embed, (hidden_states, cell_states))
+
         output = self.output_layer(lstm_out)
         outputs += [output]
-        print(output.size())
+
         for i in range(future): #predict future steps autoregressively, feeding back the last prediction
             new_out = self.relu(self.input_embedding_layer(output[-1:]))
-            lstm_out, (hidden_states, cell_states) = self.lstm(new_out, (hidden_states, cell_states))
+            #concat_embed = torch.cat((new_out, social_embed), 2)
+            lstm_out, (hidden_states, cell_states) = self.lstm2(new_out, (hidden_states, cell_states))
             output = self.output_layer(lstm_out)
-            print(output.size())
             outputs += [output]

         outputs = torch.cat(outputs, 0)
         return outputs
-
-
-
-
-
-
-        #weigths = torch.cat((weights,output),0)
-
-        return output, h_peds, c_peds
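+
+#Usage sketch (not part of the original file; shapes are assumptions):
+#peds is (seq_len, 1, 2); social_tensor, when given, must have the same shape
+#(e.g. the output of du.getOccupancyTensor reshaped to one entry per timestep).
+if __name__ == '__main__':
+    model = SocialLSTM()
+    obs = torch.randn(8, 1, 2) #observed (x, y) positions
+    social = torch.randn(8, 1, 2) #stand-in for the pooled neighbor offsets
+    pred = model(obs, social, future=12) #(8 + 12, 1, 2)
+    print(pred.size())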