diff --git a/model/net.py b/model/net.py index 7adefe6..93bbdb3 100644 --- a/model/net.py +++ b/model/net.py @@ -323,20 +323,21 @@ def finetune(self, dataloaders, dataset_size, log_dir, ckpt_path, epochs=10): rgbs = rgbs.float().to(self.device) # B, S, H, W, C rgbs = (2 * (rgbs / 255.0) - 1.0) # normalizing [-1 1] - trajs_g = trajs_g.permute(0, 2, 1, 3).to(self.device) - valids = visibs_g.permute(0, 2, 1).to(self.device) #B, S, N + trajs_g = trajs_g.permute(0, 2, 1, 3).to(self.device) #B, N, S, 2 format:(x,y) + valids = visibs_g.permute(0, 2, 1).to(self.device) #B, N, S B, S, H, W, C = rgbs.shape trajs_g[...,0] *= W - 1 trajs_g[...,1] *= H - 1 _, N, _, _ = trajs_g.shape - points_0 = trajs_g[:,0,:,:] # taking all points from frame 0 - # from (x, y) to (t, x, y) + points_0_xy = trajs_g[:,:,0,:] #B, N, 2 taking all points from frame 0 + # from (x, y) to (t, y, x) + points_0 = points_0_xy[:,:,[1,0]] #format:(y,x) # preparing the time dimension to be concatenated #points_0 = points_0.cpu().numpy() time_dim = torch.zeros((points_0.shape[0], points_0.shape[1], 1)).to(self.device) # prepending a column to be -> (B, N, 3) - points_0 = torch.concatenate((time_dim, points_0), axis=-1) + points_0 = torch.concatenate((time_dim, points_0), axis=-1) #format:(t, y, x) if phase == 'train': self.model.train() # Set model to training mode @@ -350,16 +351,19 @@ def finetune(self, dataloaders, dataset_size, log_dir, ckpt_path, epochs=10): outs, loss = self.model(video=rgbs, query_points=points_0, points_gt=trajs_g, visibs_gt=valids) - trajs_e = outs['tracks'] + trajs_e = outs['tracks'] #B, N, S, 2 format:(x,y) + occlusions = outs['occlusion'] + expected_dist = outs['expected_dist'] + visibs_e = (1 - F.sigmoid(occlusions)) * (1 - F.sigmoid(expected_dist)) > 0.5 trajs_e[...,0] /= W - 1 trajs_e[...,1] /= H - 1 trajs_g[...,0] /= W - 1 trajs_g[...,1] /= H - 1 - points_0[...,0] /= W - 1 - points_0[...,1] /= H - 1 + points_0_xy[...,0] /= W - 1 + points_0_xy[...,1] /= H - 1 #print(points_0.shape, trajs_g.shape, trajs_e.shape, visibs_g.shape) - outputs = evaluate.compute_metrics(points_0.cpu().numpy(), trajs_g.permute(0, 2, 1, 3).cpu().numpy(), - visibs_g.cpu().numpy(), trajs_e.detach().cpu().numpy(), visibs_g.cpu().numpy()) + outputs = evaluate.compute_metrics(points_0_xy.cpu().numpy(), trajs_g.cpu().numpy(), + valids.cpu().numpy(), trajs_e.detach().cpu().numpy(), visibs_e.cpu().numpy()) for key, value in outputs.items(): metrics_b[key] += value diff --git a/utils/trainer.py b/utils/trainer.py index 40f897a..3e8fd9f 100644 --- a/utils/trainer.py +++ b/utils/trainer.py @@ -17,7 +17,7 @@ def sequence_loss(flow_preds, flow_gt, vis, valids, gamma=0.8): flow_loss = 0.0 for i in range(n_predictions): i_weight = gamma**(n_predictions - i - 1) - flow_pred = flow_preds[i] + flow_pred = flow_preds[i].permute(0, 2, 1, 3) i_loss = (flow_pred - flow_gt).abs() # B,S,N,2 i_loss = torch.mean(i_loss, dim=3) # B,S,N #flow_loss += i_weight * basic.reduce_masked_mean(i_loss, valids)