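perf(loss): 批量处理 — compute the multi-part loss with batched tensor operations instead of per-image / per-grid-cell loops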

zjZSTU · zjZSTU · commit 7ee913f4a07e · 2020-04-22T13:39:50.000+08:00
diff --git a/py/lib/models/multi_part_loss.py b/py/lib/models/multi_part_loss.py
@@ -10,6 +10,11 @@
 import numpy as np
 import torch
 import torch.nn as nn
+from torch.utils.data import DataLoader
+import torchvision.transforms as transforms
+
+from models.location_dataset import LocationDataset
+from models.yolo_v1 import YOLO_v1
 
 
 class MultiPartLoss(nn.Module):
@@ -32,87 +37,135 @@ def forward(self, preds, targets):
         :param targets: (N, S*S, (B*5+C))
         :return:
         """
-        # ## 预测
-        # # 提取每个网格的分类概率
-        # pred_probs = preds[-1, :self.S * self.S * self.C].reshape(-1, self.S, self.S, self.C)
-        # # 提取每个网格的置信度
-        # pred_confidences = preds[-1, self.S * self.S * self.C: self.S * self.S * (self.B + self.C)] \
-        #     .reshape(-1, self.S, self.S, self.B)
-        # # 提取每个网格的预测边界框坐标
-        # pred_bboxs = preds[-1, self.S * self.S * (self.B + self.C): self.S * self.S * (self.B * 5 + self.C)] \
-        #     .reshape(-1, self.S, self.S, 4)
+        N = preds.shape[0]
+        ## Predictions
+        # Class probabilities of every grid cell
+        # [N, S*S, C] -> [N*S*S, C]
+        pred_probs = preds[:, :, :self.C].reshape(-1, self.C)
+        # Confidence of every box in every grid cell
+        # [N, S*S, B] -> [N*S*S, B]
+        pred_confidences = preds[:, :, self.C: (self.B + self.C)].reshape(-1, self.B)
+        # Predicted box coordinates of every grid cell
+        # [N, S*S, B*4] -> [N*S*S, B*4] -> [N*S*S, B, 4]
+        pred_bboxs = preds[:, :, (self.B + self.C): (self.B * 5 + self.C)] \
+            .reshape(-1, self.B * 4) \
+            .reshape(-1, self.B, 4)
+
+        ## Targets
+        # Class probabilities of every grid cell
+        # [N, S*S, C] -> [N*S*S, C]
+        target_probs = targets[:, :, :self.C].reshape(-1, self.C)
+        # Confidence of every box in every grid cell
+        # [N, S*S, B] -> [N*S*S, B]
+        target_confidences = targets[:, :, self.C: (self.B + self.C)].reshape(-1, self.B)
+        # Ground-truth box coordinates of every grid cell
+        # [N, S*S, B*4] -> [N*S*S, B*4] -> [N*S*S, B, 4]
+        target_bboxs = targets[:, :, (self.B + self.C): (self.B * 5 + self.C)] \
+            .reshape(-1, self.B * 4) \
+            .reshape(-1, self.B, 4)
+
+        ## Start with the confidence loss of every box, scaled by self.noobj (as if no cell held an object)
+        loss = self.noobj * self.sum_squared_error(pred_confidences, target_confidences)
+
+        # In each grid cell, select the box with the highest predicted confidence
+        top_idxs = torch.argmax(pred_confidences, dim=1)
+        top_len = len(top_idxs)
+        # Gather the confidence and the box that belong to the selected index
+        top_pred_confidences = pred_confidences[range(top_len), top_idxs]
+        top_pred_bboxs = pred_bboxs[range(top_len), top_idxs]
+
+        top_target_confidences = target_confidences[range(top_len), top_idxs]
+        top_target_bboxs = target_bboxs[range(top_len), top_idxs]
+        # Shapes after the gather: confidences [N*S*S], boxes [N*S*S, 4]
+        # (the same top_idxs index predictions and targets, so the pairs stay aligned)
+
+        # Keep the grid cells that contain an object: their target class scores sum to 1
+        obj_idxs = torch.sum(target_probs, dim=1) == 1
+        # obj_idxs is a boolean mask of shape [N*S*S]
+
+        obj_pred_confidences = top_pred_confidences[obj_idxs]
+        obj_pred_bboxs = top_pred_bboxs[obj_idxs]
+        obj_pred_probs = pred_probs[obj_idxs]
+
+        obj_target_confidences = top_target_confidences[obj_idxs]
+        obj_target_bboxs = top_target_bboxs[obj_idxs]
+        obj_target_probs = target_probs[obj_idxs]
+
+        ## Confidence loss of the selected box in object cells: adds the remaining (1 - self.noobj) share
+        loss += (1 - self.noobj) * self.sum_squared_error(obj_pred_confidences, obj_target_confidences)
+        ## Class probability loss (object cells only)
+        loss += self.sum_squared_error(obj_pred_probs, obj_target_probs)
+        ## Box loss weighted by self.coord (as the per-cell loop did); NOTE(review): sqrt of the last two columns (presumably w, h) is NaN for negative predictions - confirm outputs are >= 0
+        loss += self.coord * self.sum_squared_error(obj_pred_bboxs[:, :2], obj_target_bboxs[:, :2])
+        loss += self.coord * self.sum_squared_error(torch.sqrt(obj_pred_bboxs[:, 2:]), torch.sqrt(obj_target_bboxs[:, 2:]))
+
+        return loss / N
+
+        # N = preds.shape[0]
+        # total_loss = 0.0
+        # print(preds.shape)
+        # print(targets.shape)
+        # for pred, target in zip(preds, targets):
+        #     """
+        #     逐个图像计算
+        #     pred: [S*S, (B*5+C)]
+        #     target: [S*S, (B*5+C)]
+        #     """
+        #     # 分类概率
+        #     pred_probs = pred[:, :self.C]
+        #     target_probs = target[:, :self.C]
+        #     # 置信度
+        #     pred_confidences = pred[:, self.C:(self.C + self.B)]
+        #     target_confidences = target[:, self.C:(self.C + self.B)]
+        #     # 边界框坐标
+        #     pred_bboxs = pred[:, (self.C + self.B):]
+        #     target_bboxs = target[:, (self.C + self.B):]
         #
-        # ## 目标
-        # # 每个网格的分类
-        # target_probs = targets[-1, :self.S * self.S].reshape(-1, self.S, self.S)
-        # # 置信度
-        # target_confidences = targets[-1, self.S * self.S: self.S * self.S * 2].reshape(-1, self.S, self.S)
-        # # 坐标
-        # target_bboxs = targets[-1, self.S * self.S * 2:self.S * self.S * 6].reshape(-1, self.S, self.S, 4)
+        #     for i in range(self.S * self.S):
+        #         """
+        #         逐个网格计算
+        #         """
+        #         pred_single_probs = pred_probs[i]
+        #         target_single_probs = target_probs[i]
         #
-        # # 图像中哪些网格包含了目标（根据分类判断）
-        # objs = torch.where(target_probs != -1)
-        # # 哪些不包含目标
-        # nobjs = torch.where(target_probs == -1)
+        #         pred_single_confidences = pred_confidences[i]
+        #         target_single_confidences = target_confidences[i]
         #
-        # ## 首先计算包含了分类的
-
-        N = preds.shape[0]
-        total_loss = 0.0
-        for pred, target in zip(preds, targets):
-            """
-            逐个图像计算
-            pred: [S*S, (B*5+C)]
-            target: [S*S, (B*5+C)]
-            """
-            # 分类概率
-            pred_probs = pred[:, :self.C]
-            target_probs = target[:, :self.C]
-            # 置信度
-            pred_confidences = pred[:, self.C:(self.C + self.B)]
-            target_confidences = target[:, self.C:(self.C + self.B)]
-            # 边界框坐标
-            pred_bboxs = pred[:, (self.C + self.B):]
-            target_bboxs = target[:, (self.C + self.B):]
-
-            for i in range(self.S * self.S):
-                """
-                逐个网格计算
-                """
-                pred_single_probs = pred_probs[i]
-                target_single_probs = target_probs[i]
-
-                pred_single_confidences = pred_confidences[i]
-                target_single_confidences = target_confidences[i]
-
-                pred_single_bboxs = pred_bboxs[i]
-                target_single_bboxs = target_bboxs[i]
-
-                # 是否存在置信度（如果存在，则target的置信度必然大于0）
-                is_obj = target_single_confidences[0] > 0
-                # 计算置信度损失 假定该网格不存在对象
-                total_loss += self.noobj * self.sum_squared_error(pred_single_confidences, target_single_confidences)
-                if is_obj:
-                    # 如果存在
-                    # 计算分类损失
-                    total_loss += self.sum_squared_error(pred_single_probs, target_single_probs)
-
-                    # 计算所有预测边界框和标注边界框的IoU
-                    pred_single_bboxs = pred_single_bboxs.reshape(-1, 4)
-                    target_single_bboxs = target_single_bboxs.reshape(-1, 4)
-
-                    scores = self.iou(pred_single_bboxs, target_single_bboxs)
-                    # 提取IoU最大的下标
-                    bbox_idx = torch.argmax(scores)
-                    # 计算置信度损失
-                    total_loss += (1 - self.noobj) * \
-                                  self.sum_squared_error(pred_single_confidences[bbox_idx],
-                                                         target_single_confidences[bbox_idx])
-                    # 计算边界框损失
-                    total_loss += self.coord * self.bbox_loss(pred_single_bboxs[bbox_idx].reshape(-1, 4),
-                                                              target_single_bboxs[bbox_idx].reshape(-1, 4))
-
-        return total_loss / N
+        #         pred_single_bboxs = pred_bboxs[i]
+        #         target_single_bboxs = target_bboxs[i]
+        #
+        #         # 是否存在置信度（如果存在，则target的置信度必然大于0）
+        #         is_obj = target_single_confidences[0] > 0
+        #         # 计算置信度损失 假定该网格不存在对象
+        #         total_loss += self.noobj * self.sum_squared_error(pred_single_confidences, target_single_confidences)
+        #         print(total_loss)
+        #         if is_obj:
+        #             print('i = %d' % (i))
+        #             # 如果存在
+        #             # 计算分类损失
+        #             total_loss += self.sum_squared_error(pred_single_probs, target_single_probs)
+        #             print(total_loss)
+        #
+        #             # 计算所有预测边界框和标注边界框的IoU
+        #             pred_single_bboxs = pred_single_bboxs.reshape(-1, 4)
+        #             target_single_bboxs = target_single_bboxs.reshape(-1, 4)
+        #
+        #             scores = self.iou(pred_single_bboxs, target_single_bboxs)
+        #             # 提取IoU最大的下标
+        #             bbox_idx = torch.argmax(scores)
+        #             # 计算置信度损失
+        #             total_loss += (1 - self.noobj) * \
+        #                           self.sum_squared_error(pred_single_confidences[bbox_idx],
+        #                                                  target_single_confidences[bbox_idx])
+        #             print(total_loss)
+        #             # 计算边界框损失
+        #             total_loss += self.coord * self.bbox_loss(pred_single_bboxs[bbox_idx].reshape(-1, 4),
+        #                                                       target_single_bboxs[bbox_idx].reshape(-1, 4))
+        #             print(total_loss)
+        #
+        #             print('done')
+        #
+        # return total_loss / N
 
     def sum_squared_error(self, preds, targets):
         return torch.sum((preds - targets) ** 2)
@@ -155,11 +208,42 @@ def iou(self, pred_boxs, target_boxs):
         return torch.from_numpy(scores)
 
 
-if __name__ == '__main__':
-    criterion = MultiPartLoss(S=7, B=2, C=3)
+def load_data(data_root_dir, cate_list, S=7, B=2, C=20):
+    """Return a DataLoader (batch_size=1, num_workers=8) over a LocationDataset whose images are resized to 448x448 and normalized."""
+    transform = transforms.Compose([
+        transforms.ToPILImage(),
+        transforms.Resize((448, 448)),
+        transforms.ToTensor(),
+        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+    ])
+    data_set = LocationDataset(data_root_dir, cate_list, transform=transform, S=S, B=B, C=C)
+    data_loader = DataLoader(data_set, batch_size=1, num_workers=8)
 
-    preds = torch.arange(637).reshape(1, 7 * 7, 13) * 0.01
-    targets = torch.ones((1, 7 * 7, 13)) * 0.01
+    return data_loader
 
-    loss = criterion.forward(preds, targets)
-    print(loss)
+
+if __name__ == '__main__':
+    # Smoke test: push one batch through YOLO_v1 and print its multi-part loss.
+    S = 7
+    B = 2
+    C = 3
+    cate_list = ['cucumber', 'eggplant', 'mushroom']
+
+    # Build the criterion from the same S/B/C as the data loader and the model
+    # so the three cannot drift apart.
+    criterion = MultiPartLoss(S=S, B=B, C=C)
+    data_loader = load_data('../../data/location_dataset', cate_list, S=S, B=B, C=C)
+    model = YOLO_v1(S=S, B=B, C=C)
+
+    for inputs, labels in data_loader:
+        print(inputs.shape)
+        print(labels.shape)
+
+        # Inference only: no autograd graph is needed to inspect the loss value.
+        with torch.no_grad():
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            print(loss)
+        # One batch is enough for a smoke test; `break` ends the loop without
+        # relying on the site-provided `exit` builtin.
+        break