物体检测

计算机视觉中目标检测、跟踪、识别是最基本的几个task,其中又以检测最为重要和基础。同时,基本上所有的检测任务都需要在给出物体的bounding box之后,再给出物体的类别(也就是识别物体)。

其思想就是在目标区域用bounding box随机框出一些区域,验证该区域是否有可以检测到的物体,并在一次次的纠错训练中找到物体真正的所在位置。以下是针对灵活使用bounding box的一些练习。

练习一

1、为图片添加3*3个bounding box,并在每个bounding box 里添加3个bounding box。
2、在每个bounding box里随机嵌入更小的bounding box。

  • 优点:为整张图片均匀地加了bounding box,不存在漏掉哪部分的情况。
  • 缺点:bounding boxes 之间无重叠,相互独立,容易将完整的物体分割成几部分,容易破坏物体的完整性,加大了物体检测的难度。
In [25]:
import os
import sys
import cv2
import numpy as np
import IPython
#-------------------------------------------------------------------
def imshow(img):
    """Show an image inline in the notebook.

    The image is JPEG-encoded with OpenCV and the encoded buffer is passed
    to IPython's display machinery.

    Args:
        img: image accepted by ``cv2.imencode``.
    """
    # cv2.imencode returns a (status, buffer) pair; only the buffer is used.
    encoded = cv2.imencode('.jpg', img)[1]
    IPython.display.display(IPython.display.Image(data=encoded))

#-------------------------------------------------------------------
def grid_boxes(n,m,width,height):
    """Tile a width x height area with an n-column by m-row grid of boxes.

    Boxes are listed row by row (left to right, then top to bottom) as
    ``[xmin, ymin, xmax, ymax]`` lists of ints. Neighbouring boxes share
    their edges, and the last column/row ends exactly on ``width``/``height``.

    Args:
        n: number of columns (must be non-zero).
        m: number of rows (must be non-zero).
        width: horizontal extent of the area to tile.
        height: vertical extent of the area to tile.

    Returns:
        A list of ``n * m`` boxes.

    Raises:
        ZeroDivisionError: if ``n`` or ``m`` is 0.
    """
    # Edges are computed as floor(k * extent / count) with integer arithmetic
    # rather than by truncating multiples of a pre-divided float step, so
    # rounding error can never leave the last box short of the border and the
    # result does not depend on how `/` divides two integers.
    x_edges = [int(width * j // n) for j in range(n + 1)]
    y_edges = [int(height * i // m) for i in range(m + 1)]
    return [[x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1]]
            for i in range(m)
            for j in range(n)]

#-------------------------------------------------------------------
def random_boxes(numbers,width,height):
    """Generate `numbers` random boxes lying inside a width x height image.

    Each box is an ``[xmin, ymin, xmax, ymax]`` list of ints with
    ``0 <= xmin < xmax <= width`` and ``0 <= ymin < ymax <= height``.

    The near corner is drawn uniformly over the image and the far corner
    uniformly over what remains to its right/below. Every draw is therefore
    a valid box: none has zero area, none has to be discarded for
    overflowing the image, and exactly `numbers` boxes are returned.

    Args:
        numbers: how many boxes to generate.
        width: image width in pixels (must be >= 1).
        height: image height in pixels (must be >= 1).

    Returns:
        A list of `numbers` boxes.
    """
    boxes = []
    for _ in range(numbers):
        xmin = int(np.random.randint(width))
        ymin = int(np.random.randint(height))
        # randint's upper bound is exclusive, hence the `+ 1` to allow a box
        # that ends exactly on the image border.
        xmax = int(np.random.randint(xmin + 1, width + 1))
        ymax = int(np.random.randint(ymin + 1, height + 1))
        boxes.append([xmin, ymin, xmax, ymax])
    return boxes

#-------------------------------------------------------------------
def draw_boxes(boxes,img):
    """Outline every box on `img`, each with its own random colour.

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
        img: image passed to ``cv2.rectangle``.

    Returns:
        The image returned by the last ``cv2.rectangle`` call, or `img`
        itself when `boxes` is empty.
    """
    inset = 2      # each corner is moved 2 px towards the inside of the box
    thickness = 2  # outline thickness handed to cv2.rectangle
    for box in boxes:
        # One draw per channel, in B, G, R order; each value lies in [10, 255).
        colour = tuple(np.random.randint(10, 255) for _ in range(3))
        top_left = (box[0] + inset, box[1] + inset)
        bottom_right = (box[2] - inset, box[3] - inset)
        img = cv2.rectangle(img, top_left, bottom_right, colour, thickness)
    return img

def scaling(boxes):
    """Add concentric, shrunken copies of every box.

    Each input box is kept and followed by one copy per entry of `scales`.
    A copy is made by moving all four sides inwards by that fraction of the
    box's width/height (truncated to an int).

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.

    Returns:
        A new list holding, for each input box, the box itself and then its
        shrunken copies, every one ordered as ``[xmin, ymin, xmax, ymax]``.
    """
    scales = [1.0/8, 1.0/4, 0.3, 0.7, 0.4]
    boxes1 = []
    for box in boxes:
        boxes1.append(box)
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        for s in scales:
            dx = int(box_w * s)
            dy = int(box_h * s)
            xa, xb = box[0] + dx, box[2] - dx
            ya, yb = box[1] + dy, box[3] - dy
            # An inset of half a side or more (e.g. s = 0.7) pushes opposite
            # edges past each other; reorder the coordinates so the result is
            # never an inverted box with xmin > xmax or ymin > ymax.
            boxes1.append([min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)])

    return boxes1

#-------------------------------------------------------------------
width = 800
height= 500
filename = "../test/images/shanghai.jpg"
img = cv2.imread(filename)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising; fail here with a clear message rather than inside cv2.resize.
if img is None:
    raise IOError("cannot read image: " + filename)
img = cv2.resize(img,(width,height))
# Exercise 1 uses only the regular 3x3 grid. The earlier random_boxes() call
# was removed because its result was overwritten before being used.
boxes = grid_boxes(3,3,width, height)

# Add the shrunken copies nested inside every grid cell, then draw everything.
boxes = scaling(boxes)
img   = draw_boxes(boxes,img)
imshow(img)

练习二

吸取上一练习的经验,这个练习里我在图片的任意位置上随机添加任意大小、任意颜色的bounding box。

  • 优点:由于添加bounding box的位置以及bounding box的大小、方向都随机,提升了被检测物体的完整度,物体被检测到的几率更大。
  • 缺点:由于bounding boxes都是随机添加的,这些边框容易集中在某些固定的区域,从而漏掉图片的某些部分,所以仍需改善。
In [24]:
import os
import sys
import cv2
import numpy as np
import IPython

#-------------------------------------------------------------------
def imshow(img):
    """Show an image inline in the notebook.

    The image is JPEG-encoded with OpenCV and the encoded buffer is passed
    to IPython's display machinery.

    Args:
        img: image accepted by ``cv2.imencode``.
    """
    # cv2.imencode returns a (status, buffer) pair; only the buffer is used.
    encoded = cv2.imencode('.jpg', img)[1]
    IPython.display.display(IPython.display.Image(data=encoded))

#-------------------------------------------------------------------
def grid_boxes(n,m,width,height):
    """Tile a width x height area with an n-column by m-row grid of boxes.

    Boxes are listed row by row (left to right, then top to bottom) as
    ``[xmin, ymin, xmax, ymax]`` lists of ints. Neighbouring boxes share
    their edges, and the last column/row ends exactly on ``width``/``height``.

    Args:
        n: number of columns (must be non-zero).
        m: number of rows (must be non-zero).
        width: horizontal extent of the area to tile.
        height: vertical extent of the area to tile.

    Returns:
        A list of ``n * m`` boxes.

    Raises:
        ZeroDivisionError: if ``n`` or ``m`` is 0.
    """
    # Edges are computed as floor(k * extent / count) with integer arithmetic
    # rather than by truncating multiples of a pre-divided float step, so
    # rounding error can never leave the last box short of the border and the
    # result does not depend on how `/` divides two integers.
    x_edges = [int(width * j // n) for j in range(n + 1)]
    y_edges = [int(height * i // m) for i in range(m + 1)]
    return [[x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1]]
            for i in range(m)
            for j in range(n)]

#-------------------------------------------------------------------
def random_boxes(numbers,width,height):
    """Generate `numbers` random boxes lying inside a width x height image.

    Each box is an ``[xmin, ymin, xmax, ymax]`` list of ints with
    ``0 <= xmin < xmax <= width`` and ``0 <= ymin < ymax <= height``.

    The near corner is drawn uniformly over the image and the far corner
    uniformly over what remains to its right/below. Every draw is therefore
    a valid box: none has zero area, none has to be discarded for
    overflowing the image, and exactly `numbers` boxes are returned.

    Args:
        numbers: how many boxes to generate.
        width: image width in pixels (must be >= 1).
        height: image height in pixels (must be >= 1).

    Returns:
        A list of `numbers` boxes.
    """
    boxes = []
    for _ in range(numbers):
        xmin = int(np.random.randint(width))
        ymin = int(np.random.randint(height))
        # randint's upper bound is exclusive, hence the `+ 1` to allow a box
        # that ends exactly on the image border.
        xmax = int(np.random.randint(xmin + 1, width + 1))
        ymax = int(np.random.randint(ymin + 1, height + 1))
        boxes.append([xmin, ymin, xmax, ymax])
    return boxes

#-------------------------------------------------------------------
def draw_boxes(boxes,img):
    """Outline every box on `img`, each with its own random colour.

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
        img: image passed to ``cv2.rectangle``.

    Returns:
        The image returned by the last ``cv2.rectangle`` call, or `img`
        itself when `boxes` is empty.
    """
    inset = 2      # each corner is moved 2 px towards the inside of the box
    thickness = 2  # outline thickness handed to cv2.rectangle
    for box in boxes:
        # One draw per channel, in B, G, R order; each value lies in [10, 255).
        colour = tuple(np.random.randint(10, 255) for _ in range(3))
        top_left = (box[0] + inset, box[1] + inset)
        bottom_right = (box[2] - inset, box[3] - inset)
        img = cv2.rectangle(img, top_left, bottom_right, colour, thickness)
    return img

def scaling(boxes):
    """Add concentric, shrunken copies of every box.

    Each input box is kept and followed by one copy per entry of `scales`.
    A copy is made by moving all four sides inwards by that fraction of the
    box's width/height (truncated to an int).

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.

    Returns:
        A new list holding, for each input box, the box itself and then its
        shrunken copies, every one ordered as ``[xmin, ymin, xmax, ymax]``.
    """
    scales = [1.0/8, 1.0/4, 0.3, 0.7, 0.4]
    boxes1 = []
    for box in boxes:
        boxes1.append(box)
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        for s in scales:
            dx = int(box_w * s)
            dy = int(box_h * s)
            xa, xb = box[0] + dx, box[2] - dx
            ya, yb = box[1] + dy, box[3] - dy
            # An inset of half a side or more (e.g. s = 0.7) pushes opposite
            # edges past each other; reorder the coordinates so the result is
            # never an inverted box with xmin > xmax or ymin > ymax.
            boxes1.append([min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)])

    return boxes1

def aspect_ratio(boxes,width,height):
    """Add reshaped variants of every box, clamped to the image.

    For each input box and each fraction in (1/8, 1/4) two variants are
    produced from the box's width/height scaled by that fraction (truncated
    to an int):

    * a narrower, taller one (left/right sides moved in, top/bottom out);
    * a wider, shorter one (left/right sides moved out, top/bottom in).

    Coordinates are limited to ``[0, width]`` horizontally and
    ``[0, height]`` vertically.

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
        width: image width used as the upper x limit.
        height: image height used as the upper y limit.

    Returns:
        A new list with, per input box, the box itself followed by the
        (tall, wide) pair for 1/8 and then the pair for 1/4.
    """
    fractions = (1.0/8, 1.0/4)
    result = []
    for box in boxes:
        result.append(box)
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        for frac in fractions:
            dx = int((right - left) * frac)
            dy = int((bottom - top) * frac)

            tall = [max(left + dx, 0), max(top - dy, 0),
                    min(right - dx, width), min(bottom + dy, height)]
            wide = [max(left - dx, 0), max(top + dy, 0),
                    min(right + dx, width), min(bottom - dy, height)]

            result.extend((tall, wide))

    return result

#-------------------------------------------------------------------
width = 800
height= 500
filename = "../test/images/shanghai.jpg"
img = cv2.imread(filename)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising; fail here with a clear message rather than inside cv2.resize.
if img is None:
    raise IOError("cannot read image: " + filename)
img = cv2.resize(img,(width,height))
# Exercise 2 starts from randomly placed boxes instead of the regular grid.
boxes = random_boxes(100,width,height)
# boxes = grid_boxes(3,3,width, height)
# Both helpers keep the input boxes in their output, so every starting box
# appears twice in the combined list.
boxes1 = scaling(boxes)
boxes2 = aspect_ratio(boxes,width,height)
boxes  = boxes1 + boxes2

img   = draw_boxes(boxes,img)
imshow(img)

练习三

1、结合练习一和练习二,首先为图片整体一层一层添加可重叠的边框,从横向纵向来说考虑到每个位置,没有漏掉哪部分。
2、再结合练习二,在任意位置随机添加大小不一的边框,更能提升检测率。

In [45]:
import os
import sys
import cv2
import numpy as np
import IPython

#-------------------------------------------------------------------
def imshow(img):
    """Show an image inline in the notebook.

    The image is JPEG-encoded with OpenCV and the encoded buffer is passed
    to IPython's display machinery.

    Args:
        img: image accepted by ``cv2.imencode``.
    """
    # cv2.imencode returns a (status, buffer) pair; only the buffer is used.
    encoded = cv2.imencode('.jpg', img)[1]
    IPython.display.display(IPython.display.Image(data=encoded))

#-------------------------------------------------------------------
def sliding(img,w,h,stride_w,stride_h):
    """Enumerate sliding-window boxes of size w x h over an image.

    Windows are listed row by row (left to right, then top to bottom) as
    ``[xmin, ymin, xmax, ymax]`` lists of ints. Only windows that fit
    entirely inside the image are produced.

    Args:
        img: array whose ``shape[0]`` is the image height and ``shape[1]``
            the image width.
        w: window width in pixels.
        h: window height in pixels.
        stride_w: horizontal step between consecutive windows.
        stride_h: vertical step between consecutive windows.

    Returns:
        A list of boxes; empty when the window is larger than the image.
    """
    height, width = img.shape[0], img.shape[1]
    # The last valid start offset is `extent - size` itself (the window that
    # sits flush against the right/bottom border). range() excludes its stop
    # value, so the stop must be `extent - size + 1` to keep that window.
    return [[int(x), int(y), int(x + w), int(y + h)]
            for y in range(0, height - h + 1, stride_h)
            for x in range(0, width - w + 1, stride_w)]

#-------------------------------------------------------------------
def random_boxes(numbers,width,height):
    """Generate `numbers` random boxes lying inside a width x height image.

    Each box is an ``[xmin, ymin, xmax, ymax]`` list of ints with
    ``0 <= xmin < xmax <= width`` and ``0 <= ymin < ymax <= height``.

    The near corner is drawn uniformly over the image and the far corner
    uniformly over what remains to its right/below. Every draw is therefore
    a valid box: none has zero area, none has to be discarded for
    overflowing the image, and exactly `numbers` boxes are returned.

    Args:
        numbers: how many boxes to generate.
        width: image width in pixels (must be >= 1).
        height: image height in pixels (must be >= 1).

    Returns:
        A list of `numbers` boxes.
    """
    boxes = []
    for _ in range(numbers):
        xmin = int(np.random.randint(width))
        ymin = int(np.random.randint(height))
        # randint's upper bound is exclusive, hence the `+ 1` to allow a box
        # that ends exactly on the image border.
        xmax = int(np.random.randint(xmin + 1, width + 1))
        ymax = int(np.random.randint(ymin + 1, height + 1))
        boxes.append([xmin, ymin, xmax, ymax])
    return boxes

#-------------------------------------------------------------------
def draw_boxes(boxes,img):
    """Outline every box on `img`, each with its own random colour.

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
        img: image passed to ``cv2.rectangle``.

    Returns:
        The image returned by the last ``cv2.rectangle`` call, or `img`
        itself when `boxes` is empty.
    """
    inset = 2      # each corner is moved 2 px towards the inside of the box
    thickness = 2  # outline thickness handed to cv2.rectangle
    for box in boxes:
        # One draw per channel, in B, G, R order; each value lies in [10, 255).
        colour = tuple(np.random.randint(10, 255) for _ in range(3))
        top_left = (box[0] + inset, box[1] + inset)
        bottom_right = (box[2] - inset, box[3] - inset)
        img = cv2.rectangle(img, top_left, bottom_right, colour, thickness)
    return img

def scaling(boxes):
    """Add concentric, shrunken copies of every box.

    Each input box is kept and followed by one copy per entry of `scales`.
    A copy is made by moving all four sides inwards by that fraction of the
    box's width/height (truncated to an int).

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.

    Returns:
        A new list holding, for each input box, the box itself and then its
        shrunken copies, every one ordered as ``[xmin, ymin, xmax, ymax]``.
    """
    scales = [1.0/8, 1.0/4, 0.3, 0.7, 0.4]
    boxes1 = []
    for box in boxes:
        boxes1.append(box)
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        for s in scales:
            dx = int(box_w * s)
            dy = int(box_h * s)
            xa, xb = box[0] + dx, box[2] - dx
            ya, yb = box[1] + dy, box[3] - dy
            # An inset of half a side or more (e.g. s = 0.7) pushes opposite
            # edges past each other; reorder the coordinates so the result is
            # never an inverted box with xmin > xmax or ymin > ymax.
            boxes1.append([min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)])

    return boxes1

def aspect_ratio(boxes,width,height):
    """Add reshaped variants of every box, clamped to the image.

    For each input box and each fraction in (1/8, 1/4) two variants are
    produced from the box's width/height scaled by that fraction (truncated
    to an int):

    * a narrower, taller one (left/right sides moved in, top/bottom out);
    * a wider, shorter one (left/right sides moved out, top/bottom in).

    Coordinates are limited to ``[0, width]`` horizontally and
    ``[0, height]`` vertically.

    Args:
        boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
        width: image width used as the upper x limit.
        height: image height used as the upper y limit.

    Returns:
        A new list with, per input box, the box itself followed by the
        (tall, wide) pair for 1/8 and then the pair for 1/4.
    """
    fractions = (1.0/8, 1.0/4)
    result = []
    for box in boxes:
        result.append(box)
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        for frac in fractions:
            dx = int((right - left) * frac)
            dy = int((bottom - top) * frac)

            tall = [max(left + dx, 0), max(top - dy, 0),
                    min(right - dx, width), min(bottom + dy, height)]
            wide = [max(left - dx, 0), max(top + dy, 0),
                    min(right + dx, width), min(bottom - dy, height)]

            result.extend((tall, wide))

    return result

#-------------------------------------------------------------------
width = 800
height= 500
filename = "../test/images/shanghai.jpg"
img = cv2.imread(filename)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising; fail here with a clear message rather than inside cv2.resize.
if img is None:
    raise IOError("cannot read image: " + filename)
img = cv2.resize(img,(width,height))
# boxes = random_boxes(100,width,height)
# Exercise 3 starts from overlapping 200x100 windows stepped by 50 px
# horizontally and 20 px vertically.
boxes = sliding(img,200,100,50,20)
boxes1 = scaling(boxes)
boxes2 = aspect_ratio(boxes,width,height)
boxes  = boxes1 + boxes2

# Only the first 60 boxes of the combined list are drawn.
img   = draw_boxes(boxes[:60],img)
imshow(img)