xref: /OK3568_Linux_fs/external/rknn-toolkit2/examples/onnx/yolov5/test.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1import os
2import urllib
3import traceback
4import time
5import sys
6import numpy as np
7import cv2
8from rknn.api import RKNN
9
# Model files and sample data used by the script entry point.
ONNX_MODEL = 'yolov5s.onnx'
RKNN_MODEL = 'yolov5s.rknn'
IMG_PATH = './bus.jpg'
DATASET = './dataset.txt'

# Passed as do_quantization to rknn.build().
QUANTIZE_ON = True

# Post-processing thresholds and the side length the input image is resized to.
OBJ_THRESH = 0.25
NMS_THRESH = 0.45
IMG_SIZE = 640

# Class labels indexed by predicted class id.
# The labels deliberately carry no trailing spaces or tab characters: they are
# printed and rendered with cv2.putText, where such characters show up as
# stray blanks or '?' glyphs.
CLASSES = (
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife",
    "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "sofa",
    "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
    "cell phone", "microwave",
    "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
)
28
29
def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x)) of x."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
32
33
def xywh2xyxy(x):
    """Convert boxes from [x, y, w, h] (centre + size) to [x1, y1, x2, y2].

    The input array is not modified; a converted copy with the same dtype
    is returned.
    """
    half_size = x[:, 2:4] / 2
    corners = np.copy(x)
    corners[:, 0:2] = x[:, 0:2] - half_size  # top-left x, y
    corners[:, 2:4] = x[:, 0:2] + half_size  # bottom-right x, y
    return corners
42
43
def process(input, mask, anchors):
    """Decode one raw YOLOv5 output head into boxes and scores.

    # Arguments
        input: ndarray indexed as (grid_h, grid_w, anchor, channel). Channels
            0-3 are the raw box terms, channel 4 the objectness logit and
            channels 5+ the per-class logits.
        mask: indices into `anchors` selecting the anchors of this head.
        anchors: sequence of [w, h] anchor sizes.

    # Returns
        box: ndarray (grid_h, grid_w, anchor, 4) holding [x, y, w, h], with
            x/y scaled by the stride IMG_SIZE / grid_h and w/h by the anchors.
        box_confidence: ndarray (grid_h, grid_w, anchor, 1), sigmoid objectness.
        box_class_probs: ndarray (grid_h, grid_w, anchor, classes), sigmoid
            class scores.
    """
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = np.expand_dims(sigmoid(input[..., 4]), axis=-1)
    box_class_probs = sigmoid(input[..., 5:])

    box_xy = sigmoid(input[..., :2]) * 2 - 0.5

    # Add each cell's column index to x and row index to y. Broadcasting
    # handles non-square grids and any number of anchors per cell, which the
    # previous tile/repeat(3) construction did not.
    box_xy[..., 0] += np.arange(grid_w).reshape(1, grid_w, 1)
    box_xy[..., 1] += np.arange(grid_h).reshape(grid_h, 1, 1)

    # NOTE: the stride is derived from grid_h only, i.e. the same scale is
    # applied to x and y (assumes a square IMG_SIZE x IMG_SIZE network input).
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(sigmoid(input[..., 2:4]) * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs
70
71
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep only confident boxes. Slightly different from the original yolov5 post process!

    A candidate survives when both its objectness and its best class
    probability are at least OBJ_THRESH.

    # Arguments
        boxes: ndarray, boxes of objects (flattened to (-1, 4)).
        box_confidences: ndarray, objectness confidence per box.
        box_class_probs: ndarray, per-class probabilities per box.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, best class index for each kept box.
        scores: ndarray, best class probability times objectness for each kept box.
    """
    num_classes = box_class_probs.shape[-1]
    flat_boxes = boxes.reshape(-1, 4)
    flat_conf = box_confidences.reshape(-1)
    flat_probs = box_class_probs.reshape(-1, num_classes)

    # Stage 1: objectness threshold.
    confident = flat_conf >= OBJ_THRESH
    flat_boxes = flat_boxes[confident]
    flat_conf = flat_conf[confident]
    flat_probs = flat_probs[confident]

    # Stage 2: best-class probability threshold.
    best_prob = np.max(flat_probs, axis=-1)
    best_class = np.argmax(flat_probs, axis=-1)
    selected = best_prob >= OBJ_THRESH

    combined = best_prob * flat_conf
    return flat_boxes[selected], best_class[selected], combined[selected]
103
104
def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray of shape (N, 4), boxes as [x1, y1, x2, y2].
        scores: ndarray of shape (N,), scores of the boxes.

    # Returns
        keep: integer ndarray, indices of the kept boxes ordered by
            descending score. For empty input an empty *integer* array is
            returned so the result can always be used to index `boxes`.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # The best remaining box is always kept ...
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # ... and compared against every other remaining box.
        xx1 = np.maximum(x[i], x[rest])
        yy1 = np.maximum(y[i], y[rest])
        xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
        yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])

        # Small epsilon kept from the original implementation.
        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[rest] - inter)
        # Only boxes that overlap the kept one by at most NMS_THRESH survive.
        order = rest[np.where(ovr <= NMS_THRESH)[0]]

    # Explicit integer dtype: np.array([]) would default to float64, which
    # cannot be used as an index array.
    return np.array(keep, dtype=np.intp)
142
143
def yolov5_post_process(input_data):
    """Turn the raw output heads into final detections.

    Each head is decoded with `process`, thresholded with `filter_boxes`,
    and the merged candidates go through per-class NMS.

    # Arguments
        input_data: sequence of head arrays, one per anchor mask below.

    # Returns
        boxes, classes, scores: ndarrays of the surviving detections
            (boxes as [x1, y1, x2, y2]), or (None, None, None) when nothing
            was detected.
    """
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    # Decode and threshold every head, collecting the candidates.
    head_boxes, head_classes, head_scores = [], [], []
    for head, mask in zip(input_data, masks):
        decoded = process(head, mask, anchors)
        kept_boxes, kept_classes, kept_scores = filter_boxes(*decoded)
        head_boxes.append(kept_boxes)
        head_classes.append(kept_classes)
        head_scores.append(kept_scores)

    all_boxes = xywh2xyxy(np.concatenate(head_boxes))
    all_classes = np.concatenate(head_classes)
    all_scores = np.concatenate(head_scores)

    # Run NMS independently for each class that occurs.
    nboxes, nclasses, nscores = [], [], []
    for class_id in set(all_classes):
        members = np.where(all_classes == class_id)
        cls_boxes = all_boxes[members]
        cls_ids = all_classes[members]
        cls_scores = all_scores[members]

        keep = nms_boxes(cls_boxes, cls_scores)

        nboxes.append(cls_boxes[keep])
        nclasses.append(cls_ids[keep])
        nscores.append(cls_scores[keep])

    if not (nclasses or nscores):
        return None, None, None

    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
183
184
def draw(image, boxes, scores, classes):
    """Print every detection and draw it onto the image in place.

    # Argument:
        image: image to draw on (modified in place).
        boxes: ndarray, boxes of objects as [x1, y1, x2, y2].
        scores: ndarray, scores of objects.
        classes: ndarray, class indices of objects (looked up in CLASSES).
    """
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        label = CLASSES[cl]
        print('class: {}, score: {}'.format(label, score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x1, y1, x2, y2))

        # OpenCV needs integer pixel coordinates.
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        caption = '{0} {1:.2f}'.format(label, score)
        # Caption sits slightly above the top-left corner of the box.
        cv2.putText(image, caption, (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
209
210
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    """Resize an image with its aspect ratio preserved and pad it to new_shape.

    # Arguments
        im: image array whose first two dimensions are (height, width).
        new_shape: target (height, width), or a single int for a square target.
        color: constant border colour used for the padding.

    # Returns
        im: the resized and padded image.
        ratio: (r, r), the scale factor applied to width and height.
        (dw, dh): padding per side along width and height (may be fractional).
    """
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    src_h, src_w = im.shape[:2]
    target_h, target_w = new_shape[0], new_shape[1]

    # Largest scale at which the image still fits inside the target.
    r = min(target_h / src_h, target_w / src_w)
    ratio = (r, r)

    # Size after scaling, as (width, height), and the padding left over,
    # split evenly between the two sides.
    new_unpad = (int(round(src_w * r)), int(round(src_h * r)))
    dw = (target_w - new_unpad[0]) / 2
    dh = (target_h - new_unpad[1]) / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets decide which side gets the extra pixel when the
    # padding is a half-integer.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (dw, dh)
234
235
def _exit_on_failure(ret, message):
    """Print `message` and exit with status `ret` when `ret` is non-zero."""
    if ret != 0:
        print(message)
        sys.exit(ret)


def main():
    """Convert the ONNX model to RKNN, run it on IMG_PATH and save the result.

    The RKNN object is released on every exit path, including failures.
    """
    # Create RKNN object
    rknn = RKNN(verbose=True)
    try:
        # pre-process config
        print('--> Config model')
        rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]])
        print('done')

        # Load ONNX model
        print('--> Loading model')
        _exit_on_failure(rknn.load_onnx(model=ONNX_MODEL), 'Load model failed!')
        print('done')

        # Build model
        print('--> Building model')
        _exit_on_failure(rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET),
                         'Build model failed!')
        print('done')

        # Export RKNN model
        print('--> Export rknn model')
        _exit_on_failure(rknn.export_rknn(RKNN_MODEL), 'Export rknn model failed!')
        print('done')

        # Init runtime environment
        # NOTE: the original sample also listed rknn.init_runtime('rk3566') as
        # a commented-out alternative.
        print('--> Init runtime environment')
        _exit_on_failure(rknn.init_runtime(), 'Init runtime environment failed!')
        print('done')

        # Set inputs
        img = cv2.imread(IMG_PATH)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            print('Read image {} failed!'.format(IMG_PATH))
            sys.exit(-1)
        # NOTE: the original sample kept a commented-out alternative that used
        # letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE)) instead of a plain resize.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Inference
        print('--> Running model')
        outputs = rknn.inference(inputs=[img])
        for idx in range(3):
            np.save('./onnx_yolov5_{}.npy'.format(idx), outputs[idx])
        print('done')

        # post process: split the leading dimensions of each head into
        # (3, -1) and move the last two axes to the front, giving the
        # layout indexed by process().
        input_data = []
        for idx in range(3):
            head = outputs[idx]
            head = head.reshape([3, -1] + list(head.shape[-2:]))
            input_data.append(np.transpose(head, (2, 3, 0, 1)))

        boxes, classes, scores = yolov5_post_process(input_data)

        img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        if boxes is not None:
            draw(img_1, boxes, scores, classes)
            cv2.imwrite('result.jpg', img_1)
    finally:
        rknn.release()


if __name__ == '__main__':
    main()
315