Hi,
I just converted the MobileNet model (608x608 input) to TVM. It takes about 0.2 seconds per frame, which gives me roughly 5 FPS on a 1050 Ti with the CUDA cores fully utilized. Time per frame (in seconds): 0.20082473754882812 0.19168806076049805 0.19301962852478027 0.19402718544006348 0.1933760643005371 0.1952970027923584 0.2037806510925293 0.1919691562652588 0.20432472229003906 0.20802545547485352 0.20062041282653809
nvidia-smi -i 0 --query-gpu=index,timestamp,utilization.gpu,power.draw,temperature.gpu --format=csv -l 1 0, 2019/04/22 15:39:58.050, 90 %, [Not Supported], 56 0, 2019/04/22 15:39:59.050, 88 %, [Not Supported], 56 0, 2019/04/22 15:40:00.051, 90 %, [Not Supported], 56 0, 2019/04/22 15:40:01.051, 88 %, [Not Supported], 56 0, 2019/04/22 15:40:02.051, 88 %, [Not Supported], 57 0, 2019/04/22 15:40:03.052, 96 %, [Not Supported], 57 0, 2019/04/22 15:40:04.052, 100 %, [Not Supported], 57 0, 2019/04/22 15:40:05.053, 91 %, [Not Supported], 57 0, 2019/04/22 15:40:06.053, 89 %, [Not Supported], 58 0, 2019/04/22 15:40:07.053, 88 %, [Not Supported], 58
Am I doing something wrong, or is this the performance currently expected from TVM on this hardware?
Code:
def display(img, out, thresh=0.5):
    """Draw the detections in ``out`` onto ``img`` and show it in the 'frame' window.

    Relies on the module-level names ``cv2``, ``class_names`` and ``font``.

    Args:
        img: Image array. ``img.shape[1]`` and ``img.shape[0]`` are used as the
            width and height when scaling box coordinates. Drawn on in place.
        out: Iterable of detection rows, read as
            ``[class_id, score, xmin, ymin, xmax, ymax, ...]``. The box values
            are multiplied by the image size, so they are presumably
            normalised to [0, 1] -- confirm against the model's output.
        thresh: Detections whose score is below this value are skipped.
    """
    # [width, height, width, height], matching the (xmin, ymin, xmax, ymax)
    # order of det[2:6]. Computed once: it does not depend on the detection.
    scales = [img.shape[1], img.shape[0]] * 2

    for det in out:
        cid = int(det[0])
        # Skip rows with a negative class id (presumably padding entries --
        # confirm) and rows scoring below the threshold.
        if cid < 0 or det[1] < thresh:
            continue

        xmin, ymin, xmax, ymax = [
            int(p * s) for p, s in zip(det[2:6].tolist(), scales)
        ]
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        cv2.putText(img, class_names[cid], (xmin, ymin), font, 1,
                    (200, 0, 0), 3, cv2.LINE_AA)

    # Show the frame once per call, so it is refreshed even when no detection
    # passes the threshold.
    cv2.imshow('frame', img)
# Read frames from the RTSP stream, run the TVM module on each one, and show
# the annotated result until the stream stops or 'q' is pressed.
# NOTE(review): the stream URL embeds credentials; consider loading it from
# configuration instead of hard-coding it.
cap = cv2.VideoCapture("rtsp://admin:admin123@192.168.1.193:554/Streaming/Channels/101")
try:
    while cap.isOpened():
        t0 = time.time()
        ret, image = cap.read()
        if not ret:
            # No frame was delivered; stop rather than pass None to cv2.resize.
            break

        # NOTE(review): cv2.resize expects (width, height). If data_shape is
        # (N, C, H, W) this passes (H, W), which only matters for non-square
        # inputs -- confirm.
        img_data = cv2.resize(image, (data_shape[2], data_shape[3]))
        # Reverse the channel axis (OpenCV frames are BGR, so this yields RGB)
        # and convert to float32.
        img_data = img_data[:, :, (2, 1, 0)].astype(np.float32)
        # NOTE(review): mean subtraction
        # (img_data -= np.array([123, 117, 104])) was left disabled in the
        # original; confirm the model expects raw pixel values.
        # HWC -> CHW, then add a leading batch axis.
        img_data = np.transpose(img_data, (2, 0, 1))
        img_data = np.expand_dims(img_data, axis=0)

        module.run(data=img_data)
        tvm_output = module.get_output(0)
        display(image, tvm_output.asnumpy()[0], thresh=0.25)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # This is the time for the whole iteration (frame capture,
        # preprocessing, inference, drawing and waitKey), not inference alone.
        print(time.time() - t0)
finally:
    # Release the capture and close the window even if an iteration raised.
    cap.release()
    cv2.destroyAllWindows()