YOLO OPENCV

YOLO - object detection

YOLO — You Only Look Once — is an extremely fast multi object detection algorithm which uses convolutional neural network (CNN) to detect and identify objects.

https://opencv-tutorial.readthedocs.io/en/latest/yolo/yolo.html

#使用Anaconda / Jupyter 操作

#需要先安裝 pip install opencv-python 及 pip install numpy

# YOLO object detection

import cv2 as cv

import numpy as np

import time

#可自行上傳照片並更換照片路徑,例如:與程式相同路徑的road.jpg

img = cv.imread('road.jpg')

cv.imshow('window', img)

cv.waitKey(1)


# Load names of classes and get random colors

classes = open('coco.names').read().strip().split('\n')

np.random.seed(42)

colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')


# Give the configuration and weight files for the model and load the network.

net = cv.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')

net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)

# net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)


# determine the output layer

ln = net.getLayerNames()

ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]


# construct a blob from the image

blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)

r = blob[0, 0, :, :]


cv.imshow('blob', r)

text = 'Blob shape={blob.shape}'

cv.displayOverlay('blob', text)

cv.waitKey(1)


net.setInput(blob)

t0 = time.time()

outputs = net.forward(ln)

t = time.time()

print('time=', t-t0)


print(len(outputs))

for out in outputs:

print(out.shape)


def trackbar2(x):

confidence = x/100

r = r0.copy()

for output in np.vstack(outputs):

if output[4] > confidence:

x, y, w, h = output[:4]

p0 = int((x-w/2)*416), int((y-h/2)*416)

p1 = int((x+w/2)*416), int((y+h/2)*416)

cv.rectangle(r, p0, p1, 1, 1)

cv.imshow('blob', r)

text = 'Bbox confidence={confidence}'

cv.displayOverlay('blob', text)


r0 = blob[0, 0, :, :]

r = r0.copy()

cv.imshow('blob', r)

cv.createTrackbar('confidence', 'blob', 50, 101, trackbar2)

trackbar2(50)


boxes = []

confidences = []

classIDs = []

h, w = img.shape[:2]


for output in outputs:

for detection in output:

scores = detection[5:]

classID = np.argmax(scores)

confidence = scores[classID]

if confidence > 0.5:

box = detection[:4] * np.array([w, h, w, h])

(centerX, centerY, width, height) = box.astype("int")

x = int(centerX - (width / 2))

y = int(centerY - (height / 2))

box = [x, y, int(width), int(height)]

boxes.append(box)

confidences.append(float(confidence))

classIDs.append(classID)


indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

if len(indices) > 0:

for i in indices.flatten():

(x, y) = (boxes[i][0], boxes[i][1])

(w, h) = (boxes[i][2], boxes[i][3])

color = [int(c) for c in colors[classIDs[i]]]

cv.rectangle(img, (x, y), (x + w, y + h), color, 2)

text = "{}: {:.4f}".format(classes[classIDs[i]], confidences[i])

cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


cv.imshow('window', img)

cv.waitKey(0)

cv.destroyAllWindows()

#USB WebCam

import cv2


cap = cv2.VideoCapture(0)

if not cap.isOpened():

print("Cannot open camera")

exit()


while(True):

# Capture frame-by-frame

ret, frame = cap.read()


# if frame is read correctly ret is True

if not ret:

print("Can't receive frame (stream end?). Exiting ...")

break


# color to gray

gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)


# 顯示圖片

cv2.imshow('frame', frame)

# 按下 q 鍵離開迴圈

if cv2.waitKey(1) == ord('q'):

break


# 釋放該攝影機裝置

cap.release()

cv2.destroyAllWindows()