以Python執行Yolov3

在開始之前，請先將工作目錄切換至「D:\darknetYolo\darknet」，程式才能以相對路徑讀取組態檔、權重檔與標籤檔。

載入模組以及讀取組態檔及權重檔

import cv2

import numpy as np

# Load the YOLOv3 network from its Darknet config file and trained weights.
# Both paths are relative to the current working directory.
net = cv2.dnn.readNetFromDarknet(
    "cfg/yolov3.cfg",
    "yolov3.weights",
)

取得輸出層名稱

# Names of every layer in the loaded network.
layer_names = net.getLayerNames()

# getUnconnectedOutLayers() yields 1-based layer ids (hence the "- 1").
# Older OpenCV builds return them as an N x 1 array, newer ones (around
# 4.5.4 and later) as a flat array, where indexing each entry with [0]
# fails. Flattening first works with both layouts.
output_layers = [
    layer_names[layer_id - 1]
    for layer_id in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

讀取分類標籤

# Read the class labels, one per line. The context manager closes the file
# handle (the bare open() leaked it), and the explicit encoding avoids
# depending on the platform's default code page.
with open("data/coco.names", encoding="utf-8") as names_file:
    classes = [line.strip() for line in names_file]

設定框選物件的顏色。

# Palette of five box colours; a detection picks one by class index.
# OpenCV images are in BGR channel order, so each tuple reads (B, G, R).
colors = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (127, 0, 255),
    (0, 125, 255),
]

讀取圖片，需要用到OpenCV

# cv2.imread() does not raise on a missing or unreadable file; it returns
# None. Fail here with a clear message instead of crashing later on
# `img.shape`.
image_path = "C:\\Users\\user\\Desktop\\test\\person\\1.jpg"
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

取得圖片高、寬、顏色通道數

# Unpack the image dimensions; the shape is ordered (height, width, channels).
height , width , channels = img.shape

將圖片進行預處理以符合Yolov3圖片輸入規格

# Pre-process the image into the input blob for YOLOv3:
#   * scalefactor: scale pixel values by 1/255
#   * size: resize to the square 416x416 network input. The previous
#     (416, 415) was a typo; YOLOv3 input sides should be multiples of 32,
#     and 415 is not.
#   * mean: zero mean, i.e. no mean subtraction
#   * swapRB: OpenCV loads images as BGR, so swap the R and B channels
#   * crop: resize without centre-cropping
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1 / 255.0,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

cv2.dnn.blobFromImage的作用

均值減法（Mean subtraction）
縮放（scaling）
可選的頻道交換

blob = cv2.dnn.blobFromImage(image, scalefactor=None, size=None, mean=None, swapRB=None, crop=None, ddepth=None)

image：想要預處理的圖片，之後會將圖片放入深度神經網路來分類

scalefactor：在均值減法處理後，我們可以用這個參數對圖片進行縮放。這個參數默認爲1。

size：設置卷積神經網路訓練時輸入的圖片的大小。

mean：這是均值減法中的均值，可以是3通道RGB的均值也可以是每個通道的均值，若是每個通道的均值需要用圖片的每個通道減掉每個通道的均值。要確保通道順序是RGB，尤其是swapRB=True時。

swapRB：OpenCV 讀入的圖片是 BGR 通道順序，然而，均值減法中的均值通常以 RGB 順序給定。爲了解決這個衝突，將該參數設爲 True，即可將 R 和 B 通道調換。注意：此參數的預設值隨 OpenCV 版本而異（較新的版本預設爲 False），建議呼叫時明確指定。

crop:圖片裁減，默認為False，當值為True時，先按比例縮放，然後從中心裁剪成size尺寸。

將圖片輸入Yolov3模型。

# Feed the pre-processed blob to the network as its input.
net.setInput(blob)

進行物體偵測。

# Run a forward pass and collect the output of each layer named in output_layers.
outs = net.forward(output_layers) # detection results

儲存標籤索引、信心指數、矩形座標

# Parallel accumulators, one entry per accepted detection:
# label index, confidence score, and box as [x, y, w, h].
class_ids, confidences, boxes = [], [], []

逐一處理偵測到物體的矩形座標

# Walk every detection row of every output layer and keep the confident ones.
# Block nesting is restored here; without indentation the loops do not parse.
for out in outs:
    for detection in out:
        # The first five values of a row: (tx, ty) is the box centre and
        # (tw, th) the box width/height, all as fractions of the image
        # size; `confidence` is the confidence score for the box.
        tx, ty, tw, th, confidence = detection[0:5]

        # Only detections with confidence above 0.3 count as an object.
        if confidence > 0.3:
            # The remaining values are per-class scores (not a label index);
            # the best-scoring position is the label index. Computed only
            # for accepted detections to skip needless argmax calls.
            scores = detection[5:]
            class_id = int(np.argmax(scores))

            # Scale the relative centre and size to pixel units.
            center_x = int(tx * width)
            center_y = int(ty * height)
            w = int(tw * width)
            h = int(th * height)

            # Convert the centre point to the top-left corner of the box.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            # Record box, confidence and label index in the parallel lists.
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

消除重疊框選。如此可提高偵測物體的正確率。

# Non-maximum suppression: drop boxes scoring below 0.3 and, among boxes
# overlapping beyond the 0.4 threshold, keep only the strongest.
# The result holds the indices (into `boxes`) of the boxes to keep.
indexes = cv2.dnn.NMSBoxes(
    boxes,
    confidences,
    score_threshold=0.3,
    nms_threshold=0.4,
)

設定在圖片中顯示文字的字形

# Font used for the class labels drawn on the image.
font = cv2.FONT_HERSHEY_PLAIN

畫出框選矩形及物件標籤。

# Draw a rectangle and class label for every box that survived NMS.
# Depending on the OpenCV version, NMSBoxes returns the kept indices as an
# N x 1 array, a flat array, or an empty tuple. Flattening into a set
# handles all three and makes the membership test O(1).
kept = set(np.asarray(indexes).flatten().tolist())
for j, (x, y, w, h) in enumerate(boxes):
    if j not in kept:
        continue

    # Map the label index to its class name.
    label = str(classes[class_ids[j]])

    # Pick a colour by label index. The original indexed with an undefined
    # `i` instead of `j`; the modulus now follows the palette size rather
    # than a hard-coded 5.
    color = colors[class_ids[j] % len(colors)]

    cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)  # box outline
    cv2.putText(img, label, (x, y - 5), font, 1, color, 2)  # class name above the box

顯示圖形。

# Show the annotated image in a window titled "1". A plain string replaces
# the f-string, which had no placeholders.
cv2.imshow("1", img)

# Passing 0 blocks until a key is pressed.
cv2.waitKey(0)

# Close all OpenCV windows.
cv2.destroyAllWindows()

全部程式碼：

import cv2

import numpy as np

# Load the YOLOv3 network from its Darknet config file and trained weights
# (paths are relative to the current working directory).
net = cv2.dnn.readNetFromDarknet("cfg/yolov3.cfg", "yolov3.weights")

# Resolve the names of the network's output layers.
# getUnconnectedOutLayers() yields 1-based ids, as an N x 1 array on older
# OpenCV builds and a flat array on newer ones; flatten to support both.
layer_names = net.getLayerNames()
output_layers = [
    layer_names[layer_id - 1]
    for layer_id in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# Class labels, one per line; the context manager closes the file handle.
with open("data/coco.names", encoding="utf-8") as names_file:
    classes = [line.strip() for line in names_file]

# Box colour palette; a detection picks one by class index.
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (127, 0, 255), (0, 125, 255)]

# cv2.imread() returns None instead of raising when the file cannot be read,
# so check explicitly. The path is a plain string; the f-prefix was unused.
image_path = "C:\\Users\\user\\Desktop\\test\\person\\1.jpg"
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# The shape is ordered (height, width, channels).
height, width, channels = img.shape

# Pre-process into the network input blob: scale pixels by 1/255, resize to
# 416x416 (the previous 416x415 was a typo; input sides should be multiples
# of 32), no mean subtraction, swap BGR -> RGB, no cropping.
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1 / 255.0,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

net.setInput(blob)

# Forward pass; one result array per output layer.
outs = net.forward(output_layers)

# Parallel accumulators filled by the detection loop that follows.
class_ids = []  # label indices
confidences = []  # confidence scores
boxes = []  # boxes as [x, y, w, h]

for out in outs:

for detection in out:

tx, ty, tw, th, confidence = detection[0:5] #取得座標及信心資料

scores = detection[5:]

class_id = np.argmax(scores)

if confidence >0.3: #信心指數大於0.3才算

center_x = int(tx * width)

center_y = int(ty * height)

w = int(tw * width)

h = int(th * height)

#計算矩形方框資訊

x = int(center_x -w /2)

y = int(center_y -h /2)

boxes.append([x,y,w,h])