mediapipe control volume

conda env list

conda create --name voc python=3.10

#執行與關閉虛擬環境

# To activate this environment, use

#

# $ conda activate voc

#

# To deactivate an active environment, use

#

# $ conda deactivate

執行 conda activate voc

安裝numpy,mediapipe, pycaw,PyAutoGUI

安裝(先更新pip)

python -m pip install --upgrade pip

pip install opencv-python

這時候會連numpy一起安裝了

pip install mediapipe

這個時候可能會移除原有的numpy，改為安裝適合的版本

pip install pycaw

pip install PyAutoGUI

程式碼說明請參閱vol.py

執行程式 python vol.py

source code https://github.com/Aaru77/Volume-control-using-hand-gesture-using-python-and-openCv/blob/5c3523192faad1bbb4c90c29d73746e9093b3753/VOLUME%20CONTROL%20USING%20HAND%20GESTURE.ipynb

import cv2

import mediapipe as mp

#安裝open-cv跟mediapipe，並匯入所需要的函式庫

from math import hypot

#

#ctypes是可以用來呼叫外部函式庫，這裡呼叫cast跟POINTER

#要取得電腦的音量設定就需要這三個，只是要安裝pycaw

from ctypes import cast, POINTER

from comtypes import CLSCTX_ALL

from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

#從電腦設備中來獲取跟電腦音量設定的相關參數

import numpy as np

#安裝 pycaw 來設定電腦的音量

# Initialise everything the main loop needs: the webcam stream, the MediaPipe
# hand tracker and drawing helper, and the pycaw handle to the speaker volume.
cap = cv2.VideoCapture(0)  # open device 0 to get the video stream from the webcam

mpHands = mp.solutions.hands  # MediaPipe solution that detects hands/fingers

hands = mpHands.Hands()  # build the hand tracker; no arguments, so library defaults apply

mpDraw = mp.solutions.drawing_utils

# MediaPipe's solutions are used both to detect the hand and to draw the lines.

# Access the speaker through the pycaw library.
# NOTE(review): newer pycaw releases reportedly expose the endpoint volume
# differently (e.g. an `EndpointVolume` attribute) -- confirm that `Activate`
# is still available on the installed version.

devices = AudioUtilities.GetSpeakers()

interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)

volume = cast(interface, POINTER(IAudioEndpointVolume))

# The three lines above are what is required to get at the volume setting.

# Initial values for the on-screen volume bar position and percentage text.
volbar=400

volper=0

# Get the volume range (minimum, maximum, increment). The slice below is the
# same as [0:2], i.e. it takes only the minimum and maximum from the result.

# List tutorial: https://medium.com/ccclub/ccclub-python-for-beginners-tutorial-c15425c12009

volMin,volMax = volume.GetVolumeRange()[:2]

# Main loop: read webcam frames, locate the hand landmarks, and map the distance
# between the thumb tip and the index-finger tip onto the system master volume.
# Runs until the space bar is pressed or the camera stops delivering frames.
try:
    while True:
        # `success` reports whether a frame was actually grabbed. Without this
        # check a failed read would crash in cvtColor because `img` is unusable.
        success, img = cap.read()
        if not success or img is None:
            print("Could not read a frame from the webcam; stopping.")
            break

        # Convert the BGR frame to RGB before handing it to MediaPipe.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Run hand detection; this is where the 21 hand landmarks are located.
        results = hands.process(imgRGB)

        # Frame height/width (the third value is the channel count: 3 for a
        # colour image, 1 would be greyscale). Read once per frame rather than
        # once per landmark, since it does not change inside the frame.
        h, w, _ = img.shape

        # One [landmark index, x pixel, y pixel] entry per detected landmark.
        lmList = []

        if results.multi_hand_landmarks:  # list of all hands detected
            for handlandmark in results.multi_hand_landmarks:
                # `handlandmark.landmark` holds points 0..20 of one hand; e.g.
                # index 8 is the index-finger tip. Each point has x, y and z,
                # but only x and y are needed here.
                for idx, lm in enumerate(handlandmark.landmark):
                    # Scale x/y by the frame width/height to get pixel coordinates.
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    lmList.append([idx, cx, cy])

                # Draw every landmark and the connections between them.
                mpDraw.draw_landmarks(img, handlandmark, mpHands.HAND_CONNECTIONS)

        if lmList:  # landmarks were found in this frame
            # Each lmList entry is [index, x, y]; fingertips are 4, 8, 12, 16, 20.
            x1, y1 = lmList[4][1], lmList[4][2]  # thumb tip
            x2, y2 = lmList[8][1], lmList[8][2]  # index-finger tip

            # Mark both fingertips with a filled dot and join them with a line.
            # Arguments: image, centre, radius, colour, thickness/fill.
            cv2.circle(img, (x1, y1), 13, (255, 0, 0), cv2.FILLED)
            cv2.circle(img, (x2, y2), 13, (255, 0, 0), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 3)

            # Distance between the two tips (hypotenuse formula).
            length = hypot(x2 - x1, y2 - y1)

            # Convert the hand range (30..350 px) linearly into:
            #   - the device volume range (e.g. -65.25..0.0; differs per computer),
            #   - the bar's top edge (400 = empty .. 150 = full),
            #   - a 0..100 percentage for the text readout.
            vol = np.interp(length, [30, 350], [volMin, volMax])
            volbar = np.interp(length, [30, 350], [400, 150])
            volper = np.interp(length, [30, 350], [0, 100])

            print(vol, int(length))

            # Apply the volume; pass a plain Python float, not a NumPy scalar.
            volume.SetMasterVolumeLevel(float(vol), None)

            # Volume bar: red outline, then the filled red level inside it.
            # Arguments: image, start corner, end corner, colour, thickness.
            cv2.rectangle(img, (50, 150), (85, 400), (0, 0, 255), 4)
            cv2.rectangle(img, (50, int(volbar)), (85, 400), (0, 0, 255), cv2.FILLED)

            # Volume percentage text.
            # Arguments: image, text, position, font, scale, colour, thickness.
            cv2.putText(img, f"{int(volper)}%", (10, 40), cv2.FONT_ITALIC, 1, (0, 255, 98), 3)

        cv2.imshow('Image', img)  # show the video

        # Press the space bar to quit.
        if cv2.waitKey(1) & 0xff == ord(' '):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()  # stop the camera
    cv2.destroyAllWindows()  # close the window

剪刀石頭布就下載play.py

使用字典方式來調整音量的方式，可以參考，但是我覺得這樣好辛苦啊，而且每台電腦的聲音最大最小值不太一樣，我還是會建議使用我找到利用公式轉換的方式

https://pythonmana.com/2022/03/202203020555199514.html

from ctypes import cast, POINTER

from comtypes import CLSCTX_ALL

from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Get this machine's audio output device and its parameters.
# NOTE(review): newer pycaw releases reportedly expose the endpoint volume
# differently -- confirm that `Activate` is still available on the installed version.

devices = AudioUtilities.GetSpeakers()

interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)

volume = cast(interface, POINTER(IAudioEndpointVolume))

# Get the volume range. On the author's computer this tested as
# (-65.25, 0.0, 0.03125): the first value is the minimum, the second the
# maximum and the third the increment.

vol_range = volume.GetVolumeRange()

print(vol_range)

# Get the current volume level.

vol_now = volume.GetMasterVolumeLevel()

# The values in vol_range do not correspond directly to the 0-100 scale, which
# makes it awkward to set a real volume, so a conversion is needed. The author
# could not determine the conversion formula, hence this lookup table.
# Built once at import time instead of on every call.
_VOLUME_LEVEL_BY_PERCENT = {
    0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62,
    5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17, 9: -34.63,
    10: -33.24, 11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66,
    15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
    20: -23.65, 21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05,
    25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
    30: -17.82, 31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0,
    35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
    40: -13.62, 41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22,
    45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
    50: -10.33, 51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19,
    55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
    60: -7.63, 61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67,
    65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55,
    70: -5.33, 71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5,
    75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53,
    80: -3.34, 81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61,
    85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
    90: -1.58, 91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93,
    95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15,
    100: 0.0,
}


def vol_tansfer(x):
    """Return the master-volume level that corresponds to percentage *x*.

    The function name keeps its original spelling so existing callers still work.

    Args:
        x: Volume percentage; must be one of the table keys 0..100.

    Returns:
        The level to pass to ``SetMasterVolumeLevel``. The table spans
        -65.25 (0%) to 0.0 (100%), matching the range reported for the
        author's computer; other machines may report a different range.

    Raises:
        KeyError: If *x* is not a key of the table.
    """
    return _VOLUME_LEVEL_BY_PERCENT[x]

# Set the volume level, here to the table value looked up for 40.

volume.SetMasterVolumeLevel(vol_tansfer(40), None)

# Check whether the output is muted: 1 means muted, 0 means not muted.

mute = volume.GetMute()

print(mute)