mediapipe control volume
conda env list
conda create --name voc python=3.10
#執行與關閉虛擬環境
# To activate this environment, use
#
# $ conda activate voc
#
# To deactivate an active environment, use
#
# $ conda deactivate
執行 conda activate voc
安裝numpy,mediapipe, pycaw,PyAutoGUI
安裝(先更新pip)
python -m pip install --upgrade pip
pip install opencv-python
這時候會連numpy一起安裝了
pip install mediapipe
這個時候可能會移除原有的numpy,安裝適合的版本的
pip install pycaw
pip install PyAutoGUI
程式碼說明請參閱vol.py
執行程式 python vol.py
import cv2
import mediapipe as mp
#安裝open-cv跟mediapipe,並匯入所需要的函式庫
from math import hypot
#
#ctypes是可以用來呼叫外部函式庫,這裡呼叫cast跟POINTER
#要取得電腦的音量設定就需要這三個,只是要安裝pycaw
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
#從電腦設備中來獲取跟電腦音量設定的相關參數
import numpy as np
#安裝 pycaw 以設定電腦的音量
# Hand-gesture volume control: the pixel distance between the thumb tip and
# the index-finger tip seen by the webcam is mapped onto the speaker's master
# volume. Press the space bar in the preview window to quit.

# Open the default webcam (device 0) as the video stream source.
cap = cv2.VideoCapture(0)

# MediaPipe Hands locates the hand landmarks; drawing_utils renders them.
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils

# Reach the speaker's endpoint-volume interface through pycaw; all three
# steps are needed before the volume can be read or set.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# State of the on-screen meter until the first hand is measured: bar top at
# y=400 (an empty bar) and 0 %.
volbar = 400
volper = 0

# GetVolumeRange() yields (minimum, maximum, increment); keep the first two.
# The range differs between machines (e.g. -65.25 to 0.0).
volMin, volMax = volume.GetVolumeRange()[:2]

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered (camera missing, busy or disconnected).
            # Stop here instead of handing an unusable frame to cvtColor.
            break

        # MediaPipe works on RGB images, whereas OpenCV delivers BGR frames.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(imgRGB)

        # The frame size is the same for every landmark, so read it once per
        # frame; the third value (channel count) is not needed.
        h, w, _ = img.shape

        # One entry per landmark: [landmark index, x in pixels, y in pixels].
        lmList = []
        if results.multi_hand_landmarks:
            for handlandmark in results.multi_hand_landmarks:
                for idx, lm in enumerate(handlandmark.landmark):
                    # Scale lm.x / lm.y by the frame size to get pixel
                    # coordinates; only x and y are used.
                    lmList.append([idx, int(lm.x * w), int(lm.y * h)])
                # Draw the landmarks and the connections between them.
                mpDraw.draw_landmarks(img, handlandmark, mpHands.HAND_CONNECTIONS)

        if lmList:
            # Finger tips are landmarks 4, 8, 12, 16 and 20; 4 is the thumb
            # tip and 8 the index-finger tip. When several hands are detected
            # these indices refer to the first hand in the list.
            x1, y1 = lmList[4][1], lmList[4][2]
            x2, y2 = lmList[8][1], lmList[8][2]

            # Mark both tips with a filled dot and join them with a line
            # (arguments: image, position(s), radius/thickness, BGR colour).
            cv2.circle(img, (x1, y1), 13, (255, 0, 0), cv2.FILLED)
            cv2.circle(img, (x2, y2), 13, (255, 0, 0), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 3)

            # Euclidean distance between the two tips, in pixels.
            length = hypot(x2 - x1, y2 - y1)

            # Map the usable hand range (30-350 px) linearly onto the
            # device's volume range, the meter's bar top (y=400 empty,
            # y=150 full) and a 0-100 percentage.
            vol = np.interp(length, [30, 350], [volMin, volMax])
            volbar = np.interp(length, [30, 350], [400, 150])
            volper = np.interp(length, [30, 350], [0, 100])
            print(vol, int(length))

            # Apply the new master volume level.
            volume.SetMasterVolumeLevel(vol, None)

        # Volume meter, drawn from the most recently measured values: an
        # outlined frame, the filled level inside it, and the percentage text.
        cv2.rectangle(img, (50, 150), (85, 400), (0, 0, 255), 4)
        cv2.rectangle(img, (50, int(volbar)), (85, 400), (0, 0, 255), cv2.FILLED)
        cv2.putText(img, f"{int(volper)}%", (10, 40), cv2.FONT_ITALIC, 1, (0, 255, 98), 3)

        cv2.imshow('Image', img)  # show the annotated frame

        # Quit when the space bar is pressed.
        if (cv2.waitKey(1) & 0xff) == ord(' '):
            break
finally:
    # Always free the camera and close the preview window, even when the
    # loop ends because of an exception.
    cap.release()
    cv2.destroyAllWindows()
剪刀石頭布就下載play.py
使用字典方式來調整音量的方式,可以參考,但是我覺得這樣好辛苦啊,而且每台電腦的聲音最大最小值不太一樣,我還是會建議使用我找到利用公式轉換的方式
https://pythonmana.com/2022/03/202203020555199514.html
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
# Get this machine's speaker device and its endpoint-volume interface.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# Query the volume range. On the author's machine it was measured as
# (-65.25, 0.0, 0.03125): the first value is the minimum, the second the
# maximum and the third the increment.
vol_range = volume.GetVolumeRange()
print(vol_range)
# Read the current master volume level.
vol_now = volume.GetMasterVolumeLevel()
#由于vol_range与0-100这个不是对应的关系,不方便设置实际的声音,故需要进行转换,但是无法得知其转换关系,只能通过字典的形式查询:
# Lookup table from a 0-100 volume percentage to the master volume level that
# is handed to SetMasterVolumeLevel. The values belong to a device whose
# volume range is (-65.25, 0.0); other machines may report a different range.
# Built once at import time rather than on every call.
_VOL_LEVEL_BY_PERCENT = {
    0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62,
    5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17, 9: -34.63,
    10: -33.24, 11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66,
    15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
    20: -23.65, 21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05,
    25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
    30: -17.82, 31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0,
    35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
    40: -13.62, 41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22,
    45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
    50: -10.33, 51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19,
    55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
    60: -7.63, 61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67,
    65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55,
    70: -5.33, 71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5,
    75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53,
    80: -3.34, 81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61,
    85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
    90: -1.58, 91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93,
    95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15,
    100: 0.0,
}


def vol_tansfer(x):
    """Return the master volume level for volume percentage *x*.

    Args:
        x: Volume percentage, a whole number from 0 to 100 inclusive.

    Returns:
        The level stored in the lookup table for *x*, from -65.25 (0 %)
        up to 0.0 (100 %).

    Raises:
        KeyError: If *x* is not one of the table's keys (0 to 100).
    """
    return _VOL_LEVEL_BY_PERCENT[x]
# Set the master volume to the level that the lookup table gives for 40 %.
volume.SetMasterVolumeLevel(vol_tansfer(40), None)
# Check the mute state: 1 means the output is muted, 0 means it is not.
mute = volume.GetMute()
print(mute)