conda create --name voc python=3.10


# To activate this environment, use


#     $ conda activate voc


# To deactivate an active environment, use


#     $ conda deactivate

執行 conda activate voc

安裝 opencv-python、mediapipe、pycaw、PyAutoGUI(numpy 會隨 opencv-python 一併安裝)


python -m pip install --upgrade pip

pip install opencv-python


pip install  mediapipe


pip install  pycaw

pip install  PyAutoGUI


 執行程式 python

source code 

import cv2

import mediapipe as mp


from math import hypot




from ctypes import cast, POINTER

from comtypes import CLSCTX_ALL

from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume


import numpy as np

# pycaw is installed so the script can set the computer's volume (音量)


# Gesture-driven volume control: the pixel distance between the thumb tip and
# the index-finger tip, as tracked by MediaPipe on webcam frames, is mapped
# onto the speaker's volume range and applied through pycaw.

# Open webcam device 0 as the video stream source.
cap = cv2.VideoCapture(0)

# MediaPipe hand-tracking solution; Hands() is created with its default settings.
mpHands = mp.solutions.hands
hands = mpHands.Hands()
# Drawing helpers from MediaPipe (kept available; nothing below draws landmarks).
mpDraw = mp.solutions.drawing_utils

# Reach the speaker's endpoint-volume interface through pycaw.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# GetVolumeRange() yields (minimum, maximum, increment); only the first two
# values are needed, hence the [:2] slice.
volMin, volMax = volume.GetVolumeRange()[:2]

try:
    while True:
        # Grab the next frame; `success` is False when no frame was delivered.
        success, img = cap.read()
        if not success:
            # Stop cleanly instead of failing inside cvtColor on a missing frame.
            break

        # The frame is converted from BGR to RGB before being handed to MediaPipe.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Run hand detection; landmarks are numbered 0-20 (21 points per hand).
        results = hands.process(imgRGB)

        # Collected landmarks as [index, x_pixel, y_pixel].
        lmList = []

        if results.multi_hand_landmarks:  # one entry per detected hand
            # Frame size is constant for the whole frame, so read it once.
            h, w, _ = img.shape
            for handlandmark in results.multi_hand_landmarks:
                for idx, lm in enumerate(handlandmark.landmark):
                    # Scale the landmark's x/y by the frame width/height to
                    # obtain pixel coordinates.
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    lmList.append([idx, cx, cy])

        if lmList:
            # Landmark 4 is the thumb tip, landmark 8 the index-finger tip.
            # When several hands are detected these indices refer to the
            # first hand, because its 21 points are appended first.
            x1, y1 = lmList[4][1], lmList[4][2]
            x2, y2 = lmList[8][1], lmList[8][2]

            # Mark both fingertips with a filled dot (image, centre, radius,
            # colour, fill) and join them with a line. OpenCV frames are BGR
            # (see the BGR2RGB conversion above), so (255, 0, 0) is blue.
            cv2.circle(img, (x1, y1), 13, (255, 0, 0), cv2.FILLED)
            cv2.circle(img, (x2, y2), 13, (255, 0, 0), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 3)

            # Euclidean distance between the two fingertips, in pixels.
            length = hypot(x2 - x1, y2 - y1)

            # Map the hand range (30-350 px) onto the device volume range.
            # np.interp clamps outside the input range, so `vol` always stays
            # within [volMin, volMax]. The source notes this range was
            # -65.25 .. 0.0 on the author's machine and differs per computer.
            vol = np.interp(length, [30, 350], [volMin, volMax])
            # The same distance expressed as 0-100 for the on-screen label.
            volper = np.interp(length, [30, 350], [0, 100])

            volume.SetMasterVolumeLevel(vol, None)

            # Outline of the volume bar: image, top-left, bottom-right,
            # colour, thickness.
            cv2.rectangle(img, (50, 150), (85, 400), (0, 0, 255), 4)

            # Percentage label: image, text, position, font, scale, colour,
            # thickness.
            cv2.putText(img, f"{int(volper)}%", (10, 40), cv2.FONT_ITALIC, 1, (0, 255, 98), 3)

        cv2.imshow('Image', img)  # show the annotated frame

        # Press the space bar to quit.
        if (cv2.waitKey(1) & 0xff) == ord(' '):
            break
finally:
    # Always free the camera and close the preview window, even if the loop
    # above raised.
    cap.release()
    cv2.destroyAllWindows()



# Lookup table keyed by the integers 0..100. The values span -65.25 .. 0.0 and
# are passed to SetMasterVolumeLevel by the caller in this file.
# NOTE(review): presumably volume percentage -> dB level measured on one
# machine; the script's own comments say the range differs per computer.
_VOLUME_LEVEL_TABLE = {
    0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62,
    5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17, 9: -34.63,
    10: -33.24, 11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66,
    15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
    20: -23.65, 21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05,
    25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
    30: -17.82, 31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0,
    35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
    40: -13.62, 41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22,
    45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
    50: -10.33, 51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19,
    55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
    60: -7.63, 61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67,
    65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55,
    70: -5.33, 71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5,
    75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53,
    80: -3.34, 81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61,
    85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
    90: -1.58, 91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93,
    95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15,
    100: 0.0,
}


def vol_tansfer(x):
    """Return the tabulated volume level for *x*.

    Args:
        x: A key of the lookup table, i.e. an integer from 0 to 100.

    Returns:
        The float stored for *x* (from -65.25 at 0 up to 0.0 at 100).

    Raises:
        KeyError: If *x* is not one of the table's keys.
    """
    # The table lives at module level so it is built once, not on every call,
    # and no local name shadows the builtin ``dict``.
    return _VOLUME_LEVEL_TABLE[x]


# Look up the level that vol_tansfer's table lists for 40 and apply it to the
# speaker endpoint.
target_level = vol_tansfer(40)
volume.SetMasterVolumeLevel(target_level, None)

