!pip install mediapipe
import math
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import cv2
from scipy.io import wavfile
from google.colab.patches import cv2_imshow
import mediapipe as mp
import matplotlib.pyplot as plt
import matplotlib as mpl
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cap = cv2.VideoCapture('/content/drive/MyDrive/Project Docments/lip sync/1.mp4')
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh(max_num_faces=1)
X_data = []
Y_data = []
Z_data = []
while True:
    success, img = cap.read()
    if not success:   # stop when the video runs out of frames
        break
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = faceMesh.process(img)
    if results.multi_face_landmarks:
        for faceLms in results.multi_face_landmarks:
            mpDraw.draw_landmarks(img, faceLms)
            # tempx, tempy, tempz = [], [], []
            # for lm in faceLms.landmark:
            #     tempx.append(lm.x)
            #     tempy.append(lm.y)
            #     tempz.append(lm.z)
            # X_data.append(tempx)
            # Y_data.append(tempy)
            # Z_data.append(tempz)
            # print(np.array(X_data).shape)
    cv2_imshow(img)
    # print(img.shape)
    cv2.waitKey(1)
X_data = []
Y_data = []
Z_data = []
aud = []
for files in vid_ids:                    # vid_ids: shared base names of the video/audio pairs
    video = vid_dir + '/' + files + '.mp4'
    audio = aud_dir + '/' + files + '.wav'
    cap = cv2.VideoCapture(video)        # reading the video
    Fs, audio = wavfile.read(audio)      # reading the audio
    # finding the fps
    (major_ver, minor_ver, subminor_ver) = cv2.__version__.split('.')
    if int(major_ver) < 3:
        fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
    else:
        fps = cap.get(cv2.CAP_PROP_FPS)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # creating face points
    for i in range(length):
        success, img = cap.read()
        if not success:
            break
        results = faceMesh.process(img)
        if results.multi_face_landmarks:
            tempx = []
            tempy = []
            tempz = []
            for faceLms in results.multi_face_landmarks:
                for lm in faceLms.landmark:
                    tempx.append(lm.x)
                    tempy.append(lm.y)
                    tempz.append(lm.z)
            X_data.append(tempx)
            Y_data.append(tempy)
            Z_data.append(tempz)
    cap.release()
    # creating audio: keep the first channel and concatenate across files
    aud = np.concatenate((aud, audio.T[0]))
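Since all files share the same fps and duration, each video frame spans a fixed number of audio samples. A hypothetical alignment step (not in the original code; samples_per_frame and frame_audio are illustrative names) would slice the waveform per frame:

samples_per_frame = int(Fs / fps)       # audio samples spanned by one video frame
frame_audio = [aud[i * samples_per_frame : (i + 1) * samples_per_frame]
               for i in range(len(X_data))]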
vid_dir ----->
    '1.mp4'
    '2.mp4'
    '3.mp4'
    etc...
aud_dir ----->
    '1.wav'
    '2.wav'
    '3.wav'
    etc...
Here 1, 2, 3 are the file names. You can name them anything you prefer, but make sure each video has a corresponding audio file with exactly the same name. All files should have the same fps and the same duration.
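The helper get_unique is called below but never defined in this section. A minimal sketch, assuming it is meant to flatten MediaPipe's connection sets (frozensets of (start, end) landmark-index pairs) into a sorted list of unique landmark indices:

def get_unique(connections):
    # flatten the (start, end) pairs and keep each landmark index once, in order
    return sorted({index for pair in connections for index in pair})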
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
connection_lips = get_unique(mpFaceMesh.FACEMESH_LIPS)
connection_face = get_unique(mpFaceMesh.FACEMESH_FACE_OVAL)
with mpFaceMesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5) as faceMesh:
    success, img = cap.read()
    results = faceMesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    for face_landmark in results.multi_face_landmarks:
        lms = face_landmark.landmark
        d = {}
        for index in connection_lips:
            x = int(lms[index].x * img.shape[1])   # landmarks are normalised; scale to pixels
            y = int(lms[index].y * img.shape[0])
            d[index] = (x, y)
        # draw the lip points
        for index in connection_lips:
            cv2.circle(img, (d[index][0], d[index][1]), 2, (0, 255, 0), -1)
    cv2_imshow(img)
# for image face points: show the 36 face-oval landmarks one at a time to pick anchor points
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
connection_lips = get_unique(mpFaceMesh.FACEMESH_LIPS)
connection_face = get_unique(mpFaceMesh.FACEMESH_FACE_OVAL)
print(connection_lips)
# connection_face2 = [connection_face[0], connection_face[3], connection_face[6], connection_face[10], connection_face[12], connection_face[20], connection_face[24], connection_face[27], connection_face[29], connection_face[32]]
with mpFaceMesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5) as faceMesh:
    for i in range(36):
        success, img = cap.read()
        results = faceMesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        for face_landmark in results.multi_face_landmarks:
            lms = face_landmark.landmark
            d = {}
            for index in connection_face:
                x = int(lms[index].x * img.shape[1])
                y = int(lms[index].y * img.shape[0])
                d[index] = (x, y)
                cv2.circle(img, (x, y), 2, (255, 0, 0), -1)   # every oval point in blue
            index = connection_face[i]                         # highlight the i-th point in green
            x = int(lms[index].x * img.shape[1])
            y = int(lms[index].y * img.shape[0])
            d[index] = (x, y)
            cv2.circle(img, (x, y), 2, (0, 255, 0), -1)
        print(i)
        cv2_imshow(img)
# for image face points: lip landmarks relative to the midpoint of four face-oval anchors
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
connection_lips = get_unique(mpFaceMesh.FACEMESH_LIPS)
connection_face = get_unique(mpFaceMesh.FACEMESH_FACE_OVAL)
print(connection_lips)
facepoints = [connection_face[3], connection_face[10], connection_face[20], connection_face[24]]
with mpFaceMesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5) as faceMesh:
    success, img = cap.read()
    results = faceMesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    for face_landmark in results.multi_face_landmarks:
        lms = face_landmark.landmark
        x1, y1 = midpoint(lms[facepoints[0]], lms[facepoints[1]], lms[facepoints[2]], lms[facepoints[3]], img)
        d = {}
        for index in connection_lips:
            x = int(lms[index].x * img.shape[1] - x1)   # lip point relative to the face midpoint
            y = int(lms[index].y * img.shape[0] - y1)
            d[index] = (x, y)
            cv2.circle(img, (x, y), 2, (255, 0, 0), -1)
        cv2.circle(img, (x1, y1), 2, (0, 255, 0), -1)   # the midpoint itself in green
    cv2_imshow(img)
def midpoint(point1, point2, point3, point4, img):
    # pixel-space centre of four normalised landmarks
    x = int((point1.x * img.shape[1] + point2.x * img.shape[1] + point3.x * img.shape[1] + point4.x * img.shape[1]) / 4)
    y = int((point1.y * img.shape[0] + point2.y * img.shape[0] + point3.y * img.shape[0] + point4.y * img.shape[0]) / 4)
    return x, y

def angleTheta(x, y):
    # direction of (x, y) from the origin, in radians
    return math.atan2(y, x)

def Facetransform(x0, y0, theta):
    # rotate (x0, y0) so the direction theta maps onto the +y axis,
    # i.e. align the face axis vertically
    x1 = int(x0 * math.sin(theta) - y0 * math.cos(theta))
    y1 = int(x0 * math.cos(theta) + y0 * math.sin(theta))
    return x1, y1
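A quick check of what Facetransform does (the values here are just an illustrative example): angleTheta gives the direction of a point from the origin, and rotating by that angle maps the point onto the +y axis, up to integer truncation.

dx, dy = 30, 40
theta = angleTheta(dx, dy)
print(Facetransform(dx, dy, theta))   # approximately (0, 50): the reference direction becomes "straight down"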
# for image face points: lips in the face-aligned coordinate system (translate + rotate)
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
connection_lips = get_unique(mpFaceMesh.FACEMESH_LIPS)
connection_face = get_unique(mpFaceMesh.FACEMESH_FACE_OVAL)
print(connection_lips)
facepoints = [connection_face[3], connection_face[10], connection_face[20], connection_face[24]]
with mpFaceMesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5) as faceMesh:
    success, img = cap.read()
    results = faceMesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    for face_landmark in results.multi_face_landmarks:
        lms = face_landmark.landmark
        x1, y1 = midpoint(lms[facepoints[0]], lms[facepoints[1]], lms[facepoints[2]], lms[facepoints[3]], img)
        theta = angleTheta(int(lms[facepoints[1]].x * img.shape[1] - x1), int(lms[facepoints[1]].y * img.shape[0] - y1))
        d = {}
        for index in connection_lips:
            x = int(lms[index].x * img.shape[1] - x1)   # translate to the face midpoint
            y = int(lms[index].y * img.shape[0] - y1)
            x, y = Facetransform(x, y, theta)           # rotate into the face-aligned frame
            x = int(x + x1)                             # translate back for display
            y = int(y + y1)
            d[index] = (x, y)
            cv2.circle(img, (x, y), 2, (255, 0, 0), -1)
        print(math.tan(theta))
        cv2.circle(img, (x1, y1), 2, (0, 255, 0), -1)
    cv2_imshow(img)
Here is the lip movement relative to the face. The lip points have been transformed into the coordinate system defined on the face, so when they are plotted again the lips correspond to an upright face rather than the tilted face in the frame. In other words, this is the lip motion expressed relative to the face.
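For training, the face-aligned lip coordinates of each frame can be flattened into one fixed-length vector, matching the audio-to-landmark setup in the references below. A minimal sketch reusing d and connection_lips from the block above (lip_features is an illustrative name):

lip_features = []
for index in connection_lips:
    lip_features.extend(d[index])      # append (x, y) for each lip landmark
lip_features = np.array(lip_features)  # one frame -> one feature vector
print(lip_features.shape)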
References:
https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/
https://pytorch.org/docs/stable/generated/torch.nn.RNN.html
https://discuss.pytorch.org/t/one-to-many-lstm/96932/2
http://grail.cs.washington.edu/projects/AudioToObama/siggraph17_obama.pdf
https://google.github.io/mediapipe/solutions/face_mesh.html