音声ファイルに字幕を付ける

音声だけのファイルに字幕を付ける方法をメモる。基本的には、字幕の付いたビデオを保存するのと同じだが、字幕を入れるには映像トラックが必要で、それを準備するところが面倒なことになっているので、別の項目としてみた。まずは、そちらに目を通してから、こちらを見てください。

まずは、音声ファイルを読み込んで、AVMutableComposition オブジェクトを作り、読み込んだ音声を入れる。

# Load the audio-only source file and build a mutable composition from it.
asset = AVAsset.assetWithURL(url)

composition = AVMutableComposition.composition

# Copy the full duration of the source asset into the composition.
composition.insertTimeRange(CMTimeRangeMake(KCMTimeZero,asset.duration),ofAsset:asset,atTime:KCMTimeZero,error:nil)

ここが、ちょっとめんどい。

# Subtitles can only be rendered over a video track, so add an empty video
# track and fill it from a short (~1 s) all-black "blank" movie file.
videoTrack = composition.addMutableTrackWithMediaType(AVMediaTypeVideo,preferredTrackID:KCMPersistentTrackID_Invalid)

# FIX: the closing parenthesis was missing on this line.
blankAsset = AVAsset.assetWithURL(NSURL.fileURLWithPath(pathToBlankMovFile))

blankVideoTrack = blankAsset.tracksWithMediaType(AVMediaTypeVideo)[0]

# Insert the short blank clip, then stretch it so the video track covers
# the entire duration of the (audio-only) composition.
videoTrack.insertTimeRange(CMTimeRangeMake(KCMTimeZero,blankAsset.duration),ofTrack:blankVideoTrack,atTime:KCMTimeZero,error:nil)

videoTrack.scaleTimeRange(CMTimeRangeMake(KCMTimeZero,blankAsset.duration),toDuration:composition.duration)

何をしているかというと、一秒くらいの、ブランク映像(黒いだけ)の mov ファイルを用意しておいて、それを読み込んで処理している。まずは、空の videoTrack を作る。そしたら、ブランク映像ファイルを読み込んで、映像トラックを取り出す。取り出したら、空の videoTrack に追加して、それを引き延ばして、音声ファイル全体の長さと一致するようにしている。

次に、AVMutableVideoComposition の準備。ここでは、出力したい映像の大きさを指定して、フレームレートは 30 fps にしている。

# Output movie size (480x200) and frame rate (30 fps).
videoSize = CGSizeMake(480,200)

videoComposition = AVMutableVideoComposition.videoComposition

# frameDuration is 1/30 s per frame, i.e. 30 fps.
videoComposition.frameDuration = CMTimeMake(1, 30)

videoComposition.renderSize = videoSize

同期させる AVPlayerItem を作って、synchLayer を用意する。

# Player item wrapping the composition; the AVSynchronizedLayer ties
# Core Animation timing to this item's timeline.
playerItem = AVPlayerItem.playerItemWithAsset(composition)

synchLayer = AVSynchronizedLayer.synchronizedLayerWithPlayerItem(playerItem)

synchLayer.setFrame(CGRectMake(0, 0, videoSize.width, videoSize.height))

synchLayer.setMasksToBounds(false)

# NOTE(review): bounds largely restates the frame set above; the origin
# anchorPoint plus position (0,0) pins the layer to the top-left.
synchLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

synchLayer.anchorPoint = CGPointMake(0,0)

synchLayer.position = CGPointMake(0,0)

そして、字幕の layer の準備。

# Layer whose `contents` is swapped through the subtitle images.
overlayLayer = CALayer.layer

# @maxWidth/@maxHeight — presumably the largest subtitle image size;
# TODO confirm where these ivars are assigned.
overlayLayer.frame = CGRectMake(0, 0, @maxWidth, @maxHeight)

# Center the overlay within the output frame.
overlayLayer.anchorPoint = CGPointMake(0.5,0.5)

overlayLayer.position = CGPointMake(videoSize.width/2,videoSize.height/2)

overlayLayer.contentsGravity = KCAGravityResizeAspect

# Keyframe-animate `contents`: each subtitle image is shown from its
# corresponding entry in subtitleTimes (keyTimes are normalized 0..1 per
# the CAKeyframeAnimation API — confirm subtitleTimes matches).
anim = CAKeyframeAnimation.animationWithKeyPath("contents")

# Required start time for animations rendered by AVFoundation export.
anim.beginTime = AVCoreAnimationBeginTimeAtZero

anim.duration = CMTimeGetSeconds(playerItem.duration)

anim.values = subTImages.map{|x| x.CGImage}

anim.keyTimes = subtitleTimes

# Discrete mode: cut between images instead of interpolating.
anim.calculationMode = KCAAnimationDiscrete

# Keep the final subtitle visible after the animation ends.
anim.removedOnCompletion = false

anim.fillMode = KCAFillModeForwards

overlayLayer.addAnimation(anim,forKey:"contents")

synchLayer に直接 overlayLayer を入れてもいいのだが、字幕の背景色を決めたいときは、overlayLayer の範囲だけでなく、全体を同じ色にしたいので、その色を指定するためだけの layer を用意した。そして、この layer に overlayLayer を追加して、この layer を synchLayer に追加する。

# Backing layer that paints the subtitle background color across the
# whole frame (not just the overlay's bounds), as explained above.
aLayer = CALayer.layer

aLayer.frame = synchLayer.frame

aLayer.backgroundColor = NSColor.blackColor.CGColor

aLayer.addSublayer(overlayLayer)

# Anchor at the origin so position (0,0) pins the layer top-left.
aLayer.anchorPoint = CGPointMake(0,0)

aLayer.position = CGPointMake(0,0)

synchLayer.addSublayer(aLayer)

字幕を含む layer を videoLayer に合成する処理。この辺りも、字幕の付いたビデオを保存するのと同じ。

# Layer tree for export: parentLayer holds the rendered video
# (videoLayer) with the subtitle tree (synchLayer) composited on top.
parentLayer = CALayer.layer

videoLayer = CALayer.layer

parentLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

videoLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

# Added in this order so the subtitles draw above the video.
parentLayer.addSublayer(videoLayer)

parentLayer.addSublayer(synchLayer)

parentLayer.anchorPoint = CGPointMake(0,0)

videoLayer.anchorPoint = CGPointMake(0,0)

parentLayer.position = CGPointMake(0,0)

videoLayer.position = CGPointMake(0,0)

# The animation tool renders each video frame into videoLayer and
# composites parentLayer into the exported movie.
videoComposition.animationTool = AVVideoCompositionCoreAnimationTool.videoCompositionCoreAnimationToolWithPostProcessingAsVideoLayer(videoLayer,inLayer:parentLayer)

ここから先も、特に変わったことはないので、説明は省略。

# One instruction spanning the whole composition, rendering the (blank)
# video track that the subtitle layers are composited over.
instruction = AVMutableVideoCompositionInstruction.videoCompositionInstruction

instruction.timeRange = CMTimeRangeMake(KCMTimeZero, composition.duration)

layerInstruction = AVMutableVideoCompositionLayerInstruction.videoCompositionLayerInstructionWithAssetTrack(videoTrack)

instruction.layerInstructions = [layerInstruction]

videoComposition.instructions = [instruction]

exportSession = AVAssetExportSession.alloc.initWithAsset(composition,presetName:AVAssetExportPresetAppleM4V480pSD)

exportSession.videoComposition = videoComposition

# NOTE(review): `url` is the same variable the source audio was loaded
# from above — exporting onto the source file will fail; a distinct
# output URL is presumably intended (the full listing uses @panel.URL).
exportSession.outputURL = url

exportSession.outputFileType = AVFileTypeAppleM4V

# FIX: the handler referenced @exportSession (an ivar never assigned
# here, so always nil) instead of the local exportSession, which the
# Proc captures. The nil case matched neither status branch.
exportSession.exportAsynchronouslyWithCompletionHandler(Proc.new{

case exportSession.status

when AVAssetExportSessionStatusCompleted

when AVAssetExportSessionStatusFailed

end

})

こんな感じで、音声だけのファイルに字幕を付けて映像として保存できる(はず)。以下に、ここまでのコード全体をまとめて再掲しておく。

# Build the composition from the (already loaded) audio asset.
composition = AVMutableComposition.composition

composition.insertTimeRange(CMTimeRangeMake(KCMTimeZero,asset.duration),ofAsset:asset,atTime:KCMTimeZero,error:nil)

# FIX: the closing parenthesis was missing on this line.
blankAsset = AVAsset.assetWithURL(NSURL.fileURLWithPath(pathToBlankMovFile))

# Empty video track, filled with the short blank clip and stretched to
# cover the full duration of the audio-only composition.
videoTrack = composition.addMutableTrackWithMediaType(AVMediaTypeVideo,preferredTrackID:KCMPersistentTrackID_Invalid)

blankVideoTrack = blankAsset.tracksWithMediaType(AVMediaTypeVideo)[0]

videoTrack.insertTimeRange(CMTimeRangeMake(KCMTimeZero,blankAsset.duration),ofTrack:blankVideoTrack,atTime:KCMTimeZero,error:nil)

videoTrack.scaleTimeRange(CMTimeRangeMake(KCMTimeZero,blankAsset.duration),toDuration:composition.duration)

# Player item wrapping the composition; the synchronized layer ties
# Core Animation timing to its timeline.
playerItem = AVPlayerItem.playerItemWithAsset(composition)

# Output size (480x200) and 30 fps frame rate for the rendered movie.
videoSize = CGSizeMake(480,200)

videoComposition = AVMutableVideoComposition.videoComposition

videoComposition.frameDuration = CMTimeMake(1, 30)

videoComposition.renderSize = videoSize

synchLayer = AVSynchronizedLayer.synchronizedLayerWithPlayerItem(playerItem)

synchLayer.setFrame(CGRectMake(0, 0, videoSize.width, videoSize.height))

synchLayer.setMasksToBounds(false)

synchLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

# Anchor at the origin so position (0,0) pins the layer top-left.
synchLayer.anchorPoint = CGPointMake(0,0)

synchLayer.position = CGPointMake(0,0)

# Layer whose `contents` is swapped through the subtitle images.
overlayLayer = CALayer.layer

# @maxWidth/@maxHeight — presumably the largest subtitle image size;
# TODO confirm where these ivars are assigned.
overlayLayer.frame = CGRectMake(0, 0, @maxWidth, @maxHeight)

# Centered within the output frame.
overlayLayer.anchorPoint = CGPointMake(0.5,0.5)

overlayLayer.position = CGPointMake(videoSize.width/2,videoSize.height/2)

overlayLayer.contentsGravity = KCAGravityResizeAspect

# Rounded corners — an addition over the snippet shown earlier.
overlayLayer.cornerRadius = 10.0

# Keyframe-animate `contents`: each subtitle image is shown from its
# corresponding entry in subtitleTimes.
anim = CAKeyframeAnimation.animationWithKeyPath("contents")

# Required start time for animations rendered by AVFoundation export.
anim.beginTime = AVCoreAnimationBeginTimeAtZero

anim.duration = CMTimeGetSeconds(playerItem.duration)

anim.values = subTImages.map{|x| x.CGImage}

anim.keyTimes = subtitleTimes

# Discrete mode: cut between images instead of interpolating.
anim.calculationMode = KCAAnimationDiscrete

# Keep the final subtitle visible after the animation ends.
anim.removedOnCompletion = false

anim.fillMode = KCAFillModeForwards

overlayLayer.addAnimation(anim,forKey:"contents")

# Backing layer that paints the subtitle background color across the
# whole frame, with the subtitle overlay on top.
aLayer = CALayer.layer

aLayer.frame = synchLayer.frame

aLayer.backgroundColor = NSColor.blackColor.CGColor

aLayer.addSublayer(overlayLayer)

# Anchor at the origin so position (0,0) pins the layer top-left.
aLayer.anchorPoint = CGPointMake(0,0)

aLayer.position = CGPointMake(0,0)

synchLayer.addSublayer(aLayer)

# Layer tree for export: parentLayer holds the rendered video
# (videoLayer) with the subtitle tree (synchLayer) composited on top.
parentLayer = CALayer.layer

videoLayer = CALayer.layer

parentLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

videoLayer.bounds = CGRectMake(0, 0, videoSize.width, videoSize.height)

# Added in this order so the subtitles draw above the video.
parentLayer.addSublayer(videoLayer)

parentLayer.addSublayer(synchLayer)

parentLayer.anchorPoint = CGPointMake(0,0)

videoLayer.anchorPoint = CGPointMake(0,0)

parentLayer.position = CGPointMake(0,0)

videoLayer.position = CGPointMake(0,0)

# The animation tool renders each video frame into videoLayer and
# composites parentLayer into the exported movie.
videoComposition.animationTool = AVVideoCompositionCoreAnimationTool.videoCompositionCoreAnimationToolWithPostProcessingAsVideoLayer(videoLayer,inLayer:parentLayer)

# One instruction spanning the whole composition, rendering the (blank)
# video track that the subtitle layers are composited over.
instruction = AVMutableVideoCompositionInstruction.videoCompositionInstruction

instruction.timeRange = CMTimeRangeMake(KCMTimeZero, composition.duration)

layerInstruction = AVMutableVideoCompositionLayerInstruction.videoCompositionLayerInstructionWithAssetTrack(videoTrack)

instruction.layerInstructions = [layerInstruction]

videoComposition.instructions = [instruction]

exportSession = AVAssetExportSession.alloc.initWithAsset(composition,presetName:AVAssetExportPresetAppleM4V480pSD)

exportSession.videoComposition = videoComposition

# @panel is presumably a save panel chosen by the user — TODO confirm.
exportSession.outputURL = @panel.URL

exportSession.outputFileType = AVFileTypeAppleM4V

# FIX: the handler referenced @exportSession (an ivar never assigned
# here, so always nil) instead of the local exportSession, which the
# Proc captures. The nil case matched neither status branch.
exportSession.exportAsynchronouslyWithCompletionHandler(Proc.new{

case exportSession.status

when AVAssetExportSessionStatusCompleted

when AVAssetExportSessionStatusFailed

end

})