Instead of using text (optionally using HTML for styling, multi lines, etc.), it is also possible to use JSON objects as cue values that can be manipulated from JavaScript. JSON means "JavaScript Object Notation". It's an open standard for describing JavaScript objects as plain text.
Here is an example cue from a WebVTT file encoded as JSON instead of plain text. JSON is useful for describing "structured data", and processing such data from JavaScript is easier than parsing plain text.
WEBVTT
Wikipedia
00:01:15.200 --> 00:02:18.800
{
"title": "State of Wikipedia",
"description": "Jimmy Wales talking ...",
"src": "https://upload.wikimedia.org/...../120px-Wikipedia-logo-v2.svg.png",
"href": "https://en.wikipedia.org/wiki/Wikipedia"
}
This JSON object (the text between the curly braces in the cue above) is a JavaScript object encoded as a text string. If we listen for cue events, or if we read a WebVTT file as we did in previous examples, we can extract this text content using the cue.text property. For example:
// Read the first cue of the first <track> of the video and decode its
// JSON payload. Block-scoped `const` is used because none of these
// bindings is ever reassigned.
const videoElement = document.querySelector("#myvideo");
const textTracks = videoElement.textTracks; // one for each track element
const textTrack = textTracks[0]; // corresponds to the first track element
const cues = textTrack.cues;
const cue = cues[0]; // first cue
// cue.text is in JSON format, with JSON.parse we turn it back
// to a real JavaScript object
const obj = JSON.parse(cue.text);
const title = obj.title; // "State of Wikipedia"
const description = obj.description; // Jimmy Wales talking...
etc...
This is a powerful way of embedding metadata, especially when used in conjunction with listening for cue and track events.
Earlier we saw an example that could display chapter markers as clickable text on the right of a video.
This example used only standard plain text content for the cues:
WEBVTT
chapter-1
00:00:00.000 --> 00:00:26.000
Introduction
chapter-2
00:00:28.206 --> 00:01:02.000
Watch out!
...
We used this example to manually capture the images from the video that correspond to each of the seven chapters:
We clicked on each chapter link on the right, then paused the video,
then we used a screen capture tool to grab the image that corresponds to the beginning of each chapter,
Finally, we resized the images with Photoshop to approximately 200x400 pixels.
(For advanced users: it's possible to semi-automate this process using the ffmpeg command line tool, see for example this and that).
Here are the images which correspond to the seven chapters of the video from the previous example:
To associate each of these images with its chapter description, we will use JSON objects as cue contents:
elephants-dream-chapters-en-JSON.vtt:
WEBVTT
chapter-1
00:00:00.000 --> 00:00:26.000
{
"description": "Introduction",
"image": "introduction.jpg"
}
chapter-2
00:00:28.206 --> 00:01:02.000
{
"description": "Watch out!",
"image": "watchOut.jpg"
}
...
Before explaining the code, let's look at the codepen that uses this new .vtt file:
HTML code:
<html lang="en">
<head>
<meta charset=utf-8>
<title>Video player with chapter menu</title>
</head>
<body>
<section id="all">
<h1>Using JSON to describe chapter markers</h1>
This example uses a WebVTT file with kind=chapters, that contains JSON cues for associating images and chapter marker descriptions.
<p>
<video id="myVideo" preload="metadata" controls crossOrigin="anonymous">
<source src="https://mainline.i3s.unice.fr/mooc/elephants-dream-medium.mp4" type="video/mp4">
<source src="https://mainline.i3s.unice.fr/mooc/elephants-dream-medium.webm" type="video/webm">
<track label="English subtitles" kind="subtitles" srclang="en" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-subtitles-en.vtt" >
<track label="Deutsch subtitles" kind="subtitles" srclang="de" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-subtitles-de.vtt" default>
<track label="English chapters" kind="chapters" srclang="en" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-chapters-en-JSON.vtt">
</video>
<h2>Chapter menu</h2>
<div id="chapterMenu"></div>
</section>
</body>
</html>
It's the same code we had in the first example, except that this time we use a new WebVTT file that uses JSON cues to describe each chapter. For the sake of simplicity, we also removed the buttons and all the code for displaying a clickable transcript of the subtitles/captions on the right of the video.
CSS code:
#all {
background-color: lightgrey;
border-radius:10px;
padding: 20px;
border:1px solid;
display:inline-block;
/*height:500px;*/
margin:30px;
width:90%;
}
#myVideo {
border-radius:10px;
border:1px solid;
display: block;
margin-right: 2.85714%;
width: 100%;
background-color: black;
position: relative;
box-shadow: 5px 5px 5px grey;
}
#chapterMenuSection {
background-color: lightgrey;
border-radius:10px;
padding: 20px;
border:1px solid;
display:inline-block;
margin:0px 30px 30px 30px;
width:90%;
}
figure.img {
margin: 2px;
float: left;
}
figcaption.desc {
text-align: center;
font-weight: normal;
margin: 2px;
}
.thumb {
height: 75px;
border: 1px solid #000;
margin: 10px 5px 0 0;
box-shadow: 5px 5px 5px grey;
transition: all 0.5s;
}
.thumb:hover {
box-shadow: 5px 5px 5px black;
}
JS code:
// Page-wide state shared by the functions below.
let video;
let chapterMenuDiv;
let tracks;
let trackElems;
let tracksURLs = [];

// Once the page has loaded: look up the DOM elements, record the URL of
// every <track> element, grab the video's TextTrack objects and build
// the English chapter menu.
window.onload = () => {
  console.log("init");

  video = document.querySelector("#myVideo");
  chapterMenuDiv = document.querySelector("#chapterMenu");

  // The <track> HTML elements, plus the source URL of each of them
  trackElems = document.querySelectorAll("track");
  trackElems.forEach((trackElem, index) => {
    tracksURLs[index] = trackElem.src;
  });

  // The same tracks, seen as JavaScript TextTrack objects
  tracks = video.textTracks;

  buildChapterMenu('en', 'chapters');
};
/**
 * Finds every text track whose language and kind match the arguments and
 * displays its chapter markers, immediately if the track is already
 * loaded, otherwise once its <track> element fires "load".
 *
 * Reads the globals `tracks` and `trackElems`, which are walked with the
 * same index.
 *
 * @param {string} lang - language code to match against track.language
 * @param {string} kind - track kind to match against track.kind
 */
function buildChapterMenu(lang, kind) {
  for (let index = 0; index < tracks.length; index++) {
    const candidate = tracks[index];
    const isRequestedTrack =
      candidate.language === lang && candidate.kind === kind;
    if (!isRequestedTrack) {
      continue;
    }

    // the track must be active if we want to highlight the
    // current chapter while the video is playing
    candidate.mode = "showing";

    const candidateElem = trackElems[index];
    // readyState 2: the track has already been loaded
    const alreadyLoaded = candidateElem.readyState === 2;
    if (alreadyLoaded) {
      displayChapterMarkers(candidate);
    } else {
      displayChapterMarkersAfterTrackLoaded(candidateElem, candidate);
    }
  }
}
/**
 * Adds one clickable thumbnail per cue of a chapter track to the global
 * `chapterMenuDiv` element.
 *
 * The text of each cue is parsed as JSON and must yield an object with a
 * `description` (caption text) and an `image` (file name appended to the
 * image base URL). Cues whose text is not a JSON object are skipped with
 * a console warning so that one bad cue does not abort the whole menu.
 *
 * The markup is built with DOM calls rather than an HTML string: the
 * description is inserted as plain text, so characters such as "<" or
 * "&" in the cue cannot break the page.
 *
 * @param {TextTrack} track - chapter track; its mode is set to "hidden"
 */
function displayChapterMarkers(track) {
  const cues = track.cues;
  // We should not see the cues on the video.
  track.mode = "hidden";
  if (!cues) {
    // No cue list available: nothing to display.
    return;
  }

  for (const cue of Array.from(cues)) {
    //addCueListeners(cue);
    let cueObject = null;
    try {
      cueObject = JSON.parse(cue.text);
    } catch (err) {
      cueObject = null;
    }
    if (cueObject === null || typeof cueObject !== "object") {
      console.warn("Skipping chapter cue whose text is not a JSON object:", cue.text);
      continue;
    }

    const imageURL = "https://mainline.i3s.unice.fr/mooc/" + cueObject.image;

    const img = document.createElement('img');
    img.classList.add("thumb");
    img.setAttribute("src", imageURL);
    // Inline handler, so the generated element still carries the
    // onclick="jumpTo(<start time>);" attribute; startTime is a number.
    img.setAttribute("onclick", "jumpTo(" + cue.startTime + ");");

    const caption = document.createElement('figcaption');
    caption.classList.add("desc");
    caption.textContent = cueObject.description;

    // add an image to the menu
    const figure = document.createElement('figure');
    figure.classList.add("img");
    figure.appendChild(img);
    figure.appendChild(caption);
    chapterMenuDiv.appendChild(figure);
  }
}
/**
 * Defers the display of the chapter markers until the given <track>
 * element has fired its "load" event.
 *
 * @param {HTMLTrackElement} trackElem - element the "load" listener is attached to
 * @param {TextTrack} track - track handed to displayChapterMarkers on load
 */
function displayChapterMarkersAfterTrackLoaded(trackElem, track) {
  const onTrackLoaded = () => {
    console.log("chapter track loaded");
    displayChapterMarkers(track);
  };

  trackElem.addEventListener('load', onTrackLoaded);
}
/**
 * Moves the global `video` to the given time and starts playback.
 *
 * video.play() may return a Promise that rejects when playback cannot
 * start (for example when it is interrupted or blocked by the browser).
 * The rejection is handled here so that it does not surface as an
 * unhandled promise rejection.
 *
 * @param {number} time - position to seek to, in seconds
 */
function jumpTo(time) {
  video.currentTime = time;

  const playback = video.play();
  // Older browsers return undefined instead of a Promise.
  if (playback && typeof playback.catch === "function") {
    playback.catch((err) => {
      console.warn("Playback could not be started:", err);
    });
  }
}
Lines 4-18: when the page is loaded, we assemble all of the track HTML elements and their corresponding TextTrack objects.
Line 19: using that we can build the chapter navigation menu. All is done in the window.onload callback, so nothing happens until the DOM is ready.
Lines 24-43: the buildChapterMenu function first locates the chapter track for the given language, then checks if this track has been loaded by the browser. Once it has been confirmed that the track is loaded, the function displayChapterMarkers is called.
Lines 45-65: the displayChapterMarkers(track) function will iterate over all of the cues within the chapter track passed as its parameter. For each cue, the JSON content is parsed back into a JavaScript object (line 55) and the image filename and description of the chapter/cue are extracted (lines 56-57). Then an HTML description for the chapter is built and added to the div element with id=chapterMenu. Here is the HTML code for one menu marker:
<figure class="img">
<img onclick="jumpTo(0);" class="thumb" src="https://...../introduction.jpg">
<figcaption class="desc">
Introduction
</figcaption>
</figure>
Notice that we add a click listener to each thumbnail image. Clicking a chapter thumbnail will cause the video to jump to the chapter time location (the example above is for the first chapter with start time = 0).
We also added CSS classes "img", "thumb" and "desc", which make it easy to style and position the thumbnails using CSS.
CSS source code extract:
#chapterMenuSection {
background-color: lightgrey;
border-radius:10px;
padding: 20px;
border:1px solid;
display:inline-block;
margin:0px 30px 30px 30px;
width:90%;
}
figure.img {
margin: 2px;
float: left;
}
figcaption.desc {
text-align: center;
font-weight: normal;
margin: 2px;
}
.thumb {
height: 75px;
border: 1px solid #000;
margin: 10px 5px 0 0;
box-shadow: 5px 5px 5px grey;
transition: all 0.5s;
}
.thumb:hover {
box-shadow: 5px 5px 5px black;
}
This example is the same as the previous one except that we have kept the features that we saw previously: the buttons for displaying a clickable transcript. The code is longer, but it's just a combination of the "clickable transcript" example from the previous lesson, and the code from earlier in this lesson.
HTML code:
<html lang="en">
<head>
<meta charset=utf-8>
<title>Video player with clickable transcript</title>
</head>
<body>
<section id="all">
<!-- The tag name is entity-escaped so it is shown as text instead of being parsed as an element -->
<h1>Using the track API to extract the content of webVTT files in <code>&lt;track&gt;</code> elements</h1>
<p>Click on the buttons under the video to extract the english or german subtitles, or to display the chapter markers (english).
</p>
<p>Look at the HTML and JS code.</p>
<p>
<button disabled id="buttonEnglish" onclick="loadTranscript('en', 'subtitles');">Display English transcript</button>
<button disabled id="buttonDeutsch" onclick="loadTranscript('de', 'subtitles');">Display Deutsch transcript</button>
<button disabled id="buttonEnglishChapters" onclick="buildChapterMenu('en', 'chapters');">Display English chapter markers</button>
</p>
<video id="myVideo" preload="metadata" controls crossOrigin="anonymous">
<source src="https://mainline.i3s.unice.fr/mooc/elephants-dream-medium.mp4" type="video/mp4">
<source src="https://mainline.i3s.unice.fr/mooc/elephants-dream-medium.webm" type="video/webm">
<track label="English subtitles" kind="subtitles" srclang="en" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-subtitles-en.vtt" >
<track label="Deutsch subtitles" kind="subtitles" srclang="de" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-subtitles-de.vtt" default>
<track label="English chapters" kind="chapters" srclang="en" src="https://mainline.i3s.unice.fr/mooc/elephants-dream-chapters-en-JSON.vtt">
</video>
<div id="transcript"></div>
</section>
<!-- The id uses the same casing as the #chapterMenuSection CSS selector; ID selectors are case-sensitive in standards mode -->
<section id="chapterMenuSection">
<h2>Chapter menu</h2>
<div id="chapterMenu"></div>
</section>
</body>
</html>
CSS code:
#all {
background-color: lightgrey;
border-radius:10px;
padding: 20px;
border:1px solid;
display:inline-block;
/*height:500px;*/
margin:30px;
width:90%;
}
#chapterMenuSection {
background-color: lightgrey;
border-radius:10px;
padding: 20px;
border:1px solid;
display:inline-block;
/*height:500px;*/
margin:0px 30px 30px 30px;
width:90%;
}
.cues {
color:blue;
}
.cues:hover {
text-decoration: underline;
}
.cues.current {
color:black;
font-weight: bold;
}
#myVideo {
display: block;
float : left;
margin-right: 2.85714%;
width: 65.71429%;
background-color: black;
position: relative;
}
#transcript {
padding: 10px;
border:1px solid;
float: left;
max-height: 225px;
overflow: auto;
width: 25%;
margin: 0;
font-size: 14px;
list-style: none;
}
figure.img {
margin: 2px;
float: left;
}
figcaption.desc {
text-align: center;
font-weight: normal;
margin: 2px;
}
.thumb {
height: 75px;
border: 1px solid #000;
margin: 10px 5px 0 0;
box-shadow: 5px 5px 5px grey;
transition: all 0.5s;
}
.thumb:hover {
box-shadow: 5px 5px 5px black;
}
JS code
// Page-wide state shared by the functions below.
let video;
let transcriptDiv;
let chapterMenuDiv;
let tracks;
let trackElems;
let tracksURLs = [];
let buttonEnglish;
let buttonDeutsch;
let buttonEnglishChapters;

// Once the page has loaded: look up the DOM elements, record the URL of
// every <track> element, enable the three buttons and grab the video's
// TextTrack objects.
window.onload = () => {
  console.log("init");

  video = document.querySelector("#myVideo");
  transcriptDiv = document.querySelector("#transcript");
  chapterMenuDiv = document.querySelector("#chapterMenu");

  // The <track> HTML elements, plus the source URL of each of them
  trackElems = document.querySelectorAll("track");
  trackElems.forEach((trackElem, index) => {
    tracksURLs[index] = trackElem.src;
  });

  buttonEnglish = document.querySelector("#buttonEnglish");
  buttonDeutsch = document.querySelector("#buttonDeutsch");
  buttonEnglishChapters = document.querySelector("#buttonEnglishChapters");

  // The buttons can be used from now on
  buttonEnglish.disabled = false;
  buttonDeutsch.disabled = false;
  buttonEnglishChapters.disabled = false;

  // The same tracks, seen as JavaScript TextTrack objects
  tracks = video.textTracks;
};
/**
 * Finds every text track whose language and kind match the arguments and
 * displays its chapter markers, immediately if the track is already
 * loaded, otherwise once its <track> element fires "load".
 *
 * Reads the globals `tracks` and `trackElems`, which are walked with the
 * same index.
 *
 * @param {string} lang - language code to match against track.language
 * @param {string} kind - track kind to match against track.kind
 */
function buildChapterMenu(lang, kind) {
  for (let index = 0; index < tracks.length; index++) {
    const candidate = tracks[index];
    const isRequestedTrack =
      candidate.language === lang && candidate.kind === kind;
    if (!isRequestedTrack) {
      continue;
    }

    // the track must be active if we want to highlight the
    // current chapter while the video is playing
    candidate.mode = "showing";

    const candidateElem = trackElems[index];
    // readyState 2: the track has already been loaded
    const alreadyLoaded = candidateElem.readyState === 2;
    if (alreadyLoaded) {
      displayChaptersMarkers(candidate);
    } else {
      displayChapterMarkersAfterTrackLoaded(candidateElem, candidate);
    }
  }
}
/**
 * Rebuilds the chapter menu in the global `chapterMenuDiv` element with
 * one clickable thumbnail per cue of the given chapter track.
 *
 * The menu is emptied first, so calling this function several times
 * (one call per click on the chapter button) does not append duplicate
 * thumbnails.
 *
 * The text of each cue is parsed as JSON and must yield an object with a
 * `description` (caption text) and an `image` (file name appended to the
 * image base URL). Cues whose text is not a JSON object are skipped with
 * a console warning so that one bad cue does not abort the whole menu.
 *
 * The markup is built with DOM calls rather than an HTML string: the
 * description is inserted as plain text, so characters such as "<" or
 * "&" in the cue cannot break the page.
 *
 * @param {TextTrack} track - chapter track whose cues are rendered
 */
function displayChaptersMarkers(track) {
  const cues = track.cues;
  if (!cues) {
    // No cue list available: nothing to display.
    return;
  }

  // Start from an empty menu so repeated calls do not duplicate entries.
  chapterMenuDiv.innerHTML = "";

  for (const cue of Array.from(cues)) {
    //addCueListeners(cue);
    let cueObject = null;
    try {
      cueObject = JSON.parse(cue.text);
    } catch (err) {
      cueObject = null;
    }
    if (cueObject === null || typeof cueObject !== "object") {
      console.warn("Skipping chapter cue whose text is not a JSON object:", cue.text);
      continue;
    }

    const imageURL = "https://mainline.i3s.unice.fr/mooc/" + cueObject.image;

    const img = document.createElement('img');
    img.classList.add("thumb");
    img.setAttribute("src", imageURL);
    // Inline handler, so the generated element still carries the
    // onclick="jumpTo(<start time>);" attribute; startTime is a number.
    img.setAttribute("onclick", "jumpTo(" + cue.startTime + ");");

    const caption = document.createElement('figcaption');
    caption.classList.add("desc");
    caption.textContent = cueObject.description;

    // add an image to the menu
    const figure = document.createElement('figure');
    figure.classList.add("img");
    figure.appendChild(img);
    figure.appendChild(caption);
    chapterMenuDiv.appendChild(figure);
  }
}
/**
 * Defers the display of the chapter markers until the given <track>
 * element has fired its "load" event.
 *
 * @param {HTMLTrackElement} trackElem - element the "load" listener is attached to
 * @param {TextTrack} track - track handed to displayChaptersMarkers on load
 */
function displayChapterMarkersAfterTrackLoaded(trackElem, track) {
  const onTrackLoaded = () => {
    console.log("chapter track loaded");
    displayChaptersMarkers(track);
  };

  trackElem.addEventListener('load', onTrackLoaded);
}
/**
 * Displays, as a transcript, the cues of every text track matching the
 * given language and kind.
 *
 * The transcript area is cleared first. Unless chapters are requested,
 * all tracks are disabled so that only the matching track is active.
 * A matching track is displayed immediately if it is already loaded,
 * otherwise once its <track> element fires "load".
 *
 * @param {string} lang - language code to match against track.language
 * @param {string} kind - track kind to match against track.kind
 */
function loadTranscript(lang, kind) {
  clearTranscriptDiv();

  // When displaying chapters, the other tracks are left untouched.
  const showingChapters = kind === 'chapters';
  if (!showingChapters) {
    disableAllTracks();
  }

  for (let index = 0; index < tracks.length; index++) {
    const candidate = tracks[index];
    if (candidate.language !== lang || candidate.kind !== kind) {
      continue;
    }

    candidate.mode = "showing";

    const candidateElem = trackElems[index];
    // readyState 2: the track has already been loaded
    if (candidateElem.readyState === 2) {
      displayCues(candidate);
    } else {
      displayCuesAfterTrackLoaded(candidateElem, candidate);
    }
  }
}
/**
 * Defers the display of a track's cues until the given <track> element
 * has fired its "load" event.
 *
 * @param {HTMLTrackElement} trackElem - element the "load" listener is attached to
 * @param {TextTrack} track - track handed to displayCues on load
 */
function displayCuesAfterTrackLoaded(trackElem, track) {
  const onTrackLoaded = () => {
    console.log("track loaded");
    displayCues(track);
  };

  trackElem.addEventListener('load', onTrackLoaded);
}
/**
 * Sets the mode of every entry of the global `tracks` list to "disabled".
 */
function disableAllTracks() {
  Array.from(tracks).forEach((track) => {
    track.mode = "disabled";
  });
}
/**
 * Appends one clickable <li> per cue of the track to the transcript and
 * registers the enter/exit listeners of each cue.
 *
 * For a cue containing voices, the line reads "<voice>: <text>" for each
 * voice, with the markup of the text removed; otherwise the raw cue text
 * is used.
 *
 * The id attribute is quoted: WebVTT cue identifiers are optional, and
 * with an unquoted empty id the HTML parser would read the following
 * onclick attribute as the value of id, leaving the line unclickable.
 *
 * The markup of all cues is accumulated and handed to addToTranscriptDiv
 * in a single call instead of one call per cue.
 *
 * NOTE(review): the text of voice-less cues is passed on unescaped and
 * addToTranscriptDiv inserts it as HTML.
 *
 * @param {TextTrack} track - track whose cues are displayed
 */
function displayCues(track) {
  const cues = track.cues;
  if (!cues) {
    // No cue list available: nothing to display.
    return;
  }

  let transcriptHtml = "";
  for (let i = 0, len = cues.length; i < len; i++) {
    const cue = cues[i];
    addCueListeners(cue);

    const voices = getVoices(cue.text);
    let transText = "";
    if (voices.length > 0) {
      for (const { voice, text } of voices) {
        transText += voice + ': ' + removeHTML(text);
      }
    } else {
      transText = cue.text; // not a voice text
    }

    transcriptHtml +=
      `<li class='cues' id='${cue.id}' onclick='jumpTo(${cue.startTime});'>${transText}</li>`;
  }

  if (transcriptHtml !== "") {
    addToTranscriptDiv(transcriptHtml);
  }
}
/**
 * Extracts the voice spans (`<v Name>text</v>`) of a cue text.
 *
 * Compared with a naive scan this version:
 *  - declares all of its variables (no implicit global),
 *  - looks for the end of the tag and for `</v>` after the position of
 *    the current `<v`, so earlier markup such as `<i>..</i>` is skipped,
 *  - accepts a voice span without closing `</v>` (the text then runs to
 *    the end of the cue),
 *  - drops class annotations, so `<v.loud Esme>` yields the voice "Esme",
 *  - does not modify its argument.
 *
 * @param {string} speech - text content of a cue
 * @returns {{voice: string, text: string}[]} one entry per voice span, in
 *   order of appearance; empty when the text has no voice span
 */
function getVoices(speech) {
  const voices = [];
  let pos = speech.indexOf('<v'); // voices are like <v michel> ....

  while (pos !== -1) {
    const endVoice = speech.indexOf('>', pos);
    if (endVoice === -1) {
      break; // unterminated tag: nothing more can be extracted
    }

    // Text between "<v" and ">", without the optional ".class" part
    const voice = speech
      .substring(pos + 2, endVoice)
      .replace(/^(\.\S+)?\s*/, '')
      .trim();

    const endSpeech = speech.indexOf('</v>', endVoice);
    if (endSpeech === -1) {
      // No closing tag: the voice lasts until the end of the cue text.
      voices.push({ voice: voice, text: speech.substring(endVoice + 1) });
      break;
    }

    voices.push({ voice: voice, text: speech.substring(endVoice + 1, endSpeech) });
    pos = speech.indexOf('<v', endSpeech + 4);
  }

  return voices;
}
/**
 * Returns the text of an HTML fragment with all markup removed.
 *
 * The fragment is parsed inside a <template> element rather than a <div>:
 * template content is inert, so markup coming from a cue (for example an
 * <img> with an onerror handler) is parsed without loading resources or
 * running handlers.
 *
 * @param {string} text - HTML fragment
 * @returns {string} text content of the fragment, "" if there is none
 */
function removeHTML(text) {
  const template = document.createElement('template');
  template.innerHTML = text;
  return template.content.textContent || '';
}
/**
 * Moves the global `video` to the given time and starts playback.
 *
 * video.play() may return a Promise that rejects when playback cannot
 * start (for example when it is interrupted or blocked by the browser).
 * The rejection is handled here so that it does not surface as an
 * unhandled promise rejection.
 *
 * @param {number} time - position to seek to, in seconds
 */
function jumpTo(time) {
  video.currentTime = time;

  const playback = video.play();
  // Older browsers return undefined instead of a Promise.
  if (playback && typeof playback.catch === "function") {
    playback.catch((err) => {
      console.warn("Playback could not be started:", err);
    });
  }
}
/**
 * Empties the transcript area (the global `transcriptDiv` element).
 */
function clearTranscriptDiv() {
  const transcriptArea = transcriptDiv;
  transcriptArea.innerHTML = "";
}
/**
 * Appends an HTML fragment at the end of the transcript area (the global
 * `transcriptDiv` element).
 *
 * insertAdjacentHTML parses only the new fragment. `innerHTML +=` would
 * serialise and re-parse the whole transcript on every call, which
 * recreates all existing lines each time one line is added.
 *
 * @param {string} htmlText - HTML markup to append
 */
function addToTranscriptDiv(htmlText) {
  transcriptDiv.insertAdjacentHTML("beforeend", htmlText);
}
/**
 * Registers enter/exit handlers on a cue that add/remove the "current"
 * CSS class on the transcript element whose id is the cue id.
 *
 * The element may not exist (cue without an id, or transcript cleared in
 * the meantime); in that case the handlers only log and return instead
 * of throwing a TypeError on a null element.
 *
 * @param {TextTrackCue} cue - cue receiving the onenter/onexit handlers
 */
function addCueListeners(cue) {
  // Transcript element for a cue id, or null when there is none.
  const findTranscriptLine = (cueId) =>
    cueId ? document.getElementById(cueId) : null;

  cue.onenter = (e) => {
    console.log('enter id=' + e.target.id);
    const transcriptText = findTranscriptLine(e.target.id);
    if (transcriptText) {
      transcriptText.classList.add("current");
    }
  };

  cue.onexit = (e) => {
    console.log('exit id=' + e.target.id);
    const transcriptText = findTranscriptLine(e.target.id);
    if (transcriptText) {
      transcriptText.classList.remove("current");
    }
  };
}