Install LaTeX
First install the MiKTeX distribution.
Then install the Texmaker editor.
Install Python and Atom editor
First install Python 2.7.x. Then, in the system environment variables, add the Python path C:\Python27 and the scripts path C:\Python27\Scripts (pip, etc.) to "Path".
Then install atom editor
Then configure the Atom editor by following the linked guide.
All I need to know about GitHub TBC...
First install git.
Get a GitHub account (Qingju in the examples below).
How to convert an existing non-empty directory into a Git working directory and push files to a remote repository
On GitHub, create a repository called ABC.
cd to the existing local directory, right click---Git Bash here
git init
git add .
git commit -m "blablabla"
git pull https://github.com/Qingju/ABC.git
git push https://github.com/Qingju/ABC.git
GitHub account name and password are needed to push files.
More details are available from the Git Cheat Sheet 1 and 2.
Pushing for the first time after pulling from a Git directory: "fatal: No configured push destination"
##### Error
#fatal: No configured push destination.
#Either specify the URL from the command-line or configure a remote repository using
# git remote add <name> <url>
# and then push using the remote name
# git push <name>
git remote add origin https://github.com/Qingju/ABC.git
git push --set-upstream origin master
A quick pipeline builder with torch
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn
import torch.nn.functional as F
import os
import re
import pickle
import numpy as np
import random
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
import librosa
import librosa.display
import io
import PIL.Image
from torchvision.transforms import ToTensor
# Seed the random number generators of torch, NumPy and Python's ``random``
# module with fixed values.
torch.manual_seed(12345)
np.random.seed(54321)
random.seed(666)
def load_all_features(data_path, type=".pickle"):
    """Collect feature files below ``data_path`` and split them into train/test.

    The directory tree is walked recursively. Every file whose name ends with
    ``type`` is recorded as a path relative to ``data_path`` using forward
    slashes only, so ``data_path + relative_path`` addresses the file again.

    Args:
        data_path: Root directory of the feature files. Backslashes are
            normalised to forward slashes before use.
        type: File-name suffix to keep. The name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        A ``(feat_train, feat_test)`` pair of lists as produced by
        ``train_test_split`` with ``test_size=0.15``.

    Raises:
        ValueError: If no matching file is found below ``data_path``.
    """
    data_path = data_path.replace("\\", "/")

    feature_files = []
    for root, _sub_folders, files in os.walk(data_path):
        for item in files:
            if item.endswith(type):
                file_path = os.path.join(root, item).replace("\\", "/")
                # Keep only the part of the path below the data root.
                feature_files.append(file_path[len(data_path):])

    if not feature_files:
        raise ValueError(
            f"No files ending with {type!r} found under {data_path!r}"
        )

    # os.walk reports directory entries in arbitrary, filesystem-dependent
    # order. Sorting first makes the split depend only on the file names and
    # the RNG state, so a resumed run does not leak test files into training.
    feature_files.sort()

    # split the data to train and test
    feat_train, feat_test = train_test_split(feature_files, test_size=0.15)
    return feat_train, feat_test
class BeatDataset(Dataset):
    """Dataset serving random excerpts of pickled per-track features.

    Item ``idx`` is read from ``data_root_dir + audio_files[idx]``. The pickle
    is expected to hold a dict with the keys ``"frame_num"``, ``"melspec"``,
    ``"oenv"`` and ``"timestamps"``.
    """

    def __init__(self, audio_files, data_root_dir):
        """Store the list of feature files and the prefix they are relative to."""
        self.data_root_dir = data_root_dir
        self.audio_files = audio_files

    def __len__(self):
        """Return the number of feature files."""
        return len(self.audio_files)

    def __getitem__(self, idx):
        """Load one feature file and cut a random excerpt of at most 431 frames.

        Returns a dict with the relative file name (``"audio_file"``), the
        transposed mel-spectrogram excerpt (``"melspec"``), the onset-envelope
        excerpt with a trailing singleton axis (``"oenv"``) and the matching
        excerpt of the ``"timestamps"`` entry (``"beats"``).
        """
        rel_path = self.audio_files[idx]

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only use feature files from a trusted source.
        with open(self.data_root_dir + rel_path, 'rb') as handle:
            features = pickle.load(handle)

        # 431 frames ~ 10 s (10.0 * 22050 / 512 per the original note);
        # shorter tracks are used in full.
        n_total = features["frame_num"]
        n_keep = min(n_total, 431)
        first = random.randint(0, n_total - n_keep)
        window = slice(first, first + n_keep)

        return {
            "audio_file": rel_path,
            "melspec": features["melspec"][:, window].T,        # T x D
            "oenv": features["oenv"][window][..., np.newaxis],  # T x 1
            "beats": features["timestamps"][window],            # T
        }
class conv_block(nn.Module):
    """Two 3x3 convolution stages, each followed by ReLU and 2x2 max pooling.

    Pooling uses ``ceil_mode=True``, so each of the two spatial axes is halved
    (rounding up) twice by one pass through the block.
    """

    def __init__(self, in_c, out_c):
        """Build the block mapping ``in_c`` input channels to ``out_c`` channels."""
        super(conv_block, self).__init__()
        # Attribute names are part of the checkpoint format; keep them stable.
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d((2, 2), ceil_mode=True)

    def forward(self, inputs):
        """Run conv -> ReLU -> pool for both stages and return the result."""
        features = inputs
        for conv in (self.conv1, self.conv2):
            features = self.pool(self.relu(conv(features)))
        return features
class AllBeatTracker(nn.Module):
    """Beat-activation network fusing a mel spectrogram with an onset envelope.

    The mel spectrogram passes through two ``conv_block`` stages and a
    bidirectional GRU; the onset envelope passes through its own bidirectional
    GRU. Both feature streams are concatenated per frame, fed through a final
    GRU and a linear layer, and squashed with a sigmoid.
    """

    def __init__(self):
        super(AllBeatTracker, self).__init__()
        self.mel_conv = conv_block(1, 16)
        self.mel_conv2 = conv_block(16, 16)
        # Input size 8 is the width left on the last axis after the two conv
        # blocks (presumably 128 mel bands pooled by 16 -- TODO confirm).
        self.mel_rnn = nn.GRU(8, 30, batch_first=True, bidirectional=True)
        self.oenv_rnn = nn.GRU(1, 20, batch_first=True, bidirectional=True)
        # 100 = 2 * 30 (mel_rnn) + 2 * 20 (oenv_rnn).
        # batch_first=True is required here: the concatenated features are
        # laid out (batch, time, feature) like the outputs of the two GRUs
        # above. Without it the GRU treats the batch axis as time and runs its
        # recurrence across unrelated samples. The flag does not change the
        # parameter names or shapes, so existing checkpoints still load.
        self.all_rnn = nn.GRU(100, 50, batch_first=True)
        self.fcn = nn.Linear(50, 1)

    def forward(self, input):
        """Compute one activation in (0, 1) per frame.

        Args:
            input: Mapping with the tensors ``"melspec"`` and ``"oenv"``.
                Assumed shapes are (B, T, D) and (B, T, 1) with matching
                floating dtype -- TODO confirm against the data pipeline.

        Returns:
            Tensor of shape (B, T) holding the sigmoid activations.
        """
        melspec = input["melspec"]
        oenv = input["oenv"]

        # Add a channel axis for Conv2d: (B, 1, T, D).
        x1 = torch.unsqueeze(melspec, 1)
        x1 = self.mel_conv(x1)
        x1 = self.mel_conv2(x1)
        # Fold the channel axis into the (pooled) time axis so the sequence
        # length is back to roughly T, then trim it to the onset length.
        x1 = x1.transpose(1, 2)
        x1 = x1.reshape(x1.shape[0], -1, x1.shape[-1])
        x1 = x1[:, :oenv.shape[1], :]
        # Only the per-step outputs are used; the final hidden states of the
        # GRUs are discarded.
        x1 = self.mel_rnn(x1)[0]
        x2 = self.oenv_rnn(oenv)[0]

        x = torch.cat([x1, x2], dim=-1)
        y = self.all_rnn(x)[0]
        y = self.fcn(y)
        y = torch.sigmoid(y)
        y = torch.squeeze(y, -1)  # [B x T]
        return y
def my_loss(output, target, tolerance=3):
    """Squared-error loss that tolerates small timing offsets.

    Both sequences are dilated along the last axis with a max filter of width
    ``2 * tolerance + 1``. A target frame is compared against the dilated
    prediction and a predicted frame against the dilated target, so a peak
    that is at most ``tolerance`` frames away from its counterpart is not
    penalised. Each error term is weighted by the value of the frame it
    belongs to (``target`` resp. ``output``).

    Args:
        output: Predicted activations, shape (B, T).
        target: Reference activations, shape (B, T); must be a floating-point
            tensor accepted by ``F.max_pool1d``.
        tolerance: Half-width of the dilation window in frames. The default
            of 3 reproduces the original fixed window of 7 frames.

    Returns:
        Scalar tensor with the mean weighted squared error.

    Raises:
        ValueError: If ``tolerance`` is negative.
    """
    if tolerance < 0:
        raise ValueError("tolerance must be non-negative")
    kernel = 2 * tolerance + 1

    def _dilate(seq):
        # max_pool1d expects (B, C, T); add and drop a singleton channel axis.
        pooled = F.max_pool1d(seq.unsqueeze(1), kernel, stride=1, padding=tolerance)
        return pooled.squeeze(1)

    output_mp = _dilate(output)
    target_mp = _dilate(target)
    b1 = (target - output_mp) ** 2
    b2 = (output - target_mp) ** 2
    loss = torch.mean(b1 * target + b2 * output)
    return loss
if __name__ == '__main__':
    # Training script: builds the train/test loaders, optionally resumes from
    # the last checkpoint, then alternates one training pass and one test pass
    # per epoch. The summed test loss is logged to TensorBoard every epoch;
    # every 10th epoch a diagnostic figure is logged and a checkpoint saved.
    data_root_dir = "/data/nfs2/projects/IVE/audio/BallroomFeatures/"
    feat_train, feat_test = load_all_features(data_root_dir)
    my_beat_dataset_train = BeatDataset(feat_train, data_root_dir)
    my_beat_dataset_test = BeatDataset(feat_test, data_root_dir)

    train_loader = DataLoader(dataset=my_beat_dataset_train, shuffle=True, batch_size=2)
    test_loader = DataLoader(dataset=my_beat_dataset_test, shuffle=False, batch_size=1)
    print("Length of the train_loader:", len(train_loader))
    print("Length of the val_loader:", len(test_loader))

    epoch_start = 0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Move the model to its device BEFORE creating the optimiser and loading
    # its state: Optimizer.load_state_dict places the state tensors on the
    # device the parameters live on at that moment. Loading first and moving
    # the model afterwards leaves the Adam state on the CPU and breaks
    # optimiser.step() on a GPU run.
    model = AllBeatTracker().to(device)
    optimiser = optim.Adam(model.parameters(), lr=1e-3)
    # NOTE(review): scheduler.step() is never called and the scheduler state
    # is not checkpointed, so the learning rate stays at 1e-3. Kept as in the
    # original; decide whether the decay is actually wanted.
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimiser, 0.96, last_epoch=-1)

    model_load_dir = "checkpoints/last_ckpt.pt"
    if os.path.exists(model_load_dir):
        # map_location lets a checkpoint written on a GPU be resumed on a
        # CPU-only machine (and vice versa).
        checkpoint = torch.load(model_load_dir, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimiser.load_state_dict(checkpoint['optimizer_state_dict'])
        # The checkpoint stores the epoch that had just finished; resume with
        # the following one so that epoch is not trained twice.
        epoch_start = checkpoint['epoch'] + 1

    # Print model's state_dict
    print("Model's state_dict:")
    for param_name, param_value in model.state_dict().items():
        print(param_name, "\t", param_value.size())
    # Print optimizer's state_dict
    print("Optimizer's state_dict:")
    for var_name, var_value in optimiser.state_dict().items():
        print(var_name, "\t", var_value)

    # Writer will output to ./runs/ directory by default
    writer = SummaryWriter()
    # One figure is reused for all diagnostic plots.
    fig, ax = plt.subplots(nrows=3, figsize=(8, 5))
    tensor_keys = ("melspec", "oenv", "beats")
    try:
        for epoch in range(epoch_start, 1000):
            print("EPOCH", epoch)
            print("TRAIN")
            for d in train_loader:
                model.zero_grad()
                for key in tensor_keys:
                    d[key] = d[key].to(device)
                output = model(d)
                loss = my_loss(output, d["beats"])
                loss.backward()
                optimiser.step()

            print("Test")
            error_sum = 0.0
            last_batch = None
            last_output = None
            with torch.no_grad():
                for d in test_loader:
                    for key in tensor_keys:
                        d[key] = d[key].to(device)
                    output = model(d)
                    error_sum += my_loss(output, d["beats"]).item()
                    last_batch, last_output = d, output
            print(error_sum)
            # NOTE: this is the sum (not the mean) of the per-batch test losses.
            writer.add_scalar("Loss/test", error_sum, epoch)

            if epoch % 10 == 0:
                # Plot the last test batch and save it to TensorBoard
                # (skipped when the test set is empty).
                if last_batch is not None:
                    mel_spec = last_batch["melspec"].detach().cpu().numpy()
                    beats = last_batch["beats"].detach().cpu().numpy()
                    oenv = last_batch["oenv"].detach().cpu().numpy()
                    prediction = last_output.detach().cpu().numpy()

                    for axis in ax:
                        axis.clear()
                    librosa.display.specshow(mel_spec[0].T, sr=22050, hop_length=512,
                                             x_axis='time', y_axis='mel', ax=ax[0])
                    times = librosa.times_like(beats[0], sr=22050, hop_length=512)
                    ax[1].plot(times, beats[0], label='groundtruth')
                    ax[1].plot(times, prediction[0], label='prediction')
                    ax[1].set_xlim([times[0], times[-1]])
                    ax[1].legend(loc='upper right')
                    ax[2].plot(times, oenv[0], '--', label='Onset')
                    ax[2].set_xlim([times[0], times[-1]])
                    ax[2].legend(loc='upper right')

                    # Render the figure to an in-memory PNG and hand it to
                    # TensorBoard as an image tensor.
                    buf = io.BytesIO()
                    fig.savefig(buf, format='png')
                    buf.seek(0)
                    image = ToTensor()(PIL.Image.open(buf))
                    writer.add_image('Image', image, epoch)

                # Save a resumable checkpoint.
                os.makedirs(os.path.dirname(model_load_dir), exist_ok=True)
                torch.save({'epoch': epoch,
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimiser.state_dict(),
                            'loss': error_sum
                            },
                           model_load_dir)
    finally:
        # Flush pending TensorBoard events even if training is interrupted.
        writer.close()