
/*
 * libcudann
 * Copyright (C) 2011 Luca Donati (lucadonati85@gmail.com)
 */

/*
 * FeedForwardNNTrainer.cpp
 *
 *  Created on: 19/nov/2010
 *      Author: donati
 */

#include "FeedForwardNNTrainer.h"

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <float.h>
#include <signal.h>

#include <cublas.h>
#include <cutil_inline.h>

//SIGINT handler
bool quit = false;
void terminate(int) {
    quit = true;
}

FeedForwardNNTrainer::FeedForwardNNTrainer() {
    //seeds the random number generator with the current microseconds
    struct timeval tv;
    gettimeofday(&tv, NULL);
    srand(tv.tv_usec);

    trainingSet         = NULL;
    testSet             = NULL;
    net                 = NULL;
    bestMSETestNet      = NULL;
    bestMSETrainTestNet = NULL;
    bestClassTestNet    = NULL;
}

FeedForwardNNTrainer::~FeedForwardNNTrainer() {
}

//chooses the net to operate on and to save after the training
void FeedForwardNNTrainer::selectNet(FeedForwardNN & n) {
    net = &n;
}

//chooses the training set
void FeedForwardNNTrainer::selectTrainingSet(LearningSet & s) {
    trainingSet = &s;
}

//chooses the test set. if this is set, the error rate is computed on the test set instead of the training set
void FeedForwardNNTrainer::selectTestSet(LearningSet & s) {
    testSet = &s;
}

//chooses a net to save the best network trained so far after each epoch. MSE on the test set is the criterion
void FeedForwardNNTrainer::selectBestMSETestNet(FeedForwardNN & n) {
    bestMSETestNet = &n;
}

//chooses a net to save the best network trained so far after each epoch. MSE on the train set + MSE on the test set is the criterion
void FeedForwardNNTrainer::selectBestMSETrainTestNet(FeedForwardNN & n) {
    bestMSETrainTestNet = &n;
}

//chooses a net to save the best network trained so far after each epoch. classification percentage on the test set is the criterion
void FeedForwardNNTrainer::selectBestClassTestNet(FeedForwardNN & n) {
    bestClassTestNet = &n;
}

//starts the training using params. n is the number of parameters
//the first 2 elements of params are where the training will be executed (TRAIN_CPU,TRAIN_GPU)
//and the training algorithm (ALG_BP,ALG_BATCH...). the other parameters are algorithm dependent
//returns the best MSE on the test set (or on the train set if no test set is specified)
//printtype specifies how verbose the execution will be (PRINT_ALL,PRINT_MIN,PRINT_OFF)
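//example call (a sketch with illustrative values, assuming the net and the sets were already
//selected via selectNet/selectTrainingSet/selectTestSet):
//  float params[] = { TRAIN_GPU, ALG_BATCH, 0.001f, 1000, 100, 0.7f, 0.9f, SHUFFLE_ON, ERROR_TANH };
//  trainer.train(9, params, PRINT_ALL);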

float FeedForwardNNTrainer::train(const int n, const float * params, const int printtype) {
    //checks CTRL-C to interrupt training manually
    quit = false;
    signal(SIGINT, terminate);
    setvbuf(stdout, (char*)NULL, _IONBF, 0);

    //checks for correct network and training set initialization
    if (net == NULL)         { printf("NEURAL NETWORK NOT SELECTED\n"); exit(1); }
    if (trainingSet == NULL) { printf("TRAINING SET NOT SELECTED\n");   exit(1); }
    if (trainingSet->getNumOfInputsPerInstance()  != net->getLayersSize()[0] ||
        trainingSet->getNumOfOutputsPerInstance() != net->getLayersSize()[net->getNumOfLayers()-1]) {
        printf("NETWORK AND TRAINING SET OF DIFFERENT SIZE\n"); exit(1);
    }
    if (testSet != NULL &&
        (trainingSet->getNumOfInputsPerInstance()  != testSet->getNumOfInputsPerInstance() ||
         trainingSet->getNumOfOutputsPerInstance() != testSet->getNumOfOutputsPerInstance())) {
        printf("TEST SET OF DIFFERENT SIZE\n"); exit(1);
    }
    //both params[0] and params[1] are read below, so at least 2 parameters are required
    if (n < 2) { printf("TOO FEW PARAMETERS SELECTED FOR TRAINING\n"); exit(1); }

    if (printtype != PRINT_OFF) {
        printf("Network:\t\t");
        printf("%d", net->getLayersSize()[0]);
        for (int i = 1; i < net->getNumOfLayers(); i++)
            printf("x%d", net->getLayersSize()[i]);
        printf("\n");
        printf("Activation functions:\t");
        for (int i = 0; i < net->getNumOfLayers(); i++)
            printf("%d ", net->getActFuncts()[i]);
        printf("\n");
    }

    //selects the right algorithm to execute the training
    //(the result is stored so the SIGINT handler can be restored before returning)
    float ret = 0.0f;
    switch ((int)params[0]) {
        case TRAIN_CPU:
            switch ((int)params[1]) {
                case ALG_BP:    ret = trainCpuBp(n-2, params+2, printtype);    break;
                case ALG_BATCH: ret = trainCpuBatch(n-2, params+2, printtype); break;
                default:        printf("TRAINING NOT IMPLEMENTED YET\n"); exit(1);
            }
            break;
        case TRAIN_GPU:
            switch ((int)params[1]) {
                case ALG_BP:    printf("TRAINING NOT IMPLEMENTED YET\n"); exit(1);
                case ALG_BATCH: ret = trainGPUBatch(n-2, params+2, printtype); break;
                default:        printf("TRAINING NOT IMPLEMENTED YET\n"); exit(1);
            }
            break;
        default: printf("TRAINING NOT IMPLEMENTED YET\n"); exit(1);
    }

    //stops checking CTRL-C
    signal(SIGINT, SIG_DFL);
    return ret;
}

//backpropagation training on host
//n is the number of parameters. parameters are (float array):
//desired error, max_epochs, epochs_between_reports, learning_rate, momentum (using momentum is 20% slower), shuffle (SHUFFLE_ON or SHUFFLE_OFF), error function (ERROR_TANH or ERROR_LINEAR)
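//example parameter array (a sketch of typical values, not prescribed by the library):
//  { 0.001f /*desired error*/, 1000 /*max epochs*/, 100 /*report interval*/,
//    0.7f /*learning rate*/, 0.9f /*momentum*/, SHUFFLE_ON, ERROR_TANH }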

float FeedForwardNNTrainer::trainCpuBp(const int n, const float * params, const int printtype) {
    //parameters parsing
    if (n < 2) { printf("TOO FEW PARAMETERS SELECTED FOR TRAINING\n"); exit(1); }
    float desired_error          = params[0];
    int   max_epochs             = params[1];
    int   epochs_between_reports = (n >= 3) ? params[2] : max_epochs / 10;
    float learningRate           = (n >= 4) ? params[3] : 0.7f;
    float momentum               = (n >= 5) ? params[4] : 0.0f;
    int   shuff                  = (n >= 6) ? (int)params[5] : SHUFFLE_ON;
    int   errorFunc              = (n >= 7) ? (int)params[6] : ERROR_TANH;

    if (printtype != PRINT_OFF) {
        printf("Training on:\t\tCPU\n");
        printf("Algorithm:\t\tBackpropagation\n");
        printf("Desired Error:\t\t%f\n", desired_error);
        printf("Max epochs:\t\t%d\n", max_epochs);
        printf("Epochs between reports:\t%d\n", epochs_between_reports);
        printf("Learning rate:\t\t%f\n", learningRate);
        printf("Momentum:\t\t%f\n", momentum);
        printf("Shuffle:\t\t%s\n", shuff == SHUFFLE_ON ? "ON" : "OFF");
        printf("Error function:\t\t%s\n", errorFunc == ERROR_TANH ? "TANH" : "LINEAR");
        printf("\n");
    }

    float mseTrain = FLT_MAX, mseTest = FLT_MAX;
    //declare some error values for evaluating the trained network and storing best results
    //best net MSE on test, best net MSE on train+test, best net as classifier on test
    float bestMSETest = FLT_MAX, bestMSETrainTest = FLT_MAX, bestClassTest = 0;

    //declare some network values
    int numOfLayers  = net->getNumOfLayers();
    int numOfWeights = net->getNumOfWeights();
    int numOfNeurons = 0;
    for (int i = 0; i < net->getNumOfLayers(); i++) {
        numOfNeurons += net->getLayersSize()[i] + 1;
    }

    //declare an array of neurons to represent the neuron values
    float * values = new float[numOfNeurons];
    //declare an array of deltas to represent the gradients for the weight updates
    float * deltas = new float[numOfNeurons];
    //declare an array of weights to use for momentum
    float * oldWeights = new float[numOfWeights];
    //declare an array of temporary weights to use for batch and similar methods
    float * tmpWeights = new float[numOfWeights];
    //pointers to the net weights, activation functions and layer sizes
    float * weights    = net->getWeights();
    int   * actFuncts  = net->getActFuncts();
    int   * layersSize = net->getLayersSize();

    //declare some offsets to manage the array indexes of each layer 'i'
    int offsetWeights[numOfLayers];
    int offsetIns[numOfLayers];
    int offsetOuts[numOfLayers];
    int offsetDeltas[numOfLayers];
    for (int i = 0; i < numOfLayers; i++) {
        //calculates the offsets of the arrays
        offsetWeights[i] = 0;
        offsetDeltas[i]  = layersSize[0] + 1;
        offsetIns[i]     = 0;
        offsetOuts[i]    = layersSize[0] + 1;
        for (int j = 0; j < i; j++) {
            offsetWeights[i] += (layersSize[j]+1) * layersSize[j+1];
            offsetIns[i]     += layersSize[j] + 1;
            offsetOuts[i]    += layersSize[j+1] + 1;
            offsetDeltas[i]  += layersSize[j+1] + 1;
        }
    }
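    //(layout recap: offsetIns[i] points at the first value of layer i, offsetOuts[i] at the first
    // value of layer i+1, offsetWeights[i] at the weight block between the two, and offsetDeltas[i]
    // mirrors offsetOuts[i]; every layer reserves layersSize[i]+1 slots so the bias neuron fits)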

    //save previous weights to use in the momentum calculation
    for (int w = 0; w < numOfWeights; w++)
        oldWeights[w] = weights[w];

    //declare some training set values
    int numOfInstances          = trainingSet->getNumOfInstances();
    int numOfInputsPerInstance  = trainingSet->getNumOfInputsPerInstance();
    int numOfOutputsPerInstance = trainingSet->getNumOfOutputsPerInstance();
    //pointers to the training set inputs and outputs
    float * trainingSetInputs  = trainingSet->getInputs();
    float * trainingSetOutputs = trainingSet->getOutputs();

    //vector to shuffle the training set
    int order[numOfInstances];
    for (int i = 0; i < numOfInstances; i++)
        order[i] = i;

    if (printtype == PRINT_ALL) {
        //compute starting error rates
        printf("Starting:\tError on train set %.10f", net->computeMSE(*trainingSet));
        if (testSet != NULL) {
            printf("\t\tError on test set %.10f", net->computeMSE(*testSet));
        }
        printf("\n");
    }

    //epochs training
    for (int epoch = 1; epoch <= max_epochs && quit == false; epoch++) {

        //shuffles the instances (Fisher-Yates)
        int ind = 0, aux = 0;
        if (shuff == SHUFFLE_ON)
            for (int i = 0; i < numOfInstances; i++) {
                ind = (rand() % (numOfInstances - i)) + i;
                aux = order[ind];
                order[ind] = order[i];
                order[i] = aux;
            }

        //instances training
        for (int instance = 0; instance < numOfInstances; instance++) {
            //computes a single instance forward pass of the backpropagation training
            stepForward(values, weights, actFuncts, numOfLayers, layersSize, numOfInputsPerInstance, trainingSetInputs, offsetIns, offsetWeights, offsetOuts, order, instance);
            //computes a single instance backward pass of the backpropagation training
            stepBack(values, weights, deltas, actFuncts, numOfLayers, layersSize, numOfOutputsPerInstance, trainingSetOutputs, offsetWeights, offsetDeltas, offsetOuts, order, instance, errorFunc);
            //updates the weights using the deltas
            weightsUpdate(values, weights, weights, deltas, numOfLayers, layersSize, offsetIns, offsetWeights, offsetDeltas, momentum, oldWeights, learningRate);
        }

        if (epochs_between_reports > 0 && epoch % epochs_between_reports == 0) {
            mseTrain = net->computeMSE(*trainingSet);
            if (printtype == PRINT_ALL)
                printf("Epoch\t%d\tError on train set %.10f", epoch, mseTrain);

            if (testSet != NULL) {
                mseTest = net->computeMSE(*testSet);
                //saves the net with the best MSE on the test set
                if (mseTest < bestMSETest) {
                    bestMSETest = mseTest;
                    if (bestMSETestNet != NULL) {
                        *bestMSETestNet = *net;
                    }
                }
                //saves the net with the best MSE on train set + test set
                if ((mseTrain + mseTest) < bestMSETrainTest && bestMSETrainTestNet != NULL) {
                    *bestMSETrainTestNet = *net;
                    bestMSETrainTest = mseTrain + mseTest;
                }
                if (printtype == PRINT_ALL)
                    printf("\t\tError on test set %.10f", mseTest);
                //saves the net with the best classification percentage on the test set
                if (bestClassTestNet != NULL) {
                    float per = net->classificatePerc(*testSet);
                    if (printtype == PRINT_ALL)
                        printf("\t\tClassification percentage on test set: %.1f%%", per * 100);
                    if (per > bestClassTest) {
                        *bestClassTestNet = *net;
                        bestClassTest = per;
                        if (printtype == PRINT_ALL)
                            printf(" ***");
                    }
                }
                if (mseTest <= desired_error) {
                    if (printtype == PRINT_ALL)
                        printf("\nDesired error reached on test set.\n");
                    break;
                }
            }

            if (printtype == PRINT_ALL)
                printf("\n");

            if (mseTrain <= desired_error && testSet == NULL) {
                if (printtype == PRINT_ALL)
                    printf("Desired error reached on training set.\n");
                break;
            }
        }
    }

    delete [] values;
    delete [] deltas;
    delete [] oldWeights;
    delete [] tmpWeights;

    if (printtype == PRINT_ALL)
        printf("Training complete.\n");

    if (testSet != NULL) {
        return bestMSETest;
    }
    else return mseTrain;
}

//batch training on host
//n is the number of parameters. parameters are (float array):
//desired error, max_epochs, epochs_between_reports, learning_rate, momentum (using momentum is 20% slower), shuffle (SHUFFLE_ON or SHUFFLE_OFF), error function (ERROR_TANH or ERROR_LINEAR)

float FeedForwardNNTrainer::trainCpuBatch(const int n, const float * params, const int printtype) {
    //parameters parsing
    if (n < 2) { printf("TOO FEW PARAMETERS SELECTED FOR TRAINING\n"); exit(1); }
    float desired_error          = params[0];
    int   max_epochs             = params[1];
    int   epochs_between_reports = (n >= 3) ? params[2] : max_epochs / 10;
    float learningRate           = (n >= 4) ? params[3] : 0.7f;
    float momentum               = (n >= 5) ? params[4] : 0.0f;
    int   shuff                  = (n >= 6) ? (int)params[5] : SHUFFLE_ON;
    int   errorFunc              = (n >= 7) ? (int)params[6] : ERROR_TANH;

    if (printtype != PRINT_OFF) {
        printf("Training on:\t\tCPU\n");
        printf("Algorithm:\t\tBatch\n");
        printf("Desired Error:\t\t%f\n", desired_error);
        printf("Max epochs:\t\t%d\n", max_epochs);
        printf("Epochs between reports:\t%d\n", epochs_between_reports);
        printf("Learning rate:\t\t%f\n", learningRate);
        printf("Momentum:\t\t%f\n", momentum);
        printf("Shuffle:\t\t%s\n", shuff == SHUFFLE_ON ? "ON" : "OFF");
        printf("Error function:\t\t%s\n", errorFunc == ERROR_TANH ? "TANH" : "LINEAR");
        printf("\n");
    }

    float mseTrain = FLT_MAX, mseTest = FLT_MAX;
    //declare some error values for evaluating the trained network and storing best results
    //best net MSE on test, best net MSE on train+test, best net as classifier on test
    float bestMSETest = FLT_MAX, bestMSETrainTest = FLT_MAX, bestClassTest = 0;

    //declare some network values
    int numOfLayers  = net->getNumOfLayers();
    int numOfWeights = net->getNumOfWeights();
    int numOfNeurons = 0;
    for (int i = 0; i < net->getNumOfLayers(); i++) {
        numOfNeurons += net->getLayersSize()[i] + 1;
    }

    //declare an array of neurons to represent the neuron values
    float * values = new float[numOfNeurons];
    //declare an array of deltas to represent the gradients for the weight updates
    float * deltas = new float[numOfNeurons];
    //declare an array of weights to use for momentum
    float * oldWeights = new float[numOfWeights];
    //declare an array of temporary weights to use for batch and similar methods
    float * tmpWeights = new float[numOfWeights];
    //pointers to the net weights, activation functions and layer sizes
    float * weights    = net->getWeights();
    int   * actFuncts  = net->getActFuncts();
    int   * layersSize = net->getLayersSize();

    //declare some offsets to manage the array indexes of each layer 'i'
    int offsetWeights[numOfLayers];
    int offsetIns[numOfLayers];
    int offsetOuts[numOfLayers];
    int offsetDeltas[numOfLayers];
    for (int i = 0; i < numOfLayers; i++) {
        //calculates the offsets of the arrays
        offsetWeights[i] = 0;
        offsetDeltas[i]  = layersSize[0] + 1;
        offsetIns[i]     = 0;
        offsetOuts[i]    = layersSize[0] + 1;
        for (int j = 0; j < i; j++) {
            offsetWeights[i] += (layersSize[j]+1) * layersSize[j+1];
            offsetIns[i]     += layersSize[j] + 1;
            offsetOuts[i]    += layersSize[j+1] + 1;
            offsetDeltas[i]  += layersSize[j+1] + 1;
        }
    }

    //save previous weights to use in the momentum calculation
    for (int w = 0; w < numOfWeights; w++)
        oldWeights[w] = weights[w];
    //resets the temporary weights for the batch accumulation
    for (int w = 0; w < numOfWeights; w++)
        tmpWeights[w] = 0;

    //declare some training set values
    int numOfInstances          = trainingSet->getNumOfInstances();
    int numOfInputsPerInstance  = trainingSet->getNumOfInputsPerInstance();
    int numOfOutputsPerInstance = trainingSet->getNumOfOutputsPerInstance();
    //pointers to the training set inputs and outputs
    float * trainingSetInputs  = trainingSet->getInputs();
    float * trainingSetOutputs = trainingSet->getOutputs();

    //vector to shuffle the training set
    int order[numOfInstances];
    for (int i = 0; i < numOfInstances; i++)
        order[i] = i;

    if (printtype == PRINT_ALL) {
        //compute starting error rates
        printf("Starting:\tError on train set %.10f", net->computeMSE(*trainingSet));
        if (testSet != NULL) {
            printf("\t\tError on test set %.10f", net->computeMSE(*testSet));
        }
        printf("\n");
    }

    //epochs training
    for (int epoch = 1; epoch <= max_epochs && quit == false; epoch++) {

        //shuffles the instances (Fisher-Yates)
        int ind = 0, aux = 0;
        if (shuff == SHUFFLE_ON)
            for (int i = 0; i < numOfInstances; i++) {
                ind = (rand() % (numOfInstances - i)) + i;
                aux = order[ind];
                order[ind] = order[i];
                order[i] = aux;
            }

        //instances training
        for (int instance = 0; instance < numOfInstances; instance++) {
            //computes a single instance forward pass of the backpropagation training
            stepForward(values, weights, actFuncts, numOfLayers, layersSize, numOfInputsPerInstance, trainingSetInputs, offsetIns, offsetWeights, offsetOuts, order, instance);
            //computes a single instance backward pass of the backpropagation training
            stepBack(values, weights, deltas, actFuncts, numOfLayers, layersSize, numOfOutputsPerInstance, trainingSetOutputs, offsetWeights, offsetDeltas, offsetOuts, order, instance, errorFunc);
            //accumulates the weight changes into the temporary weights
            //no momentum is used here, it will be added after all the instances
            weightsUpdate(values, weights, tmpWeights, deltas, numOfLayers, layersSize, offsetIns, offsetWeights, offsetDeltas, 0, oldWeights, learningRate);
        }

        //adds the accumulated temporary weight changes to the real weights
        //(the total is divided by the number of instances, to use the same learning rate as standard BP)
        //momentum is also applied here
        for (int w = 0; w < numOfWeights; w++) {
            float auxWeight = weights[w];
            weights[w] += (tmpWeights[w] / numOfInstances) + momentum * (auxWeight - oldWeights[w]);
            tmpWeights[w] = 0;
            oldWeights[w] = auxWeight;
        }
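        //(i.e. w_new = w + (1/N) * sum_over_instances(learningRate * delta * value) + momentum * (w - w_old))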

        if (epochs_between_reports > 0 && epoch % epochs_between_reports == 0) {
            mseTrain = net->computeMSE(*trainingSet);
            if (printtype == PRINT_ALL)
                printf("Epoch\t%d\tError on train set %.10f", epoch, mseTrain);

            if (testSet != NULL) {
                mseTest = net->computeMSE(*testSet);
                //saves the net with the best MSE on the test set
                if (mseTest < bestMSETest) {
                    bestMSETest = mseTest;
                    if (bestMSETestNet != NULL) {
                        *bestMSETestNet = *net;
                    }
                }
                //saves the net with the best MSE on train set + test set
                if ((mseTrain + mseTest) < bestMSETrainTest && bestMSETrainTestNet != NULL) {
                    *bestMSETrainTestNet = *net;
                    bestMSETrainTest = mseTrain + mseTest;
                }
                if (printtype == PRINT_ALL)
                    printf("\t\tError on test set %.10f", mseTest);
                //saves the net with the best classification percentage on the test set
                if (bestClassTestNet != NULL) {
                    float per = net->classificatePerc(*testSet);
                    if (printtype == PRINT_ALL)
                        printf("\t\tClassification percentage on test set: %.1f%%", per * 100);
                    if (per > bestClassTest) {
                        *bestClassTestNet = *net;
                        bestClassTest = per;
                        if (printtype == PRINT_ALL)
                            printf(" ***");
                    }
                }
                if (mseTest <= desired_error) {
                    if (printtype == PRINT_ALL)
                        printf("\nDesired error reached on test set.\n");
                    break;
                }
            }

            if (printtype == PRINT_ALL)
                printf("\n");

            if (mseTrain <= desired_error && testSet == NULL) {
                if (printtype == PRINT_ALL)
                    printf("Desired error reached on training set.\n");
                break;
            }
        }
    }

    delete [] values;
    delete [] deltas;
    delete [] oldWeights;
    delete [] tmpWeights;

    if (printtype == PRINT_ALL)
        printf("Training complete.\n");

    if (testSet != NULL) {
        return bestMSETest;
    }
    else return mseTrain;
}

//batch training on device
//n is the number of parameters. parameters are (float array):
//desired error, max_epochs, epochs_between_reports, learning_rate, momentum (using momentum is 20% slower), shuffle (SHUFFLE_ON or SHUFFLE_OFF), error function (ERROR_TANH or ERROR_LINEAR)

float FeedForwardNNTrainer::trainGPUBatch(const int n, const float * params, const int printtype) {
    //parameters parsing
    if (n < 2) { printf("TOO FEW PARAMETERS SELECTED FOR TRAINING\n"); exit(1); }
    float desired_error          = params[0];
    int   max_epochs             = params[1];
    int   epochs_between_reports = (n >= 3) ? params[2] : max_epochs / 10;
    float learningRate           = (n >= 4) ? params[3] : 0.7f;
    float momentum               = (n >= 5) ? params[4] : 0.0f;
    int   shuff                  = (n >= 6) ? (int)params[5] : SHUFFLE_ON;
    int   errorFunc              = (n >= 7) ? (int)params[6] : ERROR_TANH;

    if (printtype != PRINT_OFF) {
        printf("Training on:\t\tGPU\n");
        printf("Algorithm:\t\tBatch\n");
        printf("Desired Error:\t\t%f\n", desired_error);
        printf("Max epochs:\t\t%d\n", max_epochs);
        printf("Epochs between reports:\t%d\n", epochs_between_reports);
        printf("Learning rate:\t\t%f\n", learningRate);
        printf("Momentum:\t\t%f\n", momentum);
        printf("Shuffle:\t\t%s\n", shuff == SHUFFLE_ON ? "ON" : "OFF");
        printf("Error function:\t\t%s\n", errorFunc == ERROR_TANH ? "TANH" : "LINEAR");
        printf("\n");
    }

    float mseTrain = FLT_MAX, mseTest = FLT_MAX;
    //declare some error values for evaluating the trained network and storing best results
    //best net MSE on test, best net MSE on train+test, best net as classifier on test
    float bestMSETest = FLT_MAX, bestMSETrainTest = FLT_MAX, bestClassTest = 0;

    //declare some network values
    int numOfLayers  = net->getNumOfLayers();
    int numOfWeights = net->getNumOfWeights();
    int numOfNeurons = 0;
    for (int i = 0; i < net->getNumOfLayers(); i++) {
        numOfNeurons += net->getLayersSize()[i] + 1;
    }

    //declare some training set values
    int numOfInstances          = trainingSet->getNumOfInstances();
    int numOfInputsPerInstance  = trainingSet->getNumOfInputsPerInstance();
    int numOfOutputsPerInstance = trainingSet->getNumOfOutputsPerInstance();
    int numOfTestInstances = 0;
    if (testSet != NULL) {
        numOfTestInstances = testSet->getNumOfInstances();
    }

    //declare an array of neurons to represent the neuron values (all the instances at once)
    float * values = new float[numOfNeurons * numOfInstances];
    //declare an array of neurons to represent the neuron values of the test set
    float * testValues = new float[numOfNeurons * numOfTestInstances];
    //declare an array of deltas to represent the gradients for the weight updates
    float * deltas = new float[numOfNeurons * numOfInstances];

    //training and test set copies to be uploaded to device memory (column-major)
    float * columnTrainingSetInputs  = new float[numOfInstances * numOfInputsPerInstance];
    float * columnTrainingSetOutputs = new float[numOfInstances * numOfOutputsPerInstance];
    float * columnTestSetInputs      = new float[numOfTestInstances * numOfInputsPerInstance];
    float * columnTestSetOutputs     = new float[numOfTestInstances * numOfOutputsPerInstance];

    //declare an array of weights to use for momentum
    float * oldWeights = new float[numOfWeights];
    //pointers to the net weights, activation functions and layer sizes
    float * weights    = net->getWeights();
    int   * actFuncts  = net->getActFuncts();
    int   * layersSize = net->getLayersSize();

    //pointers to the training set inputs and outputs
    float * trainingSetInputs  = trainingSet->getInputs();
    float * trainingSetOutputs = trainingSet->getOutputs();
    //pointers to the test set inputs and outputs
    float * testSetInputs  = NULL;
    float * testSetOutputs = NULL;
    if (testSet != NULL) {
        testSetInputs  = testSet->getInputs();
        testSetOutputs = testSet->getOutputs();
    }

    //declare some offsets to manage the array indexes of each layer 'i'
    //(batched: every entry is scaled by the number of instances)
    int offsetWeights[numOfLayers];
    int offsetIns[numOfLayers];
    int offsetOuts[numOfLayers];
    int offsetDeltas[numOfLayers];
    int offsetTestIns[numOfLayers];
    int offsetTestOuts[numOfLayers];
    for (int i = 0; i < numOfLayers; i++) {
        //calculates the offsets of the arrays
        offsetWeights[i]  = 0;
        offsetDeltas[i]   = (layersSize[0]+1) * numOfInstances;
        offsetIns[i]      = 0;
        offsetOuts[i]     = (layersSize[0]+1) * numOfInstances;
        offsetTestIns[i]  = 0;
        offsetTestOuts[i] = (layersSize[0]+1) * numOfTestInstances;
        for (int j = 0; j < i; j++) {
            offsetWeights[i]  += (layersSize[j]+1) * layersSize[j+1];
            offsetIns[i]      += (layersSize[j]+1) * numOfInstances;
            offsetOuts[i]     += (layersSize[j+1]+1) * numOfInstances;
            offsetDeltas[i]   += (layersSize[j+1]+1) * numOfInstances;
            offsetTestIns[i]  += (layersSize[j]+1) * numOfTestInstances;
            offsetTestOuts[i] += (layersSize[j+1]+1) * numOfTestInstances;
        }
    }

    //resets values and deltas
    for (int i = 0; i < numOfNeurons*numOfInstances; i++)     values[i] = 0.0f;
    for (int i = 0; i < numOfNeurons*numOfTestInstances; i++) testValues[i] = 0.0f;
    for (int i = 0; i < numOfNeurons*numOfInstances; i++)     deltas[i] = 0.0f;

    //row-major -> column-major indexing
    for (int i = 0; i < numOfInstances; i++) {
        for (int j = 0; j < numOfInputsPerInstance; j++)
            columnTrainingSetInputs[j*numOfInstances+i]  = trainingSetInputs[i*numOfInputsPerInstance+j];
        for (int j = 0; j < numOfOutputsPerInstance; j++)
            columnTrainingSetOutputs[j*numOfInstances+i] = trainingSetOutputs[i*numOfOutputsPerInstance+j];
    }
    for (int i = 0; i < numOfTestInstances; i++) {
        for (int j = 0; j < numOfInputsPerInstance; j++)
            columnTestSetInputs[j*numOfTestInstances+i]  = testSetInputs[i*numOfInputsPerInstance+j];
        for (int j = 0; j < numOfOutputsPerInstance; j++)
            columnTestSetOutputs[j*numOfTestInstances+i] = testSetOutputs[i*numOfOutputsPerInstance+j];
    }
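    //(element (instance i, component j) moves from row-major index i*width+j to column-major index
    // j*numOfInstances+i, so each input/output component becomes one contiguous column for cuBLAS)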

    //copies the training set into the input neuron values
    for (int i = 0; i < numOfInstances*numOfInputsPerInstance; i++)
        values[i] = columnTrainingSetInputs[i];
    //copies the test set into the input neuron values
    for (int i = 0; i < numOfTestInstances*numOfInputsPerInstance; i++)
        testValues[i] = columnTestSetInputs[i];

    //BIAS initializations (the last column of each layer is fixed to 1)
    for (int i = 0; i < numOfLayers; i++) {
        for (int j = offsetIns[i]+(layersSize[i])*numOfInstances; j < offsetOuts[i]; j++)
            values[j] = 1.0f;
    }
    if (testSet != NULL)
        for (int i = 0; i < numOfLayers; i++) {
            for (int j = offsetTestIns[i]+(layersSize[i])*numOfTestInstances; j < offsetTestOuts[i]; j++)
                testValues[j] = 1.0f;
        }

    //vector to shuffle the training set
    int order[numOfInstances];
    for (int i = 0; i < numOfInstances; i++)
        order[i] = i;

    //cublas initializations
    cublasStatus stat;
    cublasInit();
    float * devValues             = NULL;
    float * devTestValues         = NULL;
    float * devDeltas             = NULL;
    float * devWeights            = NULL;
    float * devOldWeights         = NULL;
    float * devTrainingSetInputs  = NULL;
    float * devTrainingSetOutputs = NULL;
    float * devTestSetInputs      = NULL;
    float * devTestSetOutputs     = NULL;

    //allocates the vectors on the device
    stat = cublasAlloc(numOfNeurons*numOfInstances, sizeof(*values), (void**)&devValues);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    if (testSet != NULL) {
        stat = cublasAlloc(numOfNeurons*numOfTestInstances, sizeof(*testValues), (void**)&devTestValues);
        if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    }
    stat = cublasAlloc(numOfNeurons*numOfInstances, sizeof(*deltas), (void**)&devDeltas);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    stat = cublasAlloc(numOfWeights, sizeof(*weights), (void**)&devWeights);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    stat = cublasAlloc(numOfWeights, sizeof(*oldWeights), (void**)&devOldWeights);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    stat = cublasAlloc(numOfInstances*numOfInputsPerInstance, sizeof(*devTrainingSetInputs), (void**)&devTrainingSetInputs);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    stat = cublasAlloc(numOfInstances*numOfOutputsPerInstance, sizeof(*devTrainingSetOutputs), (void**)&devTrainingSetOutputs);
    if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    if (testSet != NULL) {
        stat = cublasAlloc(numOfTestInstances*numOfInputsPerInstance, sizeof(*devTestSetInputs), (void**)&devTestSetInputs);
        if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
        stat = cublasAlloc(numOfTestInstances*numOfOutputsPerInstance, sizeof(*devTestSetOutputs), (void**)&devTestSetOutputs);
        if (stat != CUBLAS_STATUS_SUCCESS) { printf("device memory allocation failed\n"); exit(1); }
    }

    //copies the training set inputs and outputs to the device
    cudaMemcpy(devTrainingSetInputs,  columnTrainingSetInputs,  numOfInstances*numOfInputsPerInstance*sizeof(*columnTrainingSetInputs),   cudaMemcpyHostToDevice);
    cudaMemcpy(devTrainingSetOutputs, columnTrainingSetOutputs, numOfInstances*numOfOutputsPerInstance*sizeof(*columnTrainingSetOutputs), cudaMemcpyHostToDevice);
    if (testSet != NULL) {
        //copies the test set inputs and outputs to the device
        cudaMemcpy(devTestSetInputs,  columnTestSetInputs,  numOfTestInstances*numOfInputsPerInstance*sizeof(*columnTestSetInputs),   cudaMemcpyHostToDevice);
        cudaMemcpy(devTestSetOutputs, columnTestSetOutputs, numOfTestInstances*numOfOutputsPerInstance*sizeof(*columnTestSetOutputs), cudaMemcpyHostToDevice);
    }

    //copies the training set inputs with the biases to the device
    cudaMemcpy(devValues, values, numOfNeurons*numOfInstances*sizeof(*values), cudaMemcpyHostToDevice);
    if (testSet != NULL) {
        //copies the test set inputs with the biases to the device
        cudaMemcpy(devTestValues, testValues, numOfNeurons*numOfTestInstances*sizeof(*testValues), cudaMemcpyHostToDevice);
    }
    cudaMemcpy(devDeltas, deltas, numOfNeurons*numOfInstances*sizeof(*deltas), cudaMemcpyHostToDevice);
    //the weights are stored row-major
    cudaMemcpy(devWeights,    weights, numOfWeights*sizeof(*weights), cudaMemcpyHostToDevice);
    cudaMemcpy(devOldWeights, weights, numOfWeights*sizeof(*weights), cudaMemcpyHostToDevice);

    if (printtype == PRINT_ALL) {
        //compute starting error rates (on the GPU)
        printf("Starting:\tError on train set %.10f", GPUComputeMSE(devValues, devWeights, actFuncts, numOfLayers, layersSize, numOfInstances, numOfOutputsPerInstance, devTrainingSetOutputs, offsetIns, offsetWeights, offsetOuts));
        if (testSet != NULL) {
            printf("\t\tError on test set %.10f", GPUComputeMSE(devTestValues, devWeights, actFuncts, numOfLayers, layersSize, numOfTestInstances, numOfOutputsPerInstance, devTestSetOutputs, offsetTestIns, offsetWeights, offsetTestOuts));
        }
        printf("\n");
    }

    //epochs training
    for (int epoch = 1; epoch <= max_epochs && quit == false; epoch++) {

        //shuffles the instances (Fisher-Yates; note that the full-batch GPU kernels below do not actually use this order)
        int ind = 0, aux = 0;
        if (shuff == SHUFFLE_ON)
            for (int i = 0; i < numOfInstances; i++) {
                ind = (rand() % (numOfInstances - i)) + i;
                aux = order[ind];
                order[ind] = order[i];
                order[i] = aux;
            }

        //training
        //computes the forward pass of all the instances at once
        GPUForward(devValues, devWeights, actFuncts, numOfLayers, layersSize, numOfInstances, offsetIns, offsetWeights, offsetOuts);
        //computes the backward pass of all the instances at once
        GPUBack(devValues, devWeights, devDeltas, actFuncts, numOfLayers, layersSize, numOfInstances, numOfOutputsPerInstance, devTrainingSetOutputs, offsetWeights, offsetDeltas, offsetOuts, errorFunc);
        //updates the weights using the deltas
        GPUUpdate(devValues, devWeights, devDeltas, numOfLayers, layersSize, numOfInstances, offsetIns, offsetWeights, offsetDeltas, momentum, devOldWeights, learningRate);

        if (epochs_between_reports > 0 && epoch % epochs_between_reports == 0) {
            //copies the current weights back to the host net, so it can be saved as a best-so-far candidate
            cudaMemcpy(weights, devWeights, numOfWeights*sizeof(float), cudaMemcpyDeviceToHost);

            mseTrain = GPUComputeMSE(devValues, devWeights, actFuncts, numOfLayers, layersSize, numOfInstances, numOfOutputsPerInstance, devTrainingSetOutputs, offsetIns, offsetWeights, offsetOuts);
            if (printtype == PRINT_ALL)
                printf("Epoch\t%d\tError on train set %.10f", epoch, mseTrain);

            if (testSet != NULL) {
                mseTest = GPUComputeMSE(devTestValues, devWeights, actFuncts, numOfLayers, layersSize, numOfTestInstances, numOfOutputsPerInstance, devTestSetOutputs, offsetTestIns, offsetWeights, offsetTestOuts);
                //saves the net with the best MSE on the test set
                if (mseTest < bestMSETest) {
                    bestMSETest = mseTest;
                    if (bestMSETestNet != NULL) {
                        *bestMSETestNet = *net;
                    }
                }
                //saves the net with the best MSE on train set + test set
                if ((mseTrain + mseTest) < bestMSETrainTest && bestMSETrainTestNet != NULL) {
                    *bestMSETrainTestNet = *net;
                    bestMSETrainTest = mseTrain + mseTest;
                }
                if (printtype == PRINT_ALL)
                    printf("\t\tError on test set %.10f", mseTest);
                //saves the net with the best classification percentage on the test set
                if (bestClassTestNet != NULL) {
                    float per = GPUclassificatePerc(devTestValues, devWeights, actFuncts, numOfLayers, layersSize, numOfTestInstances, numOfOutputsPerInstance, devTestSetOutputs, offsetTestIns, offsetWeights, offsetTestOuts);
                    if (printtype == PRINT_ALL)
                        printf("\t\tClassification percentage on test set: %.1f%%", per * 100);
                    if (per > bestClassTest) {
                        *bestClassTestNet = *net;
                        bestClassTest = per;
                        if (printtype == PRINT_ALL)
                            printf(" ***");
                    }
                }
                if (mseTest <= desired_error) {
                    if (printtype == PRINT_ALL)
                        printf("\nDesired error reached on test set.\n");
                    break;
                }
            }

            if (printtype == PRINT_ALL)
                printf("\n");

            if (mseTrain <= desired_error && testSet == NULL) {
                if (printtype == PRINT_ALL)
                    printf("Desired error reached on training set.\n");
                break;
            }
        }
    }

    //copies the final weights back to the host net
    cudaMemcpy(weights, devWeights, numOfWeights*sizeof(float), cudaMemcpyDeviceToHost);

    //cublas deallocations
    cublasFree(devValues);
    cublasFree(devTestValues);
    cublasFree(devDeltas);
    cublasFree(devWeights);
    cublasFree(devOldWeights);
    cublasFree(devTrainingSetInputs);
    cublasFree(devTrainingSetOutputs);
    cublasFree(devTestSetInputs);
    cublasFree(devTestSetOutputs);
    cublasShutdown();

    delete [] values;
    delete [] testValues;
    delete [] deltas;
    delete [] oldWeights;
    delete [] columnTrainingSetInputs;
    delete [] columnTrainingSetOutputs;
    delete [] columnTestSetInputs;
    delete [] columnTestSetOutputs;

    if (printtype == PRINT_ALL)
        printf("Training complete.\n");

    if (testSet != NULL) {
        return bestMSETest;
    }
    else return mseTrain;
}

//computes a single instance forward pass of the backpropagation training
void FeedForwardNNTrainer::stepForward(float * values, const float * weights, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfInputsPerInstance, const float * trainingSetInputs, const int * offsetIns, const int * offsetWeights, const int * offsetOuts, const int * order, const int instance) {
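    //(each output neuron computes out_j = f(sum_k w_jk * in_k), with the bias entering as a
    // constant input fixed to 1)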

    //loads an array of inputs
    for (int i = 0; i < numOfInputsPerInstance; i++)
        values[i] = trainingSetInputs[order[instance]*numOfInputsPerInstance+i];

    //loops the layers
    for (int i = 0; i < numOfLayers-1; i++) {
        //bias neuron
        values[offsetIns[i]+layersSize[i]] = 1.0;
        float tot = 0;
        //loops the outputs
        for (int j = 0; j < layersSize[i+1]; j++) {
            //sum of the weighted inputs, unrolled by 4 to avoid some floating point precision problems
            tot = 0;
            int k = (layersSize[i]+1) % 4;
            //handles the remainder first (the cases intentionally fall through)
            switch (k) {
                case 3: tot += weights[2+j*(layersSize[i]+1)+offsetWeights[i]] * values[2+offsetIns[i]];
                case 2: tot += weights[1+j*(layersSize[i]+1)+offsetWeights[i]] * values[1+offsetIns[i]];
                case 1: tot += weights[j*(layersSize[i]+1)+offsetWeights[i]]   * values[offsetIns[i]];
                case 0: break;
            }
            for (; k < layersSize[i]+1; k += 4) {
                tot += weights[k+j*(layersSize[i]+1)+offsetWeights[i]]   * values[k+offsetIns[i]]+
                       weights[k+1+j*(layersSize[i]+1)+offsetWeights[i]] * values[k+1+offsetIns[i]]+
                       weights[k+2+j*(layersSize[i]+1)+offsetWeights[i]] * values[k+2+offsetIns[i]]+
                       weights[k+3+j*(layersSize[i]+1)+offsetWeights[i]] * values[k+3+offsetIns[i]];
            }
            //writes the outputs of the layer through the activation function
            values[j+offsetOuts[i]] = actFunction(actFuncts[i+1], tot);
        }
    }
}

//computes a single instance backward pass of the backpropagation training
void FeedForwardNNTrainer::stepBack(const float * values, const float * weights, float * deltas, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfOutputsPerInstance, const float * trainingSetOutputs, const int * offsetWeights, const int * offsetDeltas, const int * offsetOuts, const int * order, const int instance, const int errorFunc) {
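    //(delta rules implemented below:
    // output layer:  delta_j = f'(out_j) * errorFunction((target_j - out_j) / spanSize)
    // hidden layers: delta_j = f'(out_j) * sum_k(delta_k * w_jk), summed over the next layer's neurons)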

    //loops the layers backwards (from the last hidden layer to the inputs)
    for (int i = numOfLayers-2; i >= 0; i--) {
        //output layer (different rule) and no bias (for nextLayerSize)
        if (i == numOfLayers-2) {
            for (int j = 0; j < layersSize[i+1]; j++) {
                float error = (trainingSetOutputs[j+order[instance]*numOfOutputsPerInstance]-values[j+offsetOuts[i]]) / spanSize(actFuncts[i+1]);
                deltas[j+offsetDeltas[i]] = actDerivation(actFuncts[i+1], values[j+offsetOuts[i]]) * errorFunction(error, errorFunc);
            }
        }
        //normal hidden layer
        else {
            //accumulates the next layer's deltas through the next layer's weights
            float tmpErrors[layersSize[i+1]+1];
            for (int j = 0; j < layersSize[i+1]+1; j++)
                tmpErrors[j] = 0;
            for (int k = 0; k < layersSize[i+2]; k++) {
                float precalc = deltas[k+offsetDeltas[i+1]];
                for (int j = 0; j < layersSize[i+1]+1; j++) {
                    //the next layer's delta and weights are used
                    tmpErrors[j] += precalc * weights[j+k*(layersSize[i+1]+1)+offsetWeights[i+1]];
                }
            }
            for (int j = 0; j < layersSize[i+1]+1; j++) {
                deltas[j+offsetDeltas[i]] = actDerivation(actFuncts[i+1], values[j+offsetOuts[i]]) * tmpErrors[j];
            }
        }
    }
}

//updates the weights using the deltas
void FeedForwardNNTrainer::weightsUpdate(const float * values, const float * weights, float * weightsToUpdate, const float * deltas, const int numOfLayers, const int * layersSize, const int * offsetIns, const int * offsetWeights, const int * offsetDeltas, const float momentum, float * oldWeights, float learningRate) {
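    //(update rule: w += learningRate * delta * value, plus momentum * (w - w_old) when momentum > 0;
    // the momentum-free branch below skips the old-weights bookkeeping entirely)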

    //loops the layers
    if (momentum > 0)
        for (int i = 0; i < numOfLayers-1; i++) {
            for (int k = 0; k < layersSize[i+1]; k++) {
                //precomputed products and offsets to speed up the backpropagation
                float tempLxD = learningRate * deltas[k+offsetDeltas[i]];
                int wOffset = k*(layersSize[i]+1)+offsetWeights[i];
                int vOffset = offsetIns[i];
                for (int j = 0; j < layersSize[i]+1; j++) {
                    float auxWeight = weights[j+wOffset];
                    weightsToUpdate[j+wOffset] += tempLxD*values[j+vOffset] + momentum*(auxWeight-oldWeights[j+wOffset]);
                    oldWeights[j+wOffset] = auxWeight;
                }
            }
        }
    else
        for (int i = 0; i < numOfLayers-1; i++) {
            for (int k = 0; k < layersSize[i+1]; k++) {
                //precomputed products and offsets to speed up the backpropagation
                float tempLxD = learningRate * deltas[k+offsetDeltas[i]];
                int wOffset = k*(layersSize[i]+1)+offsetWeights[i];
                int vOffset = offsetIns[i];
                for (int j = 0; j < layersSize[i]+1; j++) {
                    weightsToUpdate[j+wOffset] += tempLxD*values[j+vOffset];
                }
            }
        }
}

//GPU computes all the instances forward of the backpropagation training
void FeedForwardNNTrainer::GPUForward(float * devValues, const float * devWeights, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfInstances, const int * offsetIns, const int * offsetWeights, const int * offsetOuts) {
    //loops the layers
    for (int i = 0; i < numOfLayers-1; i++) {
        int ninput  = numOfInstances;
        int naux    = layersSize[i]+1;
        int noutput = layersSize[i+1];
        const float * devPtrA = devValues + offsetIns[i];
        const float * devPtrB = devWeights + offsetWeights[i];
        float       * devPtrC = devValues + offsetOuts[i];
        //multiplies the neuron-values matrix by the weights matrix
        //the weights matrix is row-major, so no transposition is necessary
        cublasSgemm('n', 'n',
                    ninput, noutput, naux,
                    1, devPtrA, ninput,
                    devPtrB, naux,
                    0, devPtrC, ninput);
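        //(computed product: C[numOfInstances x layersSize[i+1]] =
        //   A[numOfInstances x (layersSize[i]+1)] * B[(layersSize[i]+1) x layersSize[i+1]],
        // i.e. one forward step for every instance at once)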

        //applies the activation function to the freshly computed layer outputs
        computeActFunct(devPtrC, ninput*noutput, actFuncts[i+1]);
    }
}

//GPU computes all the instances backward of the backpropagation training
void FeedForwardNNTrainer::GPUBack(const float * devValues, const float * devWeights, float * devDeltas, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfInstances, const int numOfOutputsPerInstance, const float * devTrainingSetOutputs, const int * offsetWeights, const int * offsetDeltas, const int * offsetOuts, const int errorFunc) {
    //loops the layers backwards (from the last hidden layer to the inputs)
    for (int i = numOfLayers-2; i >= 0; i--) {
        //output layer (different rule) and no bias (for nextLayerSize)
        if (i == numOfLayers-2) {
            computeError(devDeltas+offsetDeltas[i], devTrainingSetOutputs, devValues+offsetOuts[i], numOfInstances*numOfOutputsPerInstance, actFuncts[i+1], errorFunc);
        }

        //normal hidden layer
        else {
            int ninput  = numOfInstances;
            int naux    = layersSize[i+2];
            int noutput = layersSize[i+1]+1;
            const float * devPtrA = devDeltas + offsetDeltas[i+1];
            const float * devPtrB = devWeights + offsetWeights[i+1];
            float       * devPtrC = devDeltas + offsetDeltas[i];
            //multiplies the deltas matrix by the weights matrix
            //the weights matrix is row-major, so it must be transposed to multiply; its leading dimension is noutput
            cublasSgemm('n', 't',
                        ninput, noutput, naux,
                        1, devPtrA, ninput,
                        devPtrB, noutput,
                        0, devPtrC, ninput);
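            //(computed product: C[numOfInstances x (layersSize[i+1]+1)] =
            //   A[numOfInstances x layersSize[i+2]] * B^T[layersSize[i+2] x (layersSize[i+1]+1)],
            // the batched form of the hidden-layer rule delta_j = sum_k delta_k * w_jk)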

        }
        //multiplies the deltas by the derivative of the activation function
        computeDerivFunct(devDeltas+offsetDeltas[i], devValues+offsetOuts[i], numOfInstances*layersSize[i+1], actFuncts[i+1]);
    }
}

//GPU updates the weights for all the instances
void FeedForwardNNTrainer::GPUUpdate(const float * devValues, float * devWeights, const float * devDeltas, const int numOfLayers, const int * layersSize, const int numOfInstances, const int * offsetIns, const int * offsetWeights, const int * offsetDeltas, const float momentum, float * devOldWeights, const float learningRate) {
    //loops the layers
    for (int i = 0; i < numOfLayers-1; i++) {
        int ninput  = layersSize[i]+1;
        int naux    = numOfInstances;
        int noutput = layersSize[i+1];
        const float * devPtrA = devValues + offsetIns[i];
        const float * devPtrB = devDeltas + offsetDeltas[i];
        float       * devPtrC = devWeights + offsetWeights[i];
        if (momentum > 0) {
            //if there's a momentum, updates the weights with a portion of the difference from the old weights
            addMomentum(devWeights+offsetWeights[i], devOldWeights+offsetWeights[i], layersSize[i+1]*(layersSize[i]+1), momentum);
        }
        //multiplies the neuron-values matrix by the deltas matrix and adds the result to the weights matrix
        //(scaled by the learning rate and divided by the number of instances)
        //the neuron-values matrix is transposed to multiply
        cublasSgemm('t', 'n',
                    ninput, noutput, naux,
                    learningRate/(float)numOfInstances, devPtrA, naux,
                    devPtrB, naux,
                    1, devPtrC, ninput);
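        //(computed update: W[(layersSize[i]+1) x layersSize[i+1]] += (learningRate/numOfInstances) * values^T * deltas,
        // the batched equivalent of w_jk += learningRate * delta_j * value_k, averaged over the instances)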

    }
}

//GPU computes the MSE on a set
float FeedForwardNNTrainer::GPUComputeMSE(float * devValues, const float * devWeights, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfInstances, const int numOfOutputsPerInstance, const float * devSetOutputs, const int * offsetIns, const int * offsetWeights, const int * offsetOuts) {
    //forward pass: loops the layers
    for (int i = 0; i < numOfLayers-1; i++) {
        int ninput  = numOfInstances;
        int naux    = layersSize[i]+1;
        int noutput = layersSize[i+1];
        const float * devPtrA = devValues + offsetIns[i];
        const float * devPtrB = devWeights + offsetWeights[i];
        float       * devPtrC = devValues + offsetOuts[i];
        //multiplies the neuron-values matrix by the weights matrix
        //the weights matrix is row-major, so no transposition is necessary
        cublasSgemm('n', 'n',
                    ninput, noutput, naux,
                    1, devPtrA, ninput,
                    devPtrB, naux,
                    0, devPtrC, ninput);
        computeActFunct(devPtrC, ninput*noutput, actFuncts[i+1]);
    }
    //compares the outputs of the last layer with the desired outputs
    return mseError(devSetOutputs, devValues+offsetOuts[numOfLayers-2], numOfInstances*numOfOutputsPerInstance, actFuncts[numOfLayers-1]);
}

//GPU computes the classification percentage on a set
float FeedForwardNNTrainer::GPUclassificatePerc(float * devValues, const float * devWeights, const int * actFuncts, const int numOfLayers, const int * layersSize, const int numOfInstances, const int numOfOutputsPerInstance, float * devSetOutputs, const int * offsetIns, const int * offsetWeights, const int * offsetOuts) {
    //forward pass: loops the layers
    for (int i = 0; i < numOfLayers-1; i++) {
        int ninput  = numOfInstances;
        int naux    = layersSize[i]+1;
        int noutput = layersSize[i+1];
        const float * devPtrA = devValues + offsetIns[i];
        const float * devPtrB = devWeights + offsetWeights[i];
        float       * devPtrC = devValues + offsetOuts[i];
        //multiplies the neuron-values matrix by the weights matrix
        //the weights matrix is row-major, so no transposition is necessary
        cublasSgemm('n', 'n',
                    ninput, noutput, naux,
                    1, devPtrA, ninput,
                    devPtrB, naux,
                    0, devPtrC, ninput);
        computeActFunct(devPtrC, ninput*noutput, actFuncts[i+1]);
    }

    int valuesIndexes[numOfInstances];
    int outputIndexes[numOfInstances];
    int * devValuesIndexes;
    int * devOutputIndexes;
    cudaMalloc((void **)&devValuesIndexes, numOfInstances*sizeof(int));
    cudaMalloc((void **)&devOutputIndexes, numOfInstances*sizeof(int));
    float * tmpTranslate;
    cudaMalloc((void **)&tmpTranslate, numOfInstances*numOfOutputsPerInstance*sizeof(float));

    //transposes the output neurons matrix from column-major to row-major
    translateMatrix(numOfInstances, numOfOutputsPerInstance, devValues+offsetOuts[numOfLayers-2], tmpTranslate);
    //and evaluates the max of each row for classification
    computeMaxes(numOfInstances, numOfOutputsPerInstance, tmpTranslate, devValuesIndexes);
    //transposes the desired outputs matrix from column-major to row-major
    translateMatrix(numOfInstances, numOfOutputsPerInstance, devSetOutputs, tmpTranslate);
    //and evaluates the max of each row for classification
    computeMaxes(numOfInstances, numOfOutputsPerInstance, tmpTranslate, devOutputIndexes);
    cudaFree(tmpTranslate);

    cudaMemcpy(valuesIndexes, devValuesIndexes, numOfInstances*sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(outputIndexes, devOutputIndexes, numOfInstances*sizeof(int), cudaMemcpyDeviceToHost);

    //computes the classification rate by comparing the correct class with the one chosen by the net
    int cont = 0;
    for (int i = 0; i < numOfInstances; i++) {
        if (valuesIndexes[i] == outputIndexes[i]) cont++;
    }
    cudaFree(devValuesIndexes);
    cudaFree(devOutputIndexes);
    return (float)cont / (float)numOfInstances;
}