Add KL Divergence and MethodAE #9

Open · wants to merge 3 commits into master
2 changes: 1 addition & 1 deletion tmva/tmva/CMakeLists.txt
@@ -16,7 +16,7 @@ set(headers1 Configurable.h Factory.h MethodBase.h MethodCompositeBase.h
MethodKNN.h MethodCFMlpANN.h MethodCFMlpANN_Utils.h MethodLikelihood.h
MethodHMatrix.h MethodPDERS.h MethodBDT.h MethodDT.h MethodSVM.h MethodBayesClassifier.h
MethodFDA.h MethodMLP.h MethodBoost.h
MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h
MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h MethodAE.h
MethodCrossValidation.h)
set(headers2 TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h BinarySearchTree.h
Timer.h RootFinder.h CrossEntropy.h DecisionTree.h DecisionTreeNode.h MisClassificationError.h
1 change: 1 addition & 0 deletions tmva/tmva/inc/LinkDef1.h
@@ -67,5 +67,6 @@
#pragma link C++ class TMVA::MethodDNN+;
#pragma link C++ class TMVA::MethodCrossValidation+;
#pragma link C++ class TMVA::MethodDL+;
#pragma link C++ class TMVA::MethodAE+;

#endif
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h
@@ -191,6 +191,15 @@ class TCpu
const TCpuMatrix<Scalar_t> &weights);
static void SoftmaxCrossEntropyGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static Scalar_t KLDivergence(const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
static void KLDivergenceGradients(TCpuMatrix<Scalar_t> &dMean, TCpuMatrix<Scalar_t> &dStandardDeviation,
const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
///@}

//____________________________________________________________________________
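For context, the quantity declared here is closely related to the KL divergence between a diagonal Gaussian N(mu, sigma^2) and a standard normal, the regularization term used in variational autoencoders. Its usual closed form is sketched below in LaTeX, under the convention stated in the comment above that StandardDeviation stores s = log sigma, so sigma^2 = e^{2s}; the implementations later in this diff accumulate the closely related weighted per-element quantity 1 + s - mu^2 - e^{2s} and average it over the batch dimension.

D_{\mathrm{KL}}\left(\mathcal{N}(\mu,\sigma^2)\,\|\,\mathcal{N}(0,1)\right)
    = -\frac{1}{2}\sum_{j}\left(1 + \log\sigma_j^{2} - \mu_j^{2} - \sigma_j^{2}\right),
\qquad \sigma_j^{2} = e^{2 s_j}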
8 changes: 8 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h
@@ -193,6 +193,14 @@ class TCuda
const TCudaMatrix<AFloat> &weights);
static void SoftmaxCrossEntropyGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static AFloat KLDivergence(const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
static void KLDivergenceGradients(TCudaMatrix<AFloat> &dMean, TCudaMatrix<AFloat> &dStandardDeviation,
const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
///@}

//____________________________________________________________________________
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h
@@ -190,6 +190,15 @@ class TReference
const TMatrixT<AReal> &weights);
static void SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static AReal KLDivergence(const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
static void KLDivergenceGradients(TMatrixT<AReal> &dMean, TMatrixT<AReal> &dStandardDeviation,
const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
///@}

//____________________________________________________________________________
22 changes: 20 additions & 2 deletions tmva/tmva/inc/TMVA/DNN/Functions.h
@@ -55,7 +55,8 @@ enum class ELossFunction
{
kCrossEntropy = 'C',
kMeanSquaredError = 'R',
kSoftmaxCrossEntropy = 'S'
kSoftmaxCrossEntropy = 'S',
kKLDivergence = 'D'
};

/*! Enum representing the regularization type applied for a given layer */
@@ -171,6 +172,7 @@ inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y
case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
case ELossFunction::kKLDivergence: return Architecture_t::KLDivergence(Y,output,weights);
}
return 0.0;
}
@@ -190,10 +192,26 @@ inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFuncti
case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
case ELossFunction::kSoftmaxCrossEntropy :
Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
break;
}
}

/*! Compute the gradients with respect to the two metrics used for the
* similarity measure. The gradients are returned in two separate matrices. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dMetricOne, typename Architecture_t::Matrix_t &dMetricTwo,
ELossFunction f, const typename Architecture_t::Matrix_t &MetricOne,
const typename Architecture_t::Matrix_t &MetricTwo,
const typename Architecture_t::Matrix_t &weights)
{
switch(f)
{
case ELossFunction::kKLDivergence: Architecture_t::KLDivergenceGradients(dMetricOne, dMetricTwo, MetricOne, MetricTwo, weights);
break;
}
}


//______________________________________________________________________________
//
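A minimal sketch of how the dispatch entries added above might be exercised directly, using the reference backend. The matrix shapes and values are illustrative only, and the argument order (mean in the first matrix slot, log standard deviation in the second) follows the loss-function test added later in this PR.

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/Architectures/Reference.h"
#include "TMatrixT.h"
#include <iostream>

int main()
{
   using Arch = TMVA::DNN::TReference<double>;

   // Mean and log-standard-deviation matrices produced by an encoder, plus per-event weights.
   TMatrixT<double> mean(4, 2), logSigma(4, 2), weights(4, 1);
   mean = 0.1;
   logSigma = -1.0;
   weights = 1.0;

   // Scalar loss value through the generic dispatcher extended in this diff.
   double loss = TMVA::DNN::evaluate<Arch>(TMVA::DNN::ELossFunction::kKLDivergence,
                                           mean, logSigma, weights);

   // Gradients with respect to both metrics through the new two-output overload.
   TMatrixT<double> dMean(4, 2), dLogSigma(4, 2);
   TMVA::DNN::evaluateGradients<Arch>(dMean, dLogSigma,
                                      TMVA::DNN::ELossFunction::kKLDivergence,
                                      mean, logSigma, weights);

   std::cout << "KL loss: " << loss << std::endl;
   return 0;
}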
256 changes: 256 additions & 0 deletions tmva/tmva/inc/TMVA/MethodAE.h
@@ -0,0 +1,256 @@
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Vladimir Ilievski, Saurav Shekhar, Siddhartha Rao Kamalakara

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : MethodAE *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Deep Neural Network Method *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <[email protected]> - CERN, Switzerland *
* Saurav Shekhar <[email protected]> - ETH Zurich, Switzerland *
* Siddhartha Rao Kamalakara <[email protected]> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#ifndef ROOT_TMVA_MethodAE
#define ROOT_TMVA_MethodAE

//////////////////////////////////////////////////////////////////////////
// //
// MethodAE //
// //
// Method class for creating Auto Encoders //
// //
//////////////////////////////////////////////////////////////////////////

#include "TString.h"

#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

#include "TMVA/DNN/Architectures/Reference.h"

#ifdef R__HAS_TMVACPU
#include "TMVA/DNN/Architectures/Cpu.h"
#endif

#ifdef R__HAS_TMVACUDA
#include "TMVA/DNN/Architectures/Cuda.h"
#endif

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"

#include <vector>

namespace TMVA {

/*! All of the options that can be specified in the training string */
struct TTrainingAESettings {
size_t batchSize;
size_t testInterval;
size_t convergenceSteps;
size_t maxEpochs;
DNN::ERegularization regularization;
Double_t learningRate;
Double_t momentum;
Double_t weightDecay;
std::vector<Double_t> dropoutProbabilities;
bool multithreading;
};

class MethodAE : public MethodBase {

private:
// Key-Value vector type, containing the values for the training options
using KeyValueVector_t = std::vector<std::map<TString, TString>>;
#ifdef R__HAS_TMVACPU
using ArchitectureImpl_t = TMVA::DNN::TCpu<Double_t>;
#else
using ArchitectureImpl_t = TMVA::DNN::TReference<Double_t>;
#endif
using DeepNetImpl_t = TMVA::DNN::TDeepNet<ArchitectureImpl_t>;
std::unique_ptr<DeepNetImpl_t> fNet;

/*! The option handling methods */
void DeclareOptions();
void ProcessOptions();

void Init();

// Function to parse the layout of the input
void ParseInputLayout();
void ParseBatchLayout();

/*! After calling ProcessOptions(), all of the options are parsed, so using
* the parsed options, and given the architecture and the type of the layers,
* we build the deep network passed as a reference to the function. */
template <typename Architecture_t, typename Layer_t>
void CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);

template <typename Architecture_t, typename Layer_t>
void CreateEncoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void CreateDecoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

size_t fInputDepth; ///< The depth of the input.
size_t fInputHeight; ///< The height of the input.
size_t fInputWidth; ///< The width of the input.

size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
size_t fBatchHeight; ///< The height of the batch used to train the deep net.
size_t fBatchWidth; ///< The width of the batch used to train the deep net.

DNN::EInitialization fWeightInitialization; ///< The initialization method
DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
DNN::ELossFunction fLossFunction; ///< The loss function

TString fInputLayoutString; ///< The string defining the layout of the input
TString fBatchLayoutString; ///< The string defining the layout of the batch
TString fLayoutString; ///< The string defining the layout of the deep net
TString fErrorStrategy; ///< The string defining the error strategy for training
TString fTrainingStrategyString; ///< The string defining the training strategy
TString fWeightInitializationString; ///< The string defining the weight initialization method
TString fArchitectureString; ///< The string defining the architecture: CPU or GPU
bool fResume;

KeyValueVector_t fSettings; ///< Map for the training strategy
std::vector<TTrainingAESettings> fTrainingSettings; ///< The vector defining each training strategy

ClassDef(MethodAE, 0);

protected:
// provide a help message
void GetHelpMessage() const;

public:
/*! Constructor */
MethodAE(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);

/*! Constructor */
MethodAE(DataSetInfo &theData, const TString &theWeightFile);

/*! Virtual Destructor */
virtual ~MethodAE();

/*! Function for parsing the training settings, provided as a string
* in a key-value form. */
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);

/*! Check the type of analysis the deep learning network can do */
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);

/*! Methods for training the deep learning network */
void Train();

Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);

/*! Methods for writing and reading weights */
using MethodBase::ReadWeightsFromStream;
void AddWeightsXMLTo(void *parent) const;
void ReadWeightsFromXML(void *wghtnode);
void ReadWeightsFromStream(std::istream &);

/* Create ranking */
const Ranking *CreateRanking();

/* Getters */
size_t GetInputDepth() const { return fInputDepth; }
size_t GetInputHeight() const { return fInputHeight; }
size_t GetInputWidth() const { return fInputWidth; }

size_t GetBatchDepth() const { return fBatchDepth; }
size_t GetBatchHeight() const { return fBatchHeight; }
size_t GetBatchWidth() const { return fBatchWidth; }

const DeepNetImpl_t & GetDeepNet() const { return *fNet; }

DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; }
DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; }
DNN::ELossFunction GetLossFunction() const { return fLossFunction; }

TString GetInputLayoutString() const { return fInputLayoutString; }
TString GetBatchLayoutString() const { return fBatchLayoutString; }
TString GetLayoutString() const { return fLayoutString; }
TString GetErrorStrategyString() const { return fErrorStrategy; }
TString GetTrainingStrategyString() const { return fTrainingStrategyString; }
TString GetWeightInitializationString() const { return fWeightInitializationString; }
TString GetArchitectureString() const { return fArchitectureString; }

const std::vector<TTrainingAESettings> &GetTrainingSettings() const { return fTrainingSettings; }
std::vector<TTrainingAESettings> &GetTrainingSettings() { return fTrainingSettings; }
const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; }
KeyValueVector_t &GetKeyValueSettings() { return fSettings; }

/** Setters */
void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }

void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }

void SetWeightInitialization(DNN::EInitialization weightInitialization)
{
fWeightInitialization = weightInitialization;
}
void SetOutputFunction(DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
void SetErrorStrategyString(TString errorStrategy) { fErrorStrategy = errorStrategy; }
void SetTrainingStrategyString(TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
void SetWeightInitializationString(TString weightInitializationString)
{
fWeightInitializationString = weightInitializationString;
}
void SetArchitectureString(TString architectureString) { fArchitectureString = architectureString; }
void SetLayoutString(TString layoutString) { fLayoutString = layoutString; }
};

} // namespace TMVA

#endif
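As an illustration of how one block of the TrainingStrategy option string maps onto the TTrainingAESettings struct declared above. The field names come from this header; the option keys and the mapping itself are inferred from the analogous MethodDL options and should be treated as assumptions, since MethodAE::ProcessOptions() is not shown in this diff.

#include "TMVA/MethodAE.h"

// Hand-filled, hypothetical equivalent of a training-strategy block such as
// "LearningRate=1e-2,Momentum=0.9,ConvergenceSteps=20,BatchSize=256,
//  TestRepetitions=10,WeightDecay=1e-4,Regularization=L2,
//  DropConfig=0.0+0.0+0.0+0.0,Multithreading=True"
TMVA::TTrainingAESettings MakeExampleSettings()
{
   TMVA::TTrainingAESettings s;
   s.batchSize            = 256;                             // BatchSize
   s.testInterval         = 10;                              // TestRepetitions
   s.convergenceSteps     = 20;                              // ConvergenceSteps
   s.maxEpochs            = 2000;                            // made-up upper bound on epochs
   s.regularization       = TMVA::DNN::ERegularization::kL2; // Regularization=L2
   s.learningRate         = 1e-2;                            // LearningRate
   s.momentum             = 0.9;                             // Momentum
   s.weightDecay          = 1e-4;                            // WeightDecay
   s.dropoutProbabilities = {0.0, 0.0, 0.0, 0.0};            // DropConfig
   s.multithreading       = true;                            // Multithreading
   return s;
}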
1 change: 1 addition & 0 deletions tmva/tmva/inc/TMVA/Types.h
@@ -99,6 +99,7 @@ namespace TMVA {
kCategory ,
kDNN ,
kDL ,
kAE ,
kPyRandomForest ,
kPyAdaBoost ,
kPyGTB ,
53 changes: 53 additions & 0 deletions tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx
@@ -194,5 +194,58 @@ void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TC
Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
}

//______________________________________________________________________________
template <typename AFloat>
AFloat TCpu<AFloat>::KLDivergence(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
const TCpuMatrix<AFloat> &weights)
{
const AFloat *dataY = Y.GetRawDataPointer();
const AFloat *dataOutput = output.GetRawDataPointer();
const AFloat *dataWeights = weights.GetRawDataPointer();
std::vector<AFloat> temp(Y.GetNElements());
size_t m = Y.GetNrows();
AFloat norm = 1.0 / ((AFloat) m);

auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
AFloat dy = 1 + dataOutput[workerID] - std::pow(dataY[workerID], 2) - std::pow(std::exp(dataOutput[workerID]), 2);
temp[workerID] = dataWeights[workerID % m] * dy;
return 0;
};

auto reduction = [](const std::vector<AFloat> & v )
{
return std::accumulate(v.begin(),v.end(),AFloat{});
};

Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}

//______________________________________________________________________________
template <typename AFloat>
void TCpu<AFloat>::KLDivergenceGradients(TCpuMatrix<AFloat> &dY, TCpuMatrix<AFloat> &dSD, const TCpuMatrix<AFloat> &Y,
const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{

AFloat *dataDY = dY.GetRawDataPointer();
AFloat *dataDSD = dSD.GetRawDataPointer();
const AFloat *dataY = Y.GetRawDataPointer();
const AFloat *dataOutput = output.GetRawDataPointer();
const AFloat *dataWeights = weights.GetRawDataPointer();

size_t m = Y.GetNrows();
AFloat norm = 1.0 / ((AFloat) m);

auto f = [&dataDY, &dataDSD, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
dataDY[workerID] = -2.0 * norm * dataY[workerID];
dataDY[workerID] *= dataWeights[workerID % m];
dataDSD[workerID] = 1.0 - (2.0 * std::exp(2.0 * dataOutput[workerID]));
dataDSD[workerID] *= norm * dataWeights[workerID % m];
return 0;
};

Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
}

} // namespace DNN
} // namespace TMVA
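The kernel above applies, element by element, the derivatives of the weighted term w_i (1 + s - mu^2 - e^{2s}) accumulated by KLDivergence, with the same 1/m normalization:

\frac{\partial L}{\partial \mu_{ij}} = -\frac{2}{m}\, w_i\, \mu_{ij},
\qquad
\frac{\partial L}{\partial s_{ij}} = \frac{w_i}{m}\left(1 - 2\, e^{2 s_{ij}}\right)

where s denotes the stored log standard deviation, w_i the event weight, and m the number of rows (the batch size).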
42 changes: 42 additions & 0 deletions tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx
@@ -151,5 +151,47 @@ void TReference<AReal>::SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const
}
}

//______________________________________________________________________________
template <typename AReal>
AReal TReference<AReal>::KLDivergence(const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights)
{
size_t m,n;
m = Mean.GetNrows();
n = Mean.GetNcols();

AReal result = 0.0;

for(size_t i=0; i<m; i++){
for(size_t j=0; j<n; j++){
AReal sum = 1 + StandardDeviation(i, j) - (Mean(i, j) * Mean(i, j)) - std::pow(std::exp(StandardDeviation(i, j)) , 2);
result += weights(i,0) * sum;
}
}

result /= static_cast<AReal>(m);

return result;
}

template <typename AReal>
void TReference<AReal>::KLDivergenceGradients(TMatrixT<AReal> &dMean, TMatrixT<AReal> &dStandardDeviation,
const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights)
{
size_t m,n;
m = Mean.GetNrows();
n = Mean.GetNcols();
AReal norm = 1.0 / m ;

for(size_t i = 0; i < m; i++){
for(size_t j = 0; j < n; j++){
dMean(i,j) = -2.0 * norm * Mean(i,j) * weights(i, 0);
dStandardDeviation(i,j) = 1.0 - (2.0 * std::exp(2.0 * StandardDeviation(i,j)));
dStandardDeviation(i,j) *= norm * weights(i, 0);
}
}
}

} // namespace DNN
} // namespace TMVA
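A quick sanity check of the reference implementation above: for a single element with unit weight, mean mu = 0 and stored log standard deviation s = 0 (that is, sigma = 1, the standard normal), the accumulated term is 1 + 0 - 0 - 1 = 0, so the loss vanishes exactly at the prior, as expected for a KL-style regularizer.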
1,616 changes: 1,616 additions & 0 deletions tmva/tmva/src/MethodAE.cxx

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tmva/tmva/test/DNN/CNN/CMakeLists.txt
@@ -75,5 +75,7 @@ ROOT_ADD_TEST(TMVA-DNN-CNN-Backpropagation-CPU COMMAND testConvBackpropagationCp
ROOT_EXECUTABLE(testMethodDLCpu TestMethodDL.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-CNN-MethodDL-CPU COMMAND testMethodDLCpu)

ROOT_EXECUTABLE(testMethodAECpu TestMethodAE.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-CNN-MethodAE-CPU COMMAND testMethodAECpu)

endif ()
38 changes: 38 additions & 0 deletions tmva/tmva/test/DNN/CNN/TestMethodAE.cxx
@@ -0,0 +1,38 @@
// @(#)root/tmva/tmva/cnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Testing MethodAE for the CPU backend *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#include "TestMethodAE.h"
#include "TString.h"

int main()
{
std::cout << "Testing Method AE for CPU backend: " << std::endl;

TString archCPU = "CPU";

testMethodAE_DNN(archCPU);

}
155 changes: 155 additions & 0 deletions tmva/tmva/test/DNN/CNN/TestMethodAE.h
@@ -0,0 +1,155 @@
// @(#)root/tmva/tmva/cnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Testing MethodAE *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#ifndef TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H
#define TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H

#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TROOT.h"

#include "TMVA/MethodAE.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Config.h"

#include "MakeImageData.h"

#include <iostream>


/** Testing the entire pipeline of MethodAE, when only a multilayer perceptron
* is constructed. */
//______________________________________________________________________________
void testMethodAE_DNN(TString architectureStr)
{

ROOT::EnableImplicitMT(1);
TMVA::Config::Instance();

TFile *input(0);
// TString fname = "/Users/vladimirilievski/Desktop/Vladimir/GSoC/ROOT-CI/common-version/root/tmva/tmva/test/DNN/CNN/"
// "dataset/tmva_class_example.root";
/*
TString fname = "http://root.cern.ch/files/tmva_class_example.root";
TString fopt = "CACHEREAD";
input = TFile::Open(fname,fopt);
*/
input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD");


TString outfileName("TMVA_DNN.root");
TFile *outputFile = TFile::Open(outfileName, "RECREATE");

TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");

dataloader->AddVariable( "var1", "Variable 1", "units", 'F' );
dataloader->AddVariable( "var2", "Variable 2", "units", 'F' );
dataloader->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
dataloader->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

dataloader->AddTarget("var1");
dataloader->AddTarget("var2");

TTree *regTree = (TTree*)input->Get("TreeR");

Double_t regWeight = 1.0;

dataloader->AddRegressionTree( regTree, regWeight );

TCut mycut = "";

dataloader->PrepareTrainingAndTestTree( mycut,
"nTrain_Regression=1000:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

// Input Layout
TString inputLayoutString("InputLayout=1|1|2");

// Batch Layout
TString batchLayoutString("BatchLayout=256|1|2");

// General layout.
TString layoutString("Layout=Encoder={RESHAPE|1|1|2|FLAT,DENSE|128|TANH,DENSE|64|TANH}Decoder={DENSE|128|TANH,DENSE|2|LINEAR,LINEAR}");

// Training strategies.
TString training0("LearningRate=1e-1,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.5+0.5+0.5, Multithreading=True");
TString training1("LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
TString training2("LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
TString trainingStrategyString("TrainingStrategy=");
trainingStrategyString += training0 + "|" + training1 + "|" + training2;

// General Options.
TString dnnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:"
"WeightInitialization=XAVIERUNIFORM");


// Concatenate all option strings
dnnOptions.Append(":");
dnnOptions.Append(inputLayoutString);

dnnOptions.Append(":");
dnnOptions.Append(batchLayoutString);

dnnOptions.Append(":");
dnnOptions.Append(layoutString);

dnnOptions.Append(":");
dnnOptions.Append(trainingStrategyString);

dnnOptions.Append(":Architecture=");
dnnOptions.Append(architectureStr);

// create factory
TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );

TString methodTitle = "AE_" + architectureStr;
factory->BookMethod(dataloader, TMVA::Types::kAE, methodTitle, dnnOptions);

// Train MVAs using the set of training events
factory->TrainAllMethods();

// Save the output
outputFile->Close();

std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
std::cout << "==> TMVAClassification is done!" << std::endl;

delete factory;
delete dataloader;
}

#endif
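A note on the Layout string used in the test above: it appears to follow the MethodDL layer grammar extended with named sub-networks, i.e. Encoder={...} and Decoder={...} blocks, layers separated by commas, and layer parameters separated by '|'. This reading is inferred from the string used here and from the CreateEncoder/CreateDecoder and Parse*Layer declarations in MethodAE.h; it is an interpretation, not documented behaviour. A purely dense variant might look like:

// Hypothetical alternative layout: a symmetric dense autoencoder with a
// 4-dimensional bottleneck (layer names and '|' field order taken from the
// string used in testMethodAE_DNN above).
TString layoutString("Layout=Encoder={DENSE|64|TANH,DENSE|16|TANH,DENSE|4|LINEAR}"
                     "Decoder={DENSE|16|TANH,DENSE|64|TANH,DENSE|2|LINEAR}");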
15 changes: 15 additions & 0 deletions tmva/tmva/test/DNN/TestLossFunctions.cxx
@@ -71,4 +71,19 @@ int main()
std::cout << "Testing softmax cross entropy gradient: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;


//
// KL Divergence
//

error = testKLDivergence<TReference<double>>(10);
std::cout << "Testing KL divergence loss: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;

error = testKLDivergenceGradients<TReference<double>>(10);
std::cout << "Testing KL divergence gradients: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;
}
103 changes: 103 additions & 0 deletions tmva/tmva/test/DNN/TestLossFunctions.h
@@ -328,3 +328,106 @@ auto testSoftmaxCrossEntropyGradients(size_t ntests)
}
return maximumError;
}

//______________________________________________________________________________
//
// KL Divergence
//______________________________________________________________________________

template <typename Architecture>
auto testKLDivergence(size_t ntests)
-> typename Architecture::Scalar_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Scalar_t = typename Architecture::Scalar_t;
Double_t maximumError = 0.0;

for (size_t i = 0; i < ntests; i++) {
size_t m = rand() % 100 + 1;
size_t n = rand() % 100 + 1;

TMatrixT<Double_t> W(m, 1);
TMatrixT<Double_t> X(m, n);
TMatrixT<Double_t> Y(m, n);
TMatrixT<Double_t> Z(m, n);

W = 1.0;
randomMatrix(X);
randomMatrix(Y);

Matrix_t WArch(W);
Matrix_t XArch(X);
Matrix_t YArch(Y);

Scalar_t ce = evaluate<Architecture>(ELossFunction::kKLDivergence, YArch, XArch, WArch);

Scalar_t ceReference = 0.0;
for (size_t j = 0; j < m; j++) {
for (size_t k = 0; k < n; k++) {
ceReference += 1 + XArch(j, k) - pow(YArch(j, k), 2) - pow(exp(XArch(j, k)), 2);
}
}
ceReference /= (Scalar_t) m;

Double_t error;
if (ceReference != 0.0)
error = std::fabs((ce - ceReference) / ceReference);
else
error = std::fabs(ce - ceReference);
maximumError = std::max(error, maximumError);
}
return maximumError;
}

//______________________________________________________________________________
template <typename Architecture>
auto testKLDivergenceGradients(size_t ntests)
-> typename Architecture::Scalar_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Scalar_t = typename Architecture::Scalar_t;
Double_t maximumError = 0.0;

for (size_t i = 0; i < ntests; i++) {
size_t m = 8; //rand() % 100 + 1;
size_t n = 8; //rand() % 100 + 1;

TMatrixT<Double_t> W(m, 1);
TMatrixT<Double_t> X(m, n);
TMatrixT<Double_t> Y(m, n);
TMatrixT<Double_t> MeanRef(m, n);
TMatrixT<Double_t> SDRef(m, n);

randomMatrix(W);
randomMatrix(X);
randomMatrix(Y);

Matrix_t WArch(W);
Matrix_t XArch(X);
Matrix_t YArch(Y);
Matrix_t MeanArch(Y);
Matrix_t SDArch(Y);

evaluateGradients<Architecture>(MeanArch, SDArch, ELossFunction::kKLDivergence, YArch, XArch, WArch);

Double_t norm = 1.0 / m;

for (size_t j = 0; j < m; j++) {
for (size_t k = 0; k < n; k++) {
MeanRef(j, k) = -2.0 * Y(j, k) * W(j, 0) * norm;
SDRef(j, k) = (1.0 - (2.0 * exp(2.0 * X(j ,k)))) * norm;
SDRef(j, k) *= W(j, 0);
}
}

TMatrixT<Double_t> dMean(MeanArch);
TMatrixT<Double_t> dSD(SDArch);
Double_t mean_error = maximumRelativeError(dMean, MeanRef);
Double_t sd_error = maximumRelativeError(dSD, SDRef);
Double_t error = (mean_error + sd_error) / 2;
maximumError = std::max(error, maximumError);
}
return maximumError;
}