Add regression support for MethodAE and MethodDL #13

Open · wants to merge 4 commits into master
2 changes: 1 addition & 1 deletion tmva/tmva/CMakeLists.txt
@@ -16,7 +16,7 @@ set(headers1 Configurable.h Factory.h MethodBase.h MethodCompositeBase.h
MethodKNN.h MethodCFMlpANN.h MethodCFMlpANN_Utils.h MethodLikelihood.h
MethodHMatrix.h MethodPDERS.h MethodBDT.h MethodDT.h MethodSVM.h MethodBayesClassifier.h
MethodFDA.h MethodMLP.h MethodBoost.h
MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h MethodAE.h
MethodCrossValidation.h)
set(headers2 TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h BinarySearchTree.h
Timer.h RootFinder.h CrossEntropy.h DecisionTree.h DecisionTreeNode.h MisClassificationError.h
1 change: 1 addition & 0 deletions tmva/tmva/inc/LinkDef1.h
@@ -67,5 +67,6 @@
#pragma link C++ class TMVA::MethodDNN+;
#pragma link C++ class TMVA::MethodCrossValidation+;
#pragma link C++ class TMVA::MethodDL+;
#pragma link C++ class TMVA::MethodAE+;

#endif
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h
@@ -191,6 +191,15 @@ class TCpu
const TCpuMatrix<Scalar_t> &weights);
static void SoftmaxCrossEntropyGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);

/** KL Divergence between the distributions corresponding to mean and standard deviation.
 * This is applied at the end of the encoder network. The StandardDeviation argument is
 * assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static Scalar_t KLDivergence(const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
static void KLDivergenceGradients(TCpuMatrix<Scalar_t> &dMean, TCpuMatrix<Scalar_t> &dStandardDeviation,
const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
///@}

//____________________________________________________________________________
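For reference, the quantity these declarations compute is the closed-form KL divergence between the encoder's diagonal Gaussian N(mu, sigma^2) and a standard normal prior, with s = log(sigma) stored in place of sigma, as the comment notes. A minimal standalone sketch, not the PR's kernel — flat vectors and unit event weights are illustrative assumptions; the actual backends operate on TCpuMatrix with per-event weights:

#include <cmath>
#include <cstddef>
#include <vector>

// KL(N(mu, sigma^2) || N(0, 1)) with s = log(sigma); each latent dimension
// contributes -1/2 * (1 + 2*s - mu^2 - exp(2*s)).
double KLDivergenceSketch(const std::vector<double> &mean,
                          const std::vector<double> &logStdDev)
{
   double kl = 0.0;
   for (std::size_t i = 0; i < mean.size(); ++i) {
      const double s = logStdDev[i];
      kl -= 0.5 * (1.0 + 2.0 * s - mean[i] * mean[i] - std::exp(2.0 * s));
   }
   return kl;
}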
8 changes: 8 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h
@@ -193,6 +193,14 @@ class TCuda
const TCudaMatrix<AFloat> &weights);
static void SoftmaxCrossEntropyGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);

/** KL Divergence between the distributions corresponding to mean and standard deviation.
 * This is applied at the end of the encoder network. The StandardDeviation argument is
 * assumed to hold the log of the standard deviation, as in the CPU and reference
 * implementations. */
static AFloat KLDivergence(const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
static void KLDivergenceGradients(TCudaMatrix<AFloat> &dMean, TCudaMatrix<AFloat> &dStandardDeviation,
const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
///@}

//____________________________________________________________________________
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h
@@ -190,6 +190,15 @@ class TReference
const TMatrixT<AReal> &weights);
static void SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);

/** KL Divergence between the distributions corresponding to mean and standard deviation.
 * This is applied at the end of the encoder network. The StandardDeviation argument is
 * assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static AReal KLDivergence(const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
static void KLDivergenceGradients(TMatrixT<AReal> &dMean, TMatrixT<AReal> &dStandardDeviation,
const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
///@}

//____________________________________________________________________________
22 changes: 20 additions & 2 deletions tmva/tmva/inc/TMVA/DNN/Functions.h
@@ -55,7 +55,8 @@ enum class ELossFunction
{
kCrossEntropy = 'C',
kMeanSquaredError = 'R',
kSoftmaxCrossEntropy = 'S',
kKLDivergence = 'D'
};

/*! Enum representing the regularization type applied for a given layer */
@@ -171,6 +172,7 @@ inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y
case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
case ELossFunction::kKLDivergence: return Architecture_t::KLDivergence(Y, output, weights);
}
return 0.0;
}
@@ -190,10 +192,26 @@ inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction
case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
case ELossFunction::kSoftmaxCrossEntropy :
Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
break;
}
}

/*! Compute the gradients of the loss with respect to the two metrics used in the
 * similarity measure (e.g. the mean and log standard deviation for the KL divergence).
 * The gradients are returned in two separate matrices. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dMetricOne, typename Architecture_t::Matrix_t &dMetricTwo,
ELossFunction f, const typename Architecture_t::Matrix_t &MetricOne,
const typename Architecture_t::Matrix_t &MetricTwo,
const typename Architecture_t::Matrix_t &weights)
{
switch(f)
{
case ELossFunction::kKLDivergence:
   Architecture_t::KLDivergenceGradients(dMetricOne, dMetricTwo, MetricOne, MetricTwo, weights);
   break;
default: break; // only the KL divergence splits its gradient across two matrices
}
}


//______________________________________________________________________________
//
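Differentiating the closed form above gives dKL/dMean = Mean and dKL/ds = exp(2*s) - 1 for s the log standard deviation, which is exactly what the split dMetricOne/dMetricTwo interface carries. A hedged sketch of what a backend's KLDivergenceGradients would compute — the flat-vector interface and unit weights are assumptions:

#include <cmath>
#include <cstddef>
#include <vector>

// Gradients of KL(N(mu, sigma^2) || N(0, 1)) with s = log(sigma).
void KLDivergenceGradientsSketch(std::vector<double> &dMean,
                                 std::vector<double> &dLogStdDev,
                                 const std::vector<double> &mean,
                                 const std::vector<double> &logStdDev)
{
   for (std::size_t i = 0; i < mean.size(); ++i) {
      dMean[i] = mean[i];                                 // d KL / d mu
      dLogStdDev[i] = std::exp(2.0 * logStdDev[i]) - 1.0; // d KL / d s
   }
}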
258 changes: 258 additions & 0 deletions tmva/tmva/inc/TMVA/MethodAE.h
@@ -0,0 +1,258 @@
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Vladimir Ilievski, Saurav Shekhar, Siddhartha Rao Kamalakara

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : MethodAE *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Autoencoder Deep Learning Method *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <[email protected]> - CERN, Switzerland *
* Saurav Shekhar <[email protected]> - ETH Zurich, Switzerland *
* Siddhartha Rao Kamalakara <[email protected]> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#ifndef ROOT_TMVA_MethodAE
#define ROOT_TMVA_MethodAE

//////////////////////////////////////////////////////////////////////////
// //
// MethodAE //
// //
// Method class for creating Auto Encoders //
// //
//////////////////////////////////////////////////////////////////////////

#include "TString.h"

#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

#include "TMVA/DNN/Architectures/Reference.h"

#ifdef R__HAS_TMVACPU
#include "TMVA/DNN/Architectures/Cpu.h"
#endif

#ifdef R__HAS_TMVACUDA
#include "TMVA/DNN/Architectures/Cuda.h"
#endif

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"

#include <vector>

namespace TMVA {

/*! All of the options that can be specified in the training string */
struct TTrainingAESettings {
size_t batchSize;
size_t testInterval;
size_t convergenceSteps;
size_t maxEpochs;
DNN::ERegularization regularization;
Double_t learningRate;
Double_t momentum;
Double_t weightDecay;
std::vector<Double_t> dropoutProbabilities;
bool multithreading;
};

class MethodAE : public MethodBase {

private:
// Key-Value vector type, containing the values for the training options
using KeyValueVector_t = std::vector<std::map<TString, TString>>;
#ifdef R__HAS_TMVACPU
using ArchitectureImpl_t = TMVA::DNN::TCpu<Double_t>;
#else
using ArchitectureImpl_t = TMVA::DNN::TReference<Double_t>;
#endif
using DeepNetImpl_t = TMVA::DNN::TDeepNet<ArchitectureImpl_t>;
using Matrix_t = ArchitectureImpl_t::Matrix_t;
std::unique_ptr<DeepNetImpl_t> fNet;

/*! The option handling methods */
void DeclareOptions();
void ProcessOptions();

void Init();

// Functions to parse the layout of the input and of the batch
void ParseInputLayout();
void ParseBatchLayout();

/*! After ProcessOptions() has parsed all of the options, this builds the
 * deep network passed as a reference, given the architecture and the
 * type of the layers. */
template <typename Architecture_t, typename Layer_t>
void CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);

template <typename Architecture_t, typename Layer_t>
void CreateEncoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void CreateDecoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

size_t fInputDepth; ///< The depth of the input.
size_t fInputHeight; ///< The height of the input.
size_t fInputWidth; ///< The width of the input.

size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
size_t fBatchHeight; ///< The height of the batch used to train the deep net.
size_t fBatchWidth; ///< The width of the batch used to train the deep net.

DNN::EInitialization fWeightInitialization; ///< The initialization method
DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
DNN::ELossFunction fLossFunction; ///< The loss function

TString fInputLayoutString; ///< The string defining the layout of the input
TString fBatchLayoutString; ///< The string defining the layout of the batch
TString fLayoutString; ///< The string defining the layout of the deep net
TString fErrorStrategy; ///< The string defining the error strategy for training
TString fTrainingStrategyString; ///< The string defining the training strategy
TString fWeightInitializationString; ///< The string defining the weight initialization method
TString fArchitectureString; ///< The string defining the architecture: CPU or GPU
bool fResume;

KeyValueVector_t fSettings; ///< Map for the training strategy
std::vector<TTrainingAESettings> fTrainingSettings; ///< The vector defining each training strategy

ClassDef(MethodAE, 0);

protected:
// provide a help message
void GetHelpMessage() const;

public:
/*! Constructor */
MethodAE(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);

/*! Constructor */
MethodAE(DataSetInfo &theData, const TString &theWeightFile);

/*! Virtual Destructor */
virtual ~MethodAE();

/*! Function for parsing the training settings, provided as a string
* in a key-value form. */
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);

/*! Check the type of analysis the deep learning network can do */
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);

/*! Methods for training the deep learning network */
void Train();

Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);
virtual const std::vector<Float_t>& GetRegressionValues();
virtual const std::vector<Float_t>& GetMulticlassValues();
/*! Methods for writing and reading weights */
using MethodBase::ReadWeightsFromStream;
void AddWeightsXMLTo(void *parent) const;
void ReadWeightsFromXML(void *wghtnode);
void ReadWeightsFromStream(std::istream &);

/* Create ranking */
const Ranking *CreateRanking();

/* Getters */
size_t GetInputDepth() const { return fInputDepth; }
size_t GetInputHeight() const { return fInputHeight; }
size_t GetInputWidth() const { return fInputWidth; }

size_t GetBatchDepth() const { return fBatchDepth; }
size_t GetBatchHeight() const { return fBatchHeight; }
size_t GetBatchWidth() const { return fBatchWidth; }

const DeepNetImpl_t & GetDeepNet() const { return *fNet; }

DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; }
DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; }
DNN::ELossFunction GetLossFunction() const { return fLossFunction; }

TString GetInputLayoutString() const { return fInputLayoutString; }
TString GetBatchLayoutString() const { return fBatchLayoutString; }
TString GetLayoutString() const { return fLayoutString; }
TString GetErrorStrategyString() const { return fErrorStrategy; }
TString GetTrainingStrategyString() const { return fTrainingStrategyString; }
TString GetWeightInitializationString() const { return fWeightInitializationString; }
TString GetArchitectureString() const { return fArchitectureString; }

const std::vector<TTrainingAESettings> &GetTrainingSettings() const { return fTrainingSettings; }
std::vector<TTrainingAESettings> &GetTrainingSettings() { return fTrainingSettings; }
const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; }
KeyValueVector_t &GetKeyValueSettings() { return fSettings; }

/** Setters */
void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }

void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }

void SetWeightInitialization(DNN::EInitialization weightInitialization)
{
fWeightInitialization = weightInitialization;
}
void SetOutputFunction(DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
void SetErrorStrategyString(TString errorStrategy) { fErrorStrategy = errorStrategy; }
void SetTrainingStrategyString(TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
void SetWeightInitializationString(TString weightInitializationString)
{
fWeightInitializationString = weightInitializationString;
}
void SetArchitectureString(TString architectureString) { fArchitectureString = architectureString; }
void SetLayoutString(TString layoutString) { fLayoutString = layoutString; }
};

} // namespace TMVA

#endif
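With MethodAE registered in Types.h (see below) and LinkDef1.h, the new method should be bookable through the usual Factory interface. A hypothetical booking call — the option string here is a minimal assumption, not this PR's documented grammar:

#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

// Hypothetical: book the new autoencoder method via the Factory.
// Types::kAE comes from this PR; the option string is an assumption.
void BookAutoEncoder(TMVA::Factory *factory, TMVA::DataLoader *loader)
{
   factory->BookMethod(loader, TMVA::Types::kAE, "AE",
                       "ErrorStrategy=SUMOFSQUARES:Architecture=CPU");
}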
3 changes: 3 additions & 0 deletions tmva/tmva/inc/TMVA/MethodDL.h
@@ -83,6 +83,7 @@ class MethodDL : public MethodBase {
using ArchitectureImpl_t = TMVA::DNN::TReference<Double_t>;
#endif
using DeepNetImpl_t = TMVA::DNN::TDeepNet<ArchitectureImpl_t>;
using Matrix_t = ArchitectureImpl_t::Matrix_t;
std::unique_ptr<DeepNetImpl_t> fNet;

/*! The option handling methods */
@@ -180,6 +181,8 @@ class MethodDL : public MethodBase {
void Train();

Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);
virtual const std::vector<Float_t>& GetRegressionValues();
virtual const std::vector<Float_t>& GetMulticlassValues();

/*! Methods for writing and reading weights */
using MethodBase::ReadWeightsFromStream;
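These declarations are what hook MethodDL into the regression and multiclass code paths of MethodBase, which is the point of this PR. On the application side, the values surface through the standard Reader interface; a short sketch, where the "DL" method tag is a placeholder:

#include "TMVA/Reader.h"
#include <vector>

// Sketch: Reader::EvaluateRegression internally calls the booked method's
// GetRegressionValues(); "DL" is a placeholder method tag.
std::vector<Float_t> GetTargets(TMVA::Reader &reader)
{
   return reader.EvaluateRegression("DL"); // one entry per regression target
}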
1 change: 1 addition & 0 deletions tmva/tmva/inc/TMVA/Types.h
@@ -99,6 +99,7 @@ namespace TMVA {
kCategory ,
kDNN ,
kDL ,
kAE ,
kPyRandomForest ,
kPyAdaBoost ,
kPyGTB ,