diff --git a/tmva/tmva/CMakeLists.txt b/tmva/tmva/CMakeLists.txt index ab0812cfefd1d..9e448557ad45a 100644 --- a/tmva/tmva/CMakeLists.txt +++ b/tmva/tmva/CMakeLists.txt @@ -16,7 +16,7 @@ set(headers1 Configurable.h Factory.h MethodBase.h MethodCompositeBase.h MethodKNN.h MethodCFMlpANN.h MethodCFMlpANN_Utils.h MethodLikelihood.h MethodHMatrix.h MethodPDERS.h MethodBDT.h MethodDT.h MethodSVM.h MethodBayesClassifier.h MethodFDA.h MethodMLP.h MethodBoost.h - MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h + MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h MethodAE.h MethodCrossValidation.h) set(headers2 TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h BinarySearchTree.h Timer.h RootFinder.h CrossEntropy.h DecisionTree.h DecisionTreeNode.h MisClassificationError.h diff --git a/tmva/tmva/inc/LinkDef1.h b/tmva/tmva/inc/LinkDef1.h index 47ea5c0aae88e..82277139b5445 100644 --- a/tmva/tmva/inc/LinkDef1.h +++ b/tmva/tmva/inc/LinkDef1.h @@ -67,5 +67,6 @@ #pragma link C++ class TMVA::MethodDNN+; #pragma link C++ class TMVA::MethodCrossValidation+; #pragma link C++ class TMVA::MethodDL+; +#pragma link C++ class TMVA::MethodAE+; #endif diff --git a/tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h b/tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h index 116946b1ba24b..e2bfbf58eed89 100644 --- a/tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h +++ b/tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h @@ -191,6 +191,15 @@ class TCpu const TCpuMatrix &weights); static void SoftmaxCrossEntropyGradients(TCpuMatrix &dY, const TCpuMatrix &Y, const TCpuMatrix &output, const TCpuMatrix &weights); + + /** KL Divergence between the distributions corresponding to mean and standard deviation. + * This is applied at the end of Encoder network. The StandardDeviation is assumed to + * be the log of standard deviation and the computation is done accordingly. 
*/ + static Scalar_t KLDivergence(const TCpuMatrix &Mean, const TCpuMatrix &StandardDeviation, + const TCpuMatrix &weights); + static void KLDivergenceGradients(TCpuMatrix &dMean, TCpuMatrix &dStandardDeviation, + const TCpuMatrix &Mean, const TCpuMatrix &StandardDeviation, + const TCpuMatrix &weights); ///@} //____________________________________________________________________________ diff --git a/tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h b/tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h index 5b7bc22f4ce76..4c0b595a1aecf 100644 --- a/tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h +++ b/tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h @@ -193,6 +193,14 @@ class TCuda const TCudaMatrix &weights); static void SoftmaxCrossEntropyGradients(TCudaMatrix &dY, const TCudaMatrix &Y, const TCudaMatrix &output, const TCudaMatrix &weights); + + /** KL Divergence between the distributions corresponding to mean and standard deviation. + * This is applied at the end of Encoder network. */ + static AFloat KLDivergence(const TCudaMatrix &Y, const TCudaMatrix &output, + const TCudaMatrix &weights); + static void KLDivergenceGradients(TCudaMatrix &dMean, TCudaMatrix &dStandardDeviation, + const TCudaMatrix &Mean, const TCudaMatrix &StandardDeviation, + const TCudaMatrix &weights); ///@} //____________________________________________________________________________ diff --git a/tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h b/tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h index 55fb860642b61..0acdc39d52a9e 100644 --- a/tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h +++ b/tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h @@ -190,6 +190,15 @@ class TReference const TMatrixT &weights); static void SoftmaxCrossEntropyGradients(TMatrixT &dY, const TMatrixT &Y, const TMatrixT &output, const TMatrixT &weights); + + /** KL Divergence between the distributions corresponding to mean and standard deviation. + * This is applied at the end of Encoder network. 
The StandardDeviation is assumed to + * be the log of standard deviation and the computation is done accordingly. */ + static AReal KLDivergence(const TMatrixT &Mean, const TMatrixT &StandardDeviation, + const TMatrixT &weights); + static void KLDivergenceGradients(TMatrixT &dMean, TMatrixT &dStandardDeviation, + const TMatrixT &Mean, const TMatrixT &StandardDeviation, + const TMatrixT &weights); ///@} //____________________________________________________________________________ diff --git a/tmva/tmva/inc/TMVA/DNN/Functions.h b/tmva/tmva/inc/TMVA/DNN/Functions.h index 624899afe5e69..38112f6a9e348 100644 --- a/tmva/tmva/inc/TMVA/DNN/Functions.h +++ b/tmva/tmva/inc/TMVA/DNN/Functions.h @@ -55,7 +55,8 @@ enum class ELossFunction { kCrossEntropy = 'C', kMeanSquaredError = 'R', - kSoftmaxCrossEntropy = 'S' + kSoftmaxCrossEntropy = 'S', + kKLDivergence = 'D' }; /*! Enum representing the regularization type applied for a given layer */ @@ -171,6 +172,7 @@ inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights); case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights); case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights); + case ELossFunction::kKLDivergence: return Architecture_t::KLDivergence(Y,output,weights); } return 0.0; } @@ -190,10 +192,26 @@ inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFuncti case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break; case ELossFunction::kSoftmaxCrossEntropy : Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights); - break; + break; } } +/*! Compute the gradients with respect to the metrics used for +* similarity measure. The gradients are returned in two separate Matrices. 
*/ +//______________________________________________________________________________ +template +inline void evaluateGradients(typename Architecture_t::Matrix_t &dMetricOne, typename Architecture_t::Matrix_t &dMetricTwo, + ELossFunction f, const typename Architecture_t::Matrix_t &MetricOne, + const typename Architecture_t::Matrix_t &MetricTwo, + const typename Architecture_t::Matrix_t &weights) +{ + switch(f) + { + case ELossFunction::kKLDivergence: Architecture_t::KLDivergenceGradients(dMetricOne, dMetricTwo, MetricOne, MetricTwo, weights); + break; + } +} + //______________________________________________________________________________ // diff --git a/tmva/tmva/inc/TMVA/MethodAE.h b/tmva/tmva/inc/TMVA/MethodAE.h new file mode 100644 index 0000000000000..da98588935a26 --- /dev/null +++ b/tmva/tmva/inc/TMVA/MethodAE.h @@ -0,0 +1,256 @@ +// @(#)root/tmva/tmva/dnn:$Id$ +// Author: Vladimir Ilievski, Saurav Shekhar, Siddhartha Rao Kamalakara + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : MethodAE * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Deep Neural Network Method * + * * + * Authors (alphabetical): * + * Vladimir Ilievski - CERN, Switzerland * + * Saurav Shekhar - ETH Zurich, Switzerland * + * Siddhartha Rao Kamalakara - CERN, Switzerland * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_MethodAE +#define ROOT_TMVA_MethodAE + +////////////////////////////////////////////////////////////////////////// +// // +// MethodAE // +// // +// Method class for creating Auto Encoders // +// // +////////////////////////////////////////////////////////////////////////// + +#include "TString.h" + +#include "TMVA/MethodBase.h" +#include "TMVA/Types.h" + +#include "TMVA/DNN/Architectures/Reference.h" + +#ifdef R__HAS_TMVACPU +#include "TMVA/DNN/Architectures/Cpu.h" +#endif + +#ifdef R__HAS_TMVACUDA +#include "TMVA/DNN/Architectures/Cuda.h" +#endif + +#include "TMVA/DNN/Functions.h" +#include "TMVA/DNN/DeepNet.h" + +#include + +namespace TMVA { + +/*! All of the options that can be specified in the training string */ +struct TTrainingAESettings { + size_t batchSize; + size_t testInterval; + size_t convergenceSteps; + size_t maxEpochs; + DNN::ERegularization regularization; + Double_t learningRate; + Double_t momentum; + Double_t weightDecay; + std::vector dropoutProbabilities; + bool multithreading; +}; + +class MethodAE : public MethodBase { + +private: + // Key-Value vector type, contining the values for the training options + using KeyValueVector_t = std::vector>; +#ifdef R__HAS_TMVACPU + using ArchitectureImpl_t = TMVA::DNN::TCpu; +#else + using ArchitectureImpl_t = TMVA::DNN::TReference; +#endif + using DeepNetImpl_t = TMVA::DNN::TDeepNet; + std::unique_ptr fNet; + + /*! The option handling methods */ + void DeclareOptions(); + void ProcessOptions(); + + void Init(); + + // Function to parse the layout of the input + void ParseInputLayout(); + void ParseBatchLayout(); + + /*! 
After calling the ProcesOptions(), all of the options are parsed, + * so using the parsed options, and given the architecture and the + * type of the layers, we build the Deep Network passed as + * a reference in the function. */ + template + void CreateDeepNet(DNN::TDeepNet &deepNet, + std::vector> &nets); + + template + void CreateEncoder(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layoutString); + + template + void CreateDecoder(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layoutString); + + template + void ParseDenseLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim); + + template + void ParseConvLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim); + + template + void ParseMaxPoolLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, + TString delim); + + template + void ParseReshapeLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, + TString delim); + + template + void ParseRnnLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim); + + template + void ParseLstmLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim); + + size_t fInputDepth; ///< The depth of the input. + size_t fInputHeight; ///< The height of the input. + size_t fInputWidth; ///< The width of the input. + + size_t fBatchDepth; ///< The depth of the batch used to train the deep net. + size_t fBatchHeight; ///< The height of the batch used to train the deep net. + size_t fBatchWidth; ///< The width of the batch used to train the deep net. 
+ + DNN::EInitialization fWeightInitialization; ///< The initialization method + DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions + DNN::ELossFunction fLossFunction; ///< The loss function + + TString fInputLayoutString; ///< The string defining the layout of the input + TString fBatchLayoutString; ///< The string defining the layout of the batch + TString fLayoutString; ///< The string defining the layout of the deep net + TString fErrorStrategy; ///< The string defining the error strategy for training + TString fTrainingStrategyString; ///< The string defining the training strategy + TString fWeightInitializationString; ///< The string defining the weight initialization method + TString fArchitectureString; ///< The string defining the architecure: CPU or GPU + bool fResume; + + KeyValueVector_t fSettings; ///< Map for the training strategy + std::vector fTrainingSettings; ///< The vector defining each training strategy + + ClassDef(MethodAE, 0); + +protected: + // provide a help message + void GetHelpMessage() const; + +public: + /*! Constructor */ + MethodAE(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption); + + /*! Constructor */ + MethodAE(DataSetInfo &theData, const TString &theWeightFile); + + /*! Virtual Destructor */ + virtual ~MethodAE(); + + /*! Function for parsing the training settings, provided as a string + * in a key-value form. */ + KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim); + + /*! Check the type of analysis the deep learning network can do */ + Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets); + + /*! Methods for training the deep learning network */ + void Train(); + + Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0); + + /*! 
Methods for writing and reading weights */ + using MethodBase::ReadWeightsFromStream; + void AddWeightsXMLTo(void *parent) const; + void ReadWeightsFromXML(void *wghtnode); + void ReadWeightsFromStream(std::istream &); + + /* Create ranking */ + const Ranking *CreateRanking(); + + /* Getters */ + size_t GetInputDepth() const { return fInputDepth; } + size_t GetInputHeight() const { return fInputHeight; } + size_t GetInputWidth() const { return fInputWidth; } + + size_t GetBatchDepth() const { return fBatchDepth; } + size_t GetBatchHeight() const { return fBatchHeight; } + size_t GetBatchWidth() const { return fBatchWidth; } + + const DeepNetImpl_t & GetDeepNet() const { return *fNet; } + + DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; } + DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; } + DNN::ELossFunction GetLossFunction() const { return fLossFunction; } + + TString GetInputLayoutString() const { return fInputLayoutString; } + TString GetBatchLayoutString() const { return fBatchLayoutString; } + TString GetLayoutString() const { return fLayoutString; } + TString GetErrorStrategyString() const { return fErrorStrategy; } + TString GetTrainingStrategyString() const { return fTrainingStrategyString; } + TString GetWeightInitializationString() const { return fWeightInitializationString; } + TString GetArchitectureString() const { return fArchitectureString; } + + const std::vector &GetTrainingSettings() const { return fTrainingSettings; } + std::vector &GetTrainingSettings() { return fTrainingSettings; } + const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; } + KeyValueVector_t &GetKeyValueSettings() { return fSettings; } + + /** Setters */ + void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; } + void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; } + void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; } + + void SetBatchDepth(size_t 
batchDepth) { fBatchDepth = batchDepth; } + void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; } + void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; } + + void SetWeightInitialization(DNN::EInitialization weightInitialization) + { + fWeightInitialization = weightInitialization; + } + void SetOutputFunction(DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; } + void SetErrorStrategyString(TString errorStrategy) { fErrorStrategy = errorStrategy; } + void SetTrainingStrategyString(TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; } + void SetWeightInitializationString(TString weightInitializationString) + { + fWeightInitializationString = weightInitializationString; + } + void SetArchitectureString(TString architectureString) { fArchitectureString = architectureString; } + void SetLayoutString(TString layoutString) { fLayoutString = layoutString; } +}; + +} // namespace TMVA + +#endif diff --git a/tmva/tmva/inc/TMVA/Types.h b/tmva/tmva/inc/TMVA/Types.h index 1b3fe6b0fb817..2dc3db0b1e55b 100644 --- a/tmva/tmva/inc/TMVA/Types.h +++ b/tmva/tmva/inc/TMVA/Types.h @@ -99,6 +99,7 @@ namespace TMVA { kCategory , kDNN , kDL , + kAE , kPyRandomForest , kPyAdaBoost , kPyGTB , diff --git a/tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx b/tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx index b02aadcd0cdb5..cb8c182f624ca 100644 --- a/tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx +++ b/tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx @@ -194,5 +194,58 @@ void TCpu::SoftmaxCrossEntropyGradients(TCpuMatrix &dY, const TC Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows())); } +//______________________________________________________________________________ +template +AFloat TCpu::KLDivergence(const TCpuMatrix &Y, const TCpuMatrix &output, + const TCpuMatrix &weights) +{ + const AFloat *dataY = Y.GetRawDataPointer(); + const AFloat *dataOutput = 
output.GetRawDataPointer(); + const AFloat *dataWeights = weights.GetRawDataPointer(); + std::vector temp(Y.GetNElements()); + size_t m = Y.GetNrows(); + AFloat norm = 1.0 / ((AFloat) m); + + auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) { + AFloat dy = 1 + dataOutput[workerID] - std::pow(dataY[workerID], 2) - std::pow(std::exp(dataOutput[workerID]), 2); + temp[workerID] = dataWeights[workerID % m] * dy; + return 0; + }; + + auto reduction = [](const std::vector & v ) + { + return std::accumulate(v.begin(),v.end(),AFloat{}); + }; + + Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements())); + return norm * Y.GetThreadExecutor().Reduce(temp, reduction); +} + +//______________________________________________________________________________ +template +void TCpu::KLDivergenceGradients(TCpuMatrix &dY, TCpuMatrix &dSD, const TCpuMatrix &Y, + const TCpuMatrix &output, const TCpuMatrix &weights) +{ + + AFloat *dataDY = dY.GetRawDataPointer(); + AFloat *dataDSD = dSD.GetRawDataPointer(); + const AFloat *dataY = Y.GetRawDataPointer(); + const AFloat *dataOutput = output.GetRawDataPointer(); + const AFloat *dataWeights = weights.GetRawDataPointer(); + + size_t m = Y.GetNrows(); + AFloat norm = 1.0 / ((AFloat) m); + + auto f = [&dataDY, &dataDSD, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) { + dataDY[workerID] = -2.0 * norm * dataY[workerID]; + dataDY[workerID] *= dataWeights[workerID % m]; + dataDSD[workerID] = 1.0 - (2.0 * std::exp(2.0 * dataOutput[workerID])); + dataDSD[workerID] *= norm * dataWeights[workerID % m]; + return 0; + }; + + Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements())); +} + } // namespace DNN } // namespace TMVA diff --git a/tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx b/tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx index 522ae5b4d6585..ff52c501e8bd1 100644 --- a/tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx +++ 
b/tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx @@ -151,5 +151,47 @@ void TReference::SoftmaxCrossEntropyGradients(TMatrixT &dY, const } } +//______________________________________________________________________________ +template +AReal TReference::KLDivergence(const TMatrixT &Mean, const TMatrixT &StandardDeviation, + const TMatrixT &weights) +{ + size_t m,n; + m = Mean.GetNrows(); + n = Mean.GetNcols(); + + AReal result = 0.0; + + for(size_t i=0; i(m); + + return result; +} + +template +void TReference::KLDivergenceGradients(TMatrixT &dMean, TMatrixT &dStandardDeviation, + const TMatrixT &Mean, const TMatrixT &StandardDeviation, + const TMatrixT &weights) +{ + size_t m,n; + m = Mean.GetNrows(); + n = Mean.GetNcols(); + AReal norm = 1.0 / m ; + + for(size_t i = 0; i < m; i++){ + for(size_t j = 0; j < n; j++){ + dMean(i,j) = -2.0 * norm * Mean(i,j) * weights(i, 0); + dStandardDeviation(i,j) = 1.0 - (2.0 * std::exp(2.0 * StandardDeviation(i,j))); + dStandardDeviation(i,j) *= norm * weights(i, 0); + } + } +} + } // namespace DNN } // namespace TMVA diff --git a/tmva/tmva/src/MethodAE.cxx b/tmva/tmva/src/MethodAE.cxx new file mode 100644 index 0000000000000..36cce24f3d3d6 --- /dev/null +++ b/tmva/tmva/src/MethodAE.cxx @@ -0,0 +1,1616 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Vladimir Ilievski, Saurav Shekhar, Siddhartha Rao Kamalakara + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : MethodAE * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Deep Neural Network Method * + * * + * Authors (alphabetical): * + * Vladimir Ilievski - CERN, Switzerland * + * Saurav Shekhar - ETH Zurich, Switzerland * + * Siddhartha Rao Kamalakara - CERN, Switzerland * * + * * + Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#include "TFormula.h" +#include "TString.h" +#include "TMath.h" + +#include "TMVA/Tools.h" +#include "TMVA/Configurable.h" +#include "TMVA/IMethod.h" +#include "TMVA/ClassifierFactory.h" +#include "TMVA/MethodAE.h" +#include "TMVA/Types.h" +#include "TMVA/DNN/TensorDataLoader.h" +#include "TMVA/DNN/Functions.h" +#include "TMVA/DNN/DLMinimizers.h" +#include "TStopwatch.h" + +#include + +REGISTER_METHOD(AE) +ClassImp(TMVA::MethodAE); + +using namespace TMVA::DNN::CNN; +using namespace TMVA::DNN; + +using TMVA::DNN::EActivationFunction; +using TMVA::DNN::ELossFunction; +using TMVA::DNN::EInitialization; +using TMVA::DNN::EOutputFunction; + +namespace TMVA { + +//////////////////////////////////////////////////////////////////////////////// +TString fetchValueAE(const std::map &keyValueMap, TString key) +{ + key.ToUpper(); + std::map::const_iterator it = keyValueMap.find(key); + if (it == keyValueMap.end()) { + return TString(""); + } + return it->second; +} + +//////////////////////////////////////////////////////////////////////////////// +template +T fetchValueAE(const std::map &keyValueMap, TString key, T defaultValue); + +//////////////////////////////////////////////////////////////////////////////// +template <> +int fetchValueAE(const std::map &keyValueMap, TString key, int defaultValue) +{ + TString value(fetchValueAE(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + return value.Atoi(); +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +double fetchValueAE(const std::map &keyValueMap, TString key, double defaultValue) +{ + TString value(fetchValueAE(keyValueMap, key)); + if (value == "") { 
+ return defaultValue; + } + return value.Atof(); +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +TString fetchValueAE(const std::map &keyValueMap, TString key, TString defaultValue) +{ + TString value(fetchValueAE(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + return value; +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +bool fetchValueAE(const std::map &keyValueMap, TString key, bool defaultValue) +{ + TString value(fetchValueAE(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + + value.ToUpper(); + if (value == "TRUE" || value == "T" || value == "1") { + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +std::vector fetchValueAE(const std::map &keyValueMap, TString key, + std::vector defaultValue) +{ + TString parseString(fetchValueAE(keyValueMap, key)); + if (parseString == "") { + return defaultValue; + } + + parseString.ToUpper(); + std::vector values; + + const TString tokenDelim("+"); + TObjArray *tokenStrings = parseString.Tokenize(tokenDelim); + TIter nextToken(tokenStrings); + TObjString *tokenString = (TObjString *)nextToken(); + for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) { + std::stringstream sstr; + double currentValue; + sstr << tokenString->GetString().Data(); + sstr >> currentValue; + values.push_back(currentValue); + } + return values; +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::DeclareOptions() +{ + // Set default values for all option strings + + DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input"); + + DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch"); + + DeclareOptionRef(fLayoutString = "Encoder={DENSE|(N+100)*2|SOFTSIGN}Decoder={DENSE|0|LINEAR}", "Layout", 
"Layout of the network."); + + DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)" + " or cross entropy (binary classification)."); + AddPreDefVal(TString("CROSSENTROPY")); + AddPreDefVal(TString("SUMOFSQUARES")); + AddPreDefVal(TString("MUTUALEXCLUSIVE")); + + DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy"); + AddPreDefVal(TString("XAVIER")); + AddPreDefVal(TString("XAVIERUNIFORM")); + + DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on."); + AddPreDefVal(TString("STANDARD")); + AddPreDefVal(TString("CPU")); + AddPreDefVal(TString("GPU")); + AddPreDefVal(TString("OPENCL")); + + DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1," + "Momentum=0.3," + "Repetitions=3," + "ConvergenceSteps=50," + "BatchSize=30," + "TestRepetitions=7," + "WeightDecay=0.0," + "Renormalize=L2," + "DropConfig=0.0," + "DropRepetitions=5|LearningRate=1e-4," + "Momentum=0.3," + "Repetitions=3," + "ConvergenceSteps=50," + "MaxEpochs=2000," + "BatchSize=20," + "TestRepetitions=7," + "WeightDecay=0.001," + "Renormalize=L2," + "DropConfig=0.0+0.5+0.5," + "DropRepetitions=5," + "Multithreading=True", + "TrainingStrategy", "Defines the training strategies."); +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::ProcessOptions() +{ + if (IgnoreEventsWithNegWeightsInTraining()) { + Log() << kINFO << "Will ignore negative events in training!" << Endl; + } + + if (fArchitectureString == "STANDARD") { + Log() << kERROR << "The STANDARD architecture has been deprecated. " + "Please use Architecture=CPU or Architecture=CPU." + "See the TMVA Users' Guide for instructions if you " + "encounter problems." + << Endl; + Log() << kFATAL << "The STANDARD architecture has been deprecated. " + "Please use Architecture=CPU or Architecture=CPU." 
+ "See the TMVA Users' Guide for instructions if you " + "encounter problems." + << Endl; + } + + if (fArchitectureString == "OPENCL") { + Log() << kERROR << "The OPENCL architecture has not been implemented yet. " + "Please use Architecture=CPU or Architecture=CPU for the " + "time being. See the TMVA Users' Guide for instructions " + "if you encounter problems." + << Endl; + Log() << kFATAL << "The OPENCL architecture has not been implemented yet. " + "Please use Architecture=CPU or Architecture=CPU for the " + "time being. See the TMVA Users' Guide for instructions " + "if you encounter problems." + << Endl; + } + + if (fArchitectureString == "GPU") { +#ifndef R__HAS_TMVACUDA // Included only if DNNCUDA flag is _not_ set. + Log() << kERROR << "CUDA backend not enabled. Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; + Log() << kFATAL << "CUDA backend not enabled. Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; +#endif // DNNCUDA + } + + if (fArchitectureString == "CPU") { +#ifndef R__HAS_TMVACPU // Included only if DNNCPU flag is _not_ set. + Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; + Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; +#endif // DNNCPU + } + + // Input Layout + ParseInputLayout(); + ParseBatchLayout(); + + // Loss function and output. 
+ fOutputFunction = EOutputFunction::kSigmoid; + if (fAnalysisType == Types::kClassification) { + if (fErrorStrategy == "SUMOFSQUARES") { + fLossFunction = ELossFunction::kMeanSquaredError; + } + if (fErrorStrategy == "CROSSENTROPY") { + fLossFunction = ELossFunction::kCrossEntropy; + } + fOutputFunction = EOutputFunction::kSigmoid; + } else if (fAnalysisType == Types::kRegression) { + if (fErrorStrategy != "SUMOFSQUARES") { + Log() << kWARNING << "For regression only SUMOFSQUARES is a valid " + << " neural net error function. Setting error function to " + << " SUMOFSQUARES now." << Endl; + } + + fLossFunction = ELossFunction::kMeanSquaredError; + fOutputFunction = EOutputFunction::kIdentity; + } else if (fAnalysisType == Types::kMulticlass) { + if (fErrorStrategy == "SUMOFSQUARES") { + fLossFunction = ELossFunction::kMeanSquaredError; + } + if (fErrorStrategy == "CROSSENTROPY") { + fLossFunction = ELossFunction::kCrossEntropy; + } + if (fErrorStrategy == "MUTUALEXCLUSIVE") { + fLossFunction = ELossFunction::kSoftmaxCrossEntropy; + } + fOutputFunction = EOutputFunction::kSoftmax; + } + + // Initialization + if (fWeightInitializationString == "XAVIER") { + fWeightInitialization = DNN::EInitialization::kGauss; + } else if (fWeightInitializationString == "XAVIERUNIFORM") { + fWeightInitialization = DNN::EInitialization::kUniform; + } else { + fWeightInitialization = DNN::EInitialization::kGauss; + } + + // Training settings. 
+ + KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(",")); + for (auto &block : strategyKeyValues) { + TTrainingAESettings settings; + + settings.convergenceSteps = fetchValueAE(block, "ConvergenceSteps", 100); + settings.batchSize = fetchValueAE(block, "BatchSize", 30); + settings.maxEpochs = fetchValueAE(block, "MaxEpochs", 2000); + settings.testInterval = fetchValueAE(block, "TestRepetitions", 7); + settings.weightDecay = fetchValueAE(block, "WeightDecay", 0.0); + settings.learningRate = fetchValueAE(block, "LearningRate", 1e-5); + settings.momentum = fetchValueAE(block, "Momentum", 0.3); + settings.dropoutProbabilities = fetchValueAE(block, "DropConfig", std::vector()); + + TString regularization = fetchValueAE(block, "Regularization", TString("NONE")); + if (regularization == "L1") { + settings.regularization = DNN::ERegularization::kL1; + } else if (regularization == "L2") { + settings.regularization = DNN::ERegularization::kL2; + } + + TString strMultithreading = fetchValueAE(block, "Multithreading", TString("True")); + + if (strMultithreading.BeginsWith("T")) { + settings.multithreading = true; + } else { + settings.multithreading = false; + } + + fTrainingSettings.push_back(settings); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// default initializations +void MethodAE::Init() +{ + // Nothing to do here +} + +//////////////////////////////////////////////////////////////////////////////// +/// Parse the input layout +void MethodAE::ParseInputLayout() +{ + // Define the delimiter + const TString delim("|"); + + // Get the input layout string + TString inputLayoutString = this->GetInputLayoutString(); + + size_t depth = 0; + size_t height = 0; + size_t width = 0; + + // Split the input layout string + TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim); + TIter nextInputDim(inputDimStrings); + TObjString *inputDimString = (TObjString 
*)nextInputDim(); + int idxToken = 0; + + for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) { + switch (idxToken) { + case 0: // input depth + { + TString strDepth(inputDimString->GetString()); + depth = (size_t)strDepth.Atoi(); + } break; + case 1: // input height + { + TString strHeight(inputDimString->GetString()); + height = (size_t)strHeight.Atoi(); + } break; + case 2: // input width + { + TString strWidth(inputDimString->GetString()); + width = (size_t)strWidth.Atoi(); + } break; + } + ++idxToken; + } + + this->SetInputDepth(depth); + this->SetInputHeight(height); + this->SetInputWidth(width); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Parse the input layout +void MethodAE::ParseBatchLayout() +{ + // Define the delimiter + const TString delim("|"); + + // Get the input layout string + TString batchLayoutString = this->GetBatchLayoutString(); + + size_t batchDepth = 0; + size_t batchHeight = 0; + size_t batchWidth = 0; + + // Split the input layout string + TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim); + TIter nextBatchDim(batchDimStrings); + TObjString *batchDimString = (TObjString *)nextBatchDim(); + int idxToken = 0; + + for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) { + switch (idxToken) { + case 0: // input depth + { + TString strDepth(batchDimString->GetString()); + batchDepth = (size_t)strDepth.Atoi(); + } break; + case 1: // input height + { + TString strHeight(batchDimString->GetString()); + batchHeight = (size_t)strHeight.Atoi(); + } break; + case 2: // input width + { + TString strWidth(batchDimString->GetString()); + batchWidth = (size_t)strWidth.Atoi(); + } break; + } + ++idxToken; + } + + this->SetBatchDepth(batchDepth); + this->SetBatchHeight(batchHeight); + this->SetBatchWidth(batchWidth); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Create an autoencoder based on the 
layout string +template +void MethodAE::CreateDeepNet(DNN::TDeepNet &deepNet, + std::vector> &nets) +{ + + TString layoutString = this->GetLayoutString(); + + size_t offset[4]; + size_t idx = 0; + + if(layoutString.BeginsWith("Encoder=")){ + for(size_t i=0; i +void MethodAE::CreateEncoder(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layoutString) +{ + // Layer specification, layer details + const TString layerDelimiter(","); + const TString subDelimiter("|"); + + // Split layers + TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter); + TIter nextLayer(layerStrings); + TObjString *layerString = (TObjString *)nextLayer(); + + + for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) { + // Split layer details + TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + + // Determine the type of the layer + TString strLayerType = token->GetString(); + + + if (strLayerType == "DENSE") { + ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "CONV") { + ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "MAXPOOL") { + ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "RESHAPE") { + ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "RNN") { + ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "LSTM") { + Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl; + //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +/// Create a Decoder based on the layout string received +/// from +template +void MethodAE::CreateDecoder(DNN::TDeepNet &deepNet, + std::vector> &nets, 
TString layoutString) +{ + // Layer specification, layer details + const TString layerDelimiter(","); + const TString subDelimiter("|"); + + // Split layers + TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter); + TIter nextLayer(layerStrings); + TObjString *layerString = (TObjString *)nextLayer(); + + + for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) { + // Split layer details + TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + + // Determine the type of the layer + TString strLayerType = token->GetString(); + + + if (strLayerType == "DENSE") { + ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "CONV") { + ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "MAXPOOL") { + ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "RESHAPE") { + ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "RNN") { + ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } else if (strLayerType == "LSTM") { + Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl; + //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter); + } + } +} + + + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate dense layer +template +void MethodAE::ParseDenseLayer(DNN::TDeepNet &deepNet, + std::vector> & /*nets*/, TString layerString, + TString delim) +{ + int width = 0; + EActivationFunction activationFunction = EActivationFunction::kTanh; + + // not sure about this + const size_t inputSize = GetNvar(); + + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = 
(TObjString *)nextToken(); + int idxToken = 0; + + // jump the first token + for (; token != nullptr; token = (TObjString *)nextToken()) { + switch (idxToken) { + case 1: // number of nodes + { + // not sure + TString strNumNodes(token->GetString()); + TString strN("x"); + strNumNodes.ReplaceAll("N", strN); + strNumNodes.ReplaceAll("n", strN); + TFormula fml("tmp", strNumNodes); + width = fml.Eval(inputSize); + } break; + case 2: // actiovation function + { + TString strActFnc(token->GetString()); + if (strActFnc == "RELU") { + activationFunction = DNN::EActivationFunction::kRelu; + } else if (strActFnc == "TANH") { + activationFunction = DNN::EActivationFunction::kTanh; + } else if (strActFnc == "SYMMRELU") { + activationFunction = DNN::EActivationFunction::kSymmRelu; + } else if (strActFnc == "SOFTSIGN") { + activationFunction = DNN::EActivationFunction::kSoftSign; + } else if (strActFnc == "SIGMOID") { + activationFunction = DNN::EActivationFunction::kSigmoid; + } else if (strActFnc == "LINEAR") { + activationFunction = DNN::EActivationFunction::kIdentity; + } else if (strActFnc == "GAUSS") { + activationFunction = DNN::EActivationFunction::kGauss; + } + } break; + } + ++idxToken; + } + + // Add the dense layer, initialize the weights and biases and copy + TDenseLayer *denseLayer = deepNet.AddDenseLayer(width, activationFunction); + denseLayer->Initialize(); + + // add same layer to fNet + fNet->AddDenseLayer(width, activationFunction); + + //TDenseLayer *copyDenseLayer = new TDenseLayer(*denseLayer); + + // add the copy to all slave nets + //for (size_t i = 0; i < nets.size(); i++) { + // nets[i].AddDenseLayer(copyDenseLayer); + //} + + // check compatibility of added layer + // for a dense layer input should be 1 x 1 x DxHxW +} + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate convolutional layer +template +void MethodAE::ParseConvLayer(DNN::TDeepNet &deepNet, + 
std::vector> & /*nets*/, TString layerString, + TString delim) +{ + int depth = 0; + int fltHeight = 0; + int fltWidth = 0; + int strideRows = 0; + int strideCols = 0; + int zeroPadHeight = 0; + int zeroPadWidth = 0; + EActivationFunction activationFunction = EActivationFunction::kTanh; + + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + int idxToken = 0; + + for (; token != nullptr; token = (TObjString *)nextToken()) { + switch (idxToken) { + case 1: // depth + { + TString strDepth(token->GetString()); + depth = strDepth.Atoi(); + } break; + case 2: // filter height + { + TString strFltHeight(token->GetString()); + fltHeight = strFltHeight.Atoi(); + } break; + case 3: // filter width + { + TString strFltWidth(token->GetString()); + fltWidth = strFltWidth.Atoi(); + } break; + case 4: // stride in rows + { + TString strStrideRows(token->GetString()); + strideRows = strStrideRows.Atoi(); + } break; + case 5: // stride in cols + { + TString strStrideCols(token->GetString()); + strideCols = strStrideCols.Atoi(); + } break; + case 6: // zero padding height + { + TString strZeroPadHeight(token->GetString()); + zeroPadHeight = strZeroPadHeight.Atoi(); + } break; + case 7: // zero padding width + { + TString strZeroPadWidth(token->GetString()); + zeroPadWidth = strZeroPadWidth.Atoi(); + } break; + case 8: // activation function + { + TString strActFnc(token->GetString()); + if (strActFnc == "RELU") { + activationFunction = DNN::EActivationFunction::kRelu; + } else if (strActFnc == "TANH") { + activationFunction = DNN::EActivationFunction::kTanh; + } else if (strActFnc == "SYMMRELU") { + activationFunction = DNN::EActivationFunction::kSymmRelu; + } else if (strActFnc == "SOFTSIGN") { + activationFunction = DNN::EActivationFunction::kSoftSign; + } else if (strActFnc == "SIGMOID") { + activationFunction = DNN::EActivationFunction::kSigmoid; + } else if (strActFnc == 
"LINEAR") { + activationFunction = DNN::EActivationFunction::kIdentity; + } else if (strActFnc == "GAUSS") { + activationFunction = DNN::EActivationFunction::kGauss; + } + } break; + } + ++idxToken; + } + + // Add the convolutional layer, initialize the weights and biases and copy + TConvLayer *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + zeroPadHeight, zeroPadWidth, activationFunction); + convLayer->Initialize(); + + // Add same layer to fNet + fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + zeroPadHeight, zeroPadWidth, activationFunction); + + //TConvLayer *copyConvLayer = new TConvLayer(*convLayer); + + //// add the copy to all slave nets + //for (size_t i = 0; i < nets.size(); i++) { + // nets[i].AddConvLayer(copyConvLayer); + //} +} + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate max pool layer +template +void MethodAE::ParseMaxPoolLayer(DNN::TDeepNet &deepNet, + std::vector> & /*nets*/, TString layerString, + TString delim) +{ + + int frameHeight = 0; + int frameWidth = 0; + int strideRows = 0; + int strideCols = 0; + + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + int idxToken = 0; + + for (; token != nullptr; token = (TObjString *)nextToken()) { + switch (idxToken) { + case 1: // frame height + { + TString strFrmHeight(token->GetString()); + frameHeight = strFrmHeight.Atoi(); + } break; + case 2: // frame width + { + TString strFrmWidth(token->GetString()); + frameWidth = strFrmWidth.Atoi(); + } break; + case 3: // stride in rows + { + TString strStrideRows(token->GetString()); + strideRows = strStrideRows.Atoi(); + } break; + case 4: // stride in cols + { + TString strStrideCols(token->GetString()); + strideCols = strStrideCols.Atoi(); + } break; + } + ++idxToken; + } + + // Add the Max 
pooling layer + // TMaxPoolLayer *maxPoolLayer = + deepNet.AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + + // Add the same layer to fNet + fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + + //TMaxPoolLayer *copyMaxPoolLayer = new TMaxPoolLayer(*maxPoolLayer); + + //// add the copy to all slave nets + //for (size_t i = 0; i < nets.size(); i++) { + // nets[i].AddMaxPoolLayer(copyMaxPoolLayer); + //} +} + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate reshape layer +template +void MethodAE::ParseReshapeLayer(DNN::TDeepNet &deepNet, + std::vector> & /*nets*/, TString layerString, + TString delim) +{ + int depth = 0; + int height = 0; + int width = 0; + bool flattening = false; + + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + int idxToken = 0; + + for (; token != nullptr; token = (TObjString *)nextToken()) { + if (token->GetString() == "FLAT") idxToken=4; + switch (idxToken) { + case 1: { + TString strDepth(token->GetString()); + depth = strDepth.Atoi(); + } break; + case 2: // height + { + TString strHeight(token->GetString()); + height = strHeight.Atoi(); + } break; + case 3: // width + { + TString strWidth(token->GetString()); + width = strWidth.Atoi(); + } break; + case 4: // flattening + { + TString flat(token->GetString()); + if (flat == "FLAT") { + flattening = true; + } + } break; + } + ++idxToken; + } + + // Add the reshape layer + // TReshapeLayer *reshapeLayer = + deepNet.AddReshapeLayer(depth, height, width, flattening); + + // Add the same layer to fNet + fNet->AddReshapeLayer(depth, height, width, flattening); + + //TReshapeLayer *copyReshapeLayer = new TReshapeLayer(*reshapeLayer); + + //// add the copy to all slave nets + //for (size_t i = 0; i < nets.size(); i++) { + // 
nets[i].AddReshapeLayer(copyReshapeLayer); + //} +} + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate rnn layer +template +void MethodAE::ParseRnnLayer(DNN::TDeepNet & deepNet, + std::vector> & /*nets */, TString layerString, + TString delim) +{ + // int depth = 0; + int stateSize = 0; + int inputSize = 0; + int timeSteps = 0; + bool rememberState = false; + + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + int idxToken = 0; + + for (; token != nullptr; token = (TObjString *)nextToken()) { + switch (idxToken) { + case 1: // state size + { + TString strstateSize(token->GetString()); + stateSize = strstateSize.Atoi(); + } break; + case 2: // input size + { + TString strinputSize(token->GetString()); + inputSize = strinputSize.Atoi(); + } break; + case 3: // time steps + { + TString strtimeSteps(token->GetString()); + timeSteps = strtimeSteps.Atoi(); + } + case 4: // remember state (1 or 0) + { + TString strrememberState(token->GetString()); + rememberState = (bool) strrememberState.Atoi(); + } break; + } + ++idxToken; + } + + // Add the recurrent layer, initialize the weights and biases and copy + TBasicRNNLayer *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, + timeSteps, rememberState); + basicRNNLayer->Initialize(); + + // Add same layer to fNet + fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); + + //TBasicRNNLayer *copyRNNLayer = new TBasicRNNLayer(*basicRNNLayer); + + //// add the copy to all slave nets + //for (size_t i = 0; i < nets.size(); i++) { + // nets[i].AddBasicRNNLayer(copyRNNLayer); + //} +} + +//////////////////////////////////////////////////////////////////////////////// +/// Pases the layer string and creates the appropriate lstm layer +template +void MethodAE::ParseLstmLayer(DNN::TDeepNet & /*deepNet*/, + 
std::vector> & /*nets*/, TString layerString, + TString delim) +{ + // Split layer details + TObjArray *subStrings = layerString.Tokenize(delim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + int idxToken = 0; + + for (; token != nullptr; token = (TObjString *)nextToken()) { + switch (idxToken) { + } + ++idxToken; + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// Standard constructor. +MethodAE::MethodAE(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption) + : MethodBase(jobName, Types::kAE, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(), + fBatchDepth(), fBatchHeight(), fBatchWidth(), fWeightInitialization(), fOutputFunction(), fLossFunction(), + fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), + fWeightInitializationString(), fArchitectureString(), fResume(false), fTrainingSettings() +{ + // Nothing to do here +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor from a weight file. +MethodAE::MethodAE(DataSetInfo &theData, const TString &theWeightFile) + : MethodBase(Types::kAE, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(), + fBatchHeight(), fBatchWidth(), fWeightInitialization(), fOutputFunction(), fLossFunction(), fInputLayoutString(), + fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(), + fArchitectureString(), fResume(false), fTrainingSettings() +{ + // Nothing to do here +} + +//////////////////////////////////////////////////////////////////////////////// +/// Destructor. 
+MethodAE::~MethodAE() +{ + // Nothing to do here +} + +//////////////////////////////////////////////////////////////////////////////// +/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs. +auto MethodAE::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t +{ + KeyValueVector_t blockKeyValues; + const TString keyValueDelim("="); + + TObjArray *blockStrings = parseString.Tokenize(blockDelim); + TIter nextBlock(blockStrings); + TObjString *blockString = (TObjString *)nextBlock(); + + for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) { + blockKeyValues.push_back(std::map()); + std::map &currentBlock = blockKeyValues.back(); + + TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim); + TIter nextToken(subStrings); + TObjString *token = (TObjString *)nextToken(); + + for (; token != nullptr; token = (TObjString *)nextToken()) { + TString strKeyValue(token->GetString()); + int delimPos = strKeyValue.First(keyValueDelim.Data()); + if (delimPos <= 0) continue; + + TString strKey = TString(strKeyValue(0, delimPos)); + strKey.ToUpper(); + TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length())); + + strKey.Strip(TString::kBoth, ' '); + strValue.Strip(TString::kBoth, ' '); + + currentBlock.insert(std::make_pair(strKey, strValue)); + } + } + return blockKeyValues; +} + +//////////////////////////////////////////////////////////////////////////////// +/// What kind of analysis type can handle the AE +Bool_t MethodAE::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/) +{ + if (type == Types::kClassification && numberClasses == 2) return kTRUE; + if (type == Types::kMulticlass) return kTRUE; + if (type == Types::kRegression) return kTRUE; + + return kFALSE; +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::Train() +{ + if (fInteractive) { + Log() << 
kFATAL << "Not implemented yet" << Endl; + return; + } + + if (this->GetArchitectureString() == "GPU") { +#ifdef R__HAS_TMVACUDA + Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl; +#else + Log() << kFATAL << "CUDA backend not enabled. Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; + return; +#endif + } else if (this->GetArchitectureString() == "OpenCL") { + Log() << kFATAL << "OpenCL backend not yet supported." << Endl; + return; + } else if (this->GetArchitectureString() == "CPU") { +#ifdef R__HAS_TMVACPU + Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl; +#else + Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; + return; +#endif + } + +/// definitions for CUDA +#ifdef R__HAS_TMVACUDA // Included only if DNNCUDA flag is set. + using Architecture_t = DNN::TCuda; +#else +#ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set. 
+ using Architecture_t = DNN::TCpu; +#else + using Architecture_t = DNN::TReference; +#endif +#endif + + using Scalar_t = Architecture_t::Scalar_t; + using DeepNet_t = TMVA::DNN::TDeepNet; + using TensorDataLoader_t = TTensorDataLoader; + + // Determine the number of training and testing examples + size_t nTrainingSamples = GetEventCollection(Types::kTraining).size(); + size_t nTestSamples = GetEventCollection(Types::kTesting).size(); + + // Determine the number of outputs + // // size_t outputSize = 1; + // // if (fAnalysisType == Types::kRegression && GetNTargets() != 0) { + // // outputSize = GetNTargets(); + // // } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) { + // // outputSize = DataInfo().GetNClasses(); + // // } + + size_t trainingPhase = 1; + for (TTrainingAESettings &settings : this->GetTrainingSettings()) { + + size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading + + Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ":" << Endl; + trainingPhase++; + + // After the processing of the options, initialize the master deep net + size_t batchSize = settings.batchSize; + // Should be replaced by actual implementation. No support for this now. + size_t inputDepth = this->GetInputDepth(); + size_t inputHeight = this->GetInputHeight(); + size_t inputWidth = this->GetInputWidth(); + size_t batchDepth = this->GetBatchDepth(); + size_t batchHeight = this->GetBatchHeight(); + size_t batchWidth = this->GetBatchWidth(); + ELossFunction J = this->GetLossFunction(); + EInitialization I = this->GetWeightInitialization(); + ERegularization R = settings.regularization; + Scalar_t weightDecay = settings.weightDecay; + + //Batch size should be included in batch layout as well. There are two possibilities: + // 1. 
Batch depth = batch size one will input tensors as (batch_size x d1 x d2) + // This is the case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height + // 2. Batch depth = 1, batch height = batch size batch width = dim of input features + // This should be the case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features ) + + if (batchDepth != batchSize && batchDepth > 1) { + Error("TrainCpu","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize); + return; + } + if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) { + Error("TrainCpu","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize); + return; + } + + + //check also that input layout compatible with batch layout + bool badLayout = false; + // case batch depth == batch size + if (batchDepth == batchSize) + badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ; + // case batch Height is batch size + if (batchHeight == batchSize && batchDepth == 1) + badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth); + if (badLayout) { + Error("TrainCpu","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ", + inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth); + return; + } + + + DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay); + + // create a copy of DeepNet for evaluating but with batch size = 1 + // fNet is the saved network and will be with CPU or Reference architecture + fNet = std::unique_ptr(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth, + batchHeight, batchWidth, J, I, R, weightDecay)); + + // Initialize the vector of slave nets + std::vector nets{}; + nets.reserve(nThreads); + for (size_t i = 0; i < nThreads; i++) { + 
// create a copies of the master deep net + nets.push_back(deepNet); + } + + // Add all appropriate layers to deepNet and copies to fNet + CreateDeepNet(deepNet, nets); + + // print the created network + std::cout << "***** Deep Learning Network *****\n"; + deepNet.Print(); + + // Loading the training and testing datasets + TMVAInput_t trainingTuple = std::tie(GetEventCollection(Types::kTraining), DataInfo()); + TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(), + deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(), + deepNet.GetOutputWidth(), nThreads); + + TMVAInput_t testTuple = std::tie(GetEventCollection(Types::kTesting), DataInfo()); + TensorDataLoader_t testingData(testTuple, nTestSamples, deepNet.GetBatchSize(), + deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(), + deepNet.GetOutputWidth(), nThreads); + + // Initialize the minimizer + DNN::TDLGradientDescent minimizer(settings.learningRate, settings.convergenceSteps, + settings.testInterval); + + // Initialize the vector of batches, one batch for one slave network + std::vector> batches{}; + + bool converged = false; + // count the steps until the convergence + size_t stepCount = 0; + size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize(); + + // start measuring + std::chrono::time_point tstart, tend; + tstart = std::chrono::system_clock::now(); + + if (!fInteractive) { + Log() << std::setw(10) << "Epoch" + << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Test Err." << std::setw(12) << "GFLOP/s" + << std::setw(16) << "time(s)/epoch" << std::setw(12) << "Conv. 
Steps" << Endl; + std::string separator(62, '-'); + Log() << separator << Endl; + } + + Double_t minTestError = 0; + // use generator with 0 seed to get always different values + RandomGenerator rng(0); + while (!converged) { + stepCount++; + trainingData.Shuffle(rng); + + // execute all epochs + //for (size_t i = 0; i < batchesInEpoch; i += nThreads) { + //std::cout << "Loop on batches " << batchesInEpoch << std::endl; + for (size_t i = 0; i < batchesInEpoch; ++i ) { + // Clean and load new batches, one batch for one slave net + //batches.clear(); + //batches.reserve(nThreads); + //for (size_t j = 0; j < nThreads; j++) { + // batches.push_back(trainingData.GetTensorBatch()); + //} + + auto my_batch = trainingData.GetTensorBatch(); + + //std::cout << "retrieve batch # " << i << " data " << my_batch.GetInput()[0](0,0) << std::endl; + + //std::cout << "input size " << my_batch.GetInput().size() << " matrix " << my_batch.GetInput().front().GetNrows() << " x " << my_batch.GetInput().front().GetNcols() << std::endl; + + // execute one minimization step + // StepMomentum is currently not written for single thread, TODO write it + if (settings.momentum > 0.0) { + //minimizer.StepMomentum(deepNet, nets, batches, settings.momentum); + minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights()); + } else { + //minimizer.Step(deepNet, nets, batches); + minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights()); + } + } + //} + + + if ((stepCount % minimizer.GetTestInterval()) == 0) { + + std::chrono::time_point t1,t2; + + t1 = std::chrono::system_clock::now(); + + // Compute test error. 
+ Double_t testError = 0.0; + for (auto batch : testingData) { + auto inputTensor = batch.GetInput(); + auto outputMatrix = batch.GetOutput(); + auto weights = batch.GetWeights(); + testError += deepNet.Loss(inputTensor, outputMatrix, weights); + } + + + t2 = std::chrono::system_clock::now(); + testError /= (Double_t)(nTestSamples / settings.batchSize); + // copy configuration when reached a minimum error + if (testError < minTestError ) { + // Copy weights from deepNet to fNet + Log() << std::setw(10) << stepCount << " Minimun Test error found - save the configuration " << Endl; + for (size_t i = 0; i < deepNet.GetDepth(); ++i) { + const auto & nLayer = fNet->GetLayerAt(i); + const auto & dLayer = deepNet.GetLayerAt(i); + nLayer->CopyWeights(dLayer->GetWeights()); + nLayer->CopyBiases(dLayer->GetBiases()); + // std::cout << "Weights for layer " << i << std::endl; + // for (size_t k = 0; k < dlayer->GetWeights().size(); ++k) + // dLayer->GetWeightsAt(k).Print(); + } + minTestError = testError; + } + else if ( minTestError <= 0. ) + minTestError = testError; + + + Double_t trainingError = 0.0; + // Compute training error. + for (auto batch : trainingData) { + auto inputTensor = batch.GetInput(); + auto outputMatrix = batch.GetOutput(); + auto weights = batch.GetWeights(); + + //std::cout << "After size " << batch.GetInput().size() << " matrix " << batch.GetInput().front().GetNrows() << " x " << batch.GetInput().front().GetNcols() << std::endl; + + trainingError += deepNet.Loss(inputTensor, outputMatrix, weights); + } + trainingError /= (Double_t)(nTrainingSamples / settings.batchSize); + + // stop measuring + tend = std::chrono::system_clock::now(); + + // Compute numerical throughput. 
+ std::chrono::duration elapsed_seconds = tend - tstart; + std::chrono::duration elapsed1 = t1-tstart; + std::chrono::duration elapsed2 = t2-tstart; + + double seconds = elapsed_seconds.count(); + double nFlops = (double)(settings.testInterval * batchesInEpoch); + // nFlops *= net.GetNFlops() * 1e-9; + + converged = minimizer.HasConverged(testError) || stepCount >= settings.maxEpochs; + + Log() << std::setw(10) << stepCount << " | " << std::setw(12) << trainingError << std::setw(12) << testError + << std::setw(12) << nFlops / seconds << std::setw(12) + << std::setw(12) << seconds/settings.testInterval + << std::setw(12) << minimizer.GetConvergenceCount() + << std::setw(12) << elapsed1.count() + << std::setw(12) << elapsed2.count() + << std::setw(12) << seconds + + << Endl; + + if (converged) { + Log() << Endl; + } + tstart = std::chrono::system_clock::now(); + } + } + + } + +} + +//////////////////////////////////////////////////////////////////////////////// +Double_t MethodAE::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/) +{ + using Matrix_t = typename ArchitectureImpl_t::Matrix_t; + + int nVariables = GetEvent()->GetNVariables(); + int batchWidth = fNet->GetBatchWidth(); + int batchDepth = fNet->GetBatchDepth(); + int batchHeight = fNet->GetBatchHeight(); + int nb = fNet->GetBatchSize(); + int noutput = fNet->GetOutputWidth(); + + // note that batch size whould be equal to 1 + R__ASSERT(nb == 1); + + std::vector X{}; + Matrix_t YHat(nb, noutput); + + // get current event + const std::vector &inputValues = GetEvent()->GetValues(); + + // for (int i = 0; i < batchDepth; ++i) + + // find dimension of matrices + // Tensor outer size must be equal to 1 + // because nb ==1 by definition + int n1 = batchHeight; + int n2 = batchWidth; + // treat case where batchHeight is batchSize in case of first Dense layers + if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = 1; + + X.emplace_back(Matrix_t(n1, n2)); + + if (n1 > 1) { + 
R__ASSERT( n1*n2 == nVariables); + // for CNN or RNN evaluations + for (int j = 0; j < n1; ++j) { + for (int k = 0; k < n2; k++) { + X[0](j, k) = inputValues[j*n1+k]; + } + } + } + else { + R__ASSERT( n2 == nVariables); + for (int k = 0; k < n2; k++) { + X[0](0, k) = inputValues[k]; + } + } + + // perform the prediction + fNet->Prediction(YHat, X, fOutputFunction); + + double mvaValue = YHat(0, 0); + + // for debugging +// #ifdef DEBUG +// TMatrixF xInput(n1,n2, inputValues.data() ); +// std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl; +// xInput.Print(); +// std::cout << "Output of DeepNet " << mvaValue << std::endl; +// auto & deepnet = *fNet; +// const auto * rnn = deepnet.GetLayerAt(0); +// const auto & rnn_output = rnn->GetOutput(); +// std::cout << "DNN output " << rnn_output.size() << std::endl; +// for (size_t i = 0; i < rnn_output.size(); ++i) { +// TMatrixD m(rnn_output[i].GetNrows(), rnn_output[i].GetNcols() , rnn_output[i].GetRawDataPointer() ); +// m.Print(); +// //rnn_output[i].Print(); +// } +// #endif +// std::cout << " { " << GetEvent()->GetClass() << " , " << mvaValue << " } "; + + + return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue; + +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::AddWeightsXMLTo(void * parent) const +{ + // Create the parrent XML node with name "Weights" + auto & xmlEngine = gTools().xmlengine(); + void* nn = xmlEngine.NewChild(parent, 0, "Weights"); + + /*! Get all necessary information, in order to be able to reconstruct the net + * if we read the same XML file. 
*/ + + // Deep Net specific info + Int_t depth = fNet->GetDepth(); + + Int_t inputDepth = fNet->GetInputDepth(); + Int_t inputHeight = fNet->GetInputHeight(); + Int_t inputWidth = fNet->GetInputWidth(); + + Int_t batchSize = fNet->GetBatchSize(); + + Int_t batchDepth = fNet->GetBatchDepth(); + Int_t batchHeight = fNet->GetBatchHeight(); + Int_t batchWidth = fNet->GetBatchWidth(); + + char lossFunction = static_cast(fNet->GetLossFunction()); + char initialization = static_cast(fNet->GetInitialization()); + char regularization = static_cast(fNet->GetRegularization()); + + Double_t weightDecay = fNet->GetWeightDecay(); + + // Method specific info (not sure these are needed) + char outputFunction = static_cast(this->GetOutputFunction()); + //char lossFunction = static_cast(this->GetLossFunction()); + + // Add attributes to the parent node + xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth)); + + xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth)); + xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight)); + xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth)); + + xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize)); + xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth)); + xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight)); + xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth)); + + xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction)); + xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization)); + xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization)); + xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction)); + + gTools().AddAttr(nn, "WeightDecay", weightDecay); + + + for (Int_t i = 0; i < depth; i++) + { + fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn); + } + + +} + +//////////////////////////////////////////////////////////////////////////////// +void 
MethodAE::ReadWeightsFromXML(void * rootXML) +{ + std::cout << "READ DL network from XML " << std::endl; + + auto netXML = gTools().GetChild(rootXML, "Weights"); + if (!netXML){ + netXML = rootXML; + } + + size_t netDepth; + gTools().ReadAttr(netXML, "NetDepth", netDepth); + + size_t inputDepth, inputHeight, inputWidth; + gTools().ReadAttr(netXML, "InputDepth", inputDepth); + gTools().ReadAttr(netXML, "InputHeight", inputHeight); + gTools().ReadAttr(netXML, "InputWidth", inputWidth); + + size_t batchSize, batchDepth, batchHeight, batchWidth; + gTools().ReadAttr(netXML, "BatchSize", batchSize); + // use always batchsize = 1 + //batchSize = 1; + gTools().ReadAttr(netXML, "BatchDepth", batchDepth); + gTools().ReadAttr(netXML, "BatchHeight", batchHeight); + gTools().ReadAttr(netXML, "BatchWidth", batchWidth); + + char lossFunctionChar; + gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar); + char initializationChar; + gTools().ReadAttr(netXML, "Initialization", initializationChar); + char regularizationChar; + gTools().ReadAttr(netXML, "Regularization", regularizationChar); + char outputFunctionChar; + gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar); + double weightDecay; + gTools().ReadAttr(netXML, "WeightDecay", weightDecay); + + std::cout << "lossfunction is " << lossFunctionChar << std::endl; + + // create the net + + // DeepNetCpu_t is defined in MethodAE.h + + fNet = std::unique_ptr(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, + batchHeight, batchWidth, + static_cast(lossFunctionChar), + static_cast(initializationChar), + static_cast(regularizationChar), + weightDecay)); + + fOutputFunction = static_cast(outputFunctionChar); + + + //size_t previousWidth = inputWidth; + auto layerXML = gTools().xmlengine().GetChild(netXML); + + // loop on the layer and add them to the network + for (size_t i = 0; i < netDepth; i++) { + + TString layerName = gTools().xmlengine().GetNodeName(layerXML); + + // case of dense 
layer + if (layerName == "DenseLayer") { + + // read width and activation function and then we can create the layer + size_t width = 0; + gTools().ReadAttr(layerXML, "Width", width); + + // Read activation function. + TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction func = static_cast(funcString.Atoi()); + + + fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability + + } + // Convolutional Layer + else if (layerName == "ConvLayer") { + + // read width and activation function and then we can create the layer + size_t depth = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + size_t fltHeight, fltWidth = 0; + size_t strideRows, strideCols = 0; + size_t padHeight, padWidth = 0; + gTools().ReadAttr(layerXML, "FilterHeight", fltHeight); + gTools().ReadAttr(layerXML, "FilterWidth", fltWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + gTools().ReadAttr(layerXML, "PaddingHeight", padHeight); + gTools().ReadAttr(layerXML, "PaddingWidth", padWidth); + + // Read activation function. 
+ TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction actFunction = static_cast(funcString.Atoi()); + + + fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + padHeight, padWidth, actFunction); + + } + + // MaxPool Layer + else if (layerName == "MaxPoolLayer") { + + // read maxpool layer info + size_t frameHeight, frameWidth = 0; + size_t strideRows, strideCols = 0; + gTools().ReadAttr(layerXML, "FrameHeight", frameHeight); + gTools().ReadAttr(layerXML, "FrameWidth", frameWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + + fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + } + else if (layerName == "ReshapeLayer") { + + // read reshape layer info + size_t depth, height, width = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + gTools().ReadAttr(layerXML, "Height", height); + gTools().ReadAttr(layerXML, "Width", width); + int flattening = 0; + gTools().ReadAttr(layerXML, "Flattening",flattening ); + + fNet->AddReshapeLayer(depth, height, width, flattening); + + } + else if (layerName == "RNNLayer") { + + std::cout << "add RNN layer " << std::endl; + + // read reshape layer info + size_t stateSize,inputSize, timeSteps = 0; + int rememberState= 0; + gTools().ReadAttr(layerXML, "StateSize", stateSize); + gTools().ReadAttr(layerXML, "InputSize", inputSize); + gTools().ReadAttr(layerXML, "TimeSteps", timeSteps); + gTools().ReadAttr(layerXML, "RememberState", rememberState ); + + fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); + + } + + + // read eventually weights and biases + fNet->GetLayers().back()->ReadWeightsFromXML(layerXML); + + // read next layer + layerXML = gTools().GetNextChild(layerXML); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::ReadWeightsFromStream(std::istream & /*istr*/) +{ +} + 
+//////////////////////////////////////////////////////////////////////////////// +const Ranking *TMVA::MethodAE::CreateRanking() +{ + // TODO + return NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodAE::GetHelpMessage() const +{ + // TODO +} + +} // namespace TMVA diff --git a/tmva/tmva/test/DNN/CNN/CMakeLists.txt b/tmva/tmva/test/DNN/CNN/CMakeLists.txt index 6a7d8cf2b0361..9730bcbffc190 100644 --- a/tmva/tmva/test/DNN/CNN/CMakeLists.txt +++ b/tmva/tmva/test/DNN/CNN/CMakeLists.txt @@ -75,5 +75,7 @@ ROOT_ADD_TEST(TMVA-DNN-CNN-Backpropagation-CPU COMMAND testConvBackpropagationCp ROOT_EXECUTABLE(testMethodDLCpu TestMethodDL.cxx LIBRARIES ${Libraries}) ROOT_ADD_TEST(TMVA-DNN-CNN-MethodDL-CPU COMMAND testMethodDLCpu) +ROOT_EXECUTABLE(testMethodAECpu TestMethodAE.cxx LIBRARIES ${Libraries}) +ROOT_ADD_TEST(TMVA-DNN-CNN-MethodAE-CPU COMMAND testMethodAECpu) endif () diff --git a/tmva/tmva/test/DNN/CNN/TestMethodAE.cxx b/tmva/tmva/test/DNN/CNN/TestMethodAE.cxx new file mode 100644 index 0000000000000..9a0ce7ba82b14 --- /dev/null +++ b/tmva/tmva/test/DNN/CNN/TestMethodAE.cxx @@ -0,0 +1,38 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Vladimir Ilievski + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Testing Method DL for Conv Net for the Reference backend * + * * + * Authors (alphabetical): * + * Vladimir Ilievski - CERN, Switzerland * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#include "TestMethodAE.h" +#include "TString.h" + +int main() +{ + std::cout << "Testing Method AE for CPU backend: " << std::endl; + + TString archCPU = "CPU"; + + testMethodAE_DNN(archCPU); + +} diff --git a/tmva/tmva/test/DNN/CNN/TestMethodAE.h b/tmva/tmva/test/DNN/CNN/TestMethodAE.h new file mode 100644 index 0000000000000..8db2da391c016 --- /dev/null +++ b/tmva/tmva/test/DNN/CNN/TestMethodAE.h @@ -0,0 +1,155 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Vladimir Ilievski + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Testing Method DL for Conv Net * + * * + * Authors (alphabetical): * + * Vladimir Ilievski - CERN, Switzerland * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H +#define TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H + +#include "TFile.h" +#include "TTree.h" +#include "TString.h" +#include "TROOT.h" + +#include "TMVA/MethodAE.h" +#include "TMVA/DataLoader.h" +#include "TMVA/Factory.h" +#include "TMVA/Config.h" + +#include "MakeImageData.h" + +#include + + +/** Testing the entire pipeline of the Method DL, when only a Multilayer Percepton + * is constructed. */ +//______________________________________________________________________________ +void testMethodAE_DNN(TString architectureStr) +{ + + ROOT::EnableImplicitMT(1); + TMVA::Config::Instance(); + + TFile *input(0); + // TString fname = "/Users/vladimirilievski/Desktop/Vladimir/GSoC/ROOT-CI/common-version/root/tmva/tmva/test/DNN/CNN/" + // "dataset/tmva_class_example.root"; + /* + TString fname = "http://root.cern.ch/files/tmva_class_example.root"; + TString fopt = "CACHEREAD"; + input = TFile::Open(fname,fopt); + */ + input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD"); + + + TString outfileName("TMVA_DNN.root"); + TFile *outputFile = TFile::Open(outfileName, "RECREATE"); + + TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset"); + + dataloader->AddVariable( "var1", "Variable 1", "units", 'F' ); + dataloader->AddVariable( "var2", "Variable 2", "units", 'F' ); + dataloader->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); + dataloader->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); + + dataloader->AddTarget("var1"); + dataloader->AddTarget("var2"); + + TTree *regTree = (TTree*)input->Get("TreeR"); + + Double_t regWeight = 1.0; + + dataloader->AddRegressionTree( 
regTree, regWeight ); + + TCut mycut = ""; + + dataloader->PrepareTrainingAndTestTree( mycut, + "nTrain_Regression=1000:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); + + // Input Layout + TString inputLayoutString("InputLayout=1|1|2"); + + // Batch Layout + TString batchLayoutString("BatchLayout=256|1|2"); + + // General layout. + TString layoutString("Layout=Encoder={RESHAPE|1|1|2|FLAT,DENSE|128|TANH,DENSE|64|TANH}Decoder={DENSE|128|TANH,DENSE|2|LINEAR,LINEAR}"); + + // Training strategies. + TString training0("LearningRate=1e-1,Momentum=0.9,Repetitions=1," + "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," + "WeightDecay=1e-4,Regularization=L2," + "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True"); + TString training1("LearningRate=1e-2,Momentum=0.9,Repetitions=1," + "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," + "WeightDecay=1e-4,Regularization=L2," + "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); + TString training2("LearningRate=1e-3,Momentum=0.9,Repetitions=1," + "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," + "WeightDecay=1e-4,Regularization=L2," + "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); + TString trainingStrategyString("TrainingStrategy="); + trainingStrategyString += training0 + "|" + training1 + "|" + training2; + + // General Options. 
+ TString dnnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:" + "WeightInitialization=XAVIERUNIFORM"); + + + // Concatenate all option strings + dnnOptions.Append(":"); + dnnOptions.Append(inputLayoutString); + + dnnOptions.Append(":"); + dnnOptions.Append(batchLayoutString); + + dnnOptions.Append(":"); + dnnOptions.Append(layoutString); + + dnnOptions.Append(":"); + dnnOptions.Append(trainingStrategyString); + + dnnOptions.Append(":Architecture="); + dnnOptions.Append(architectureStr); + + // create factory + TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, + "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" ); + + TString methodTitle = "AE_" + architectureStr; + factory->BookMethod(dataloader, TMVA::Types::kAE, methodTitle, dnnOptions); + + // Train MVAs using the set of training events + factory->TrainAllMethods(); + + // Save the output + outputFile->Close(); + + std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; + std::cout << "==> TMVAClassification is done!" 
<< std::endl; + + delete factory; + delete dataloader; +} + +#endif diff --git a/tmva/tmva/test/DNN/TestLossFunctions.cxx b/tmva/tmva/test/DNN/TestLossFunctions.cxx index 6b6f9797c418a..61daf8b8e7062 100644 --- a/tmva/tmva/test/DNN/TestLossFunctions.cxx +++ b/tmva/tmva/test/DNN/TestLossFunctions.cxx @@ -71,4 +71,19 @@ int main() std::cout << "Testing softmax cross entropy gradient: "; std::cout << "maximum relative error = " << print_error(error) << std::endl; if (error > 1e-3) return 1; + + + // + // KL Divergence + // + + error = testKLDivergence>(10); + std::cout << "Testing KL divergence loss: "; + std::cout << "maximum relative error = " << print_error(error) << std::endl; + if (error > 1e-3) return 1; + + error = testKLDivergenceGradients>(10); + std::cout << "Testing KL divergence gradients: "; + std::cout << "maximum relative error = " << print_error(error) << std::endl; + if (error > 1e-3) return 1; } diff --git a/tmva/tmva/test/DNN/TestLossFunctions.h b/tmva/tmva/test/DNN/TestLossFunctions.h index e71ec3481cbbe..bc5fa2bd8eae5 100644 --- a/tmva/tmva/test/DNN/TestLossFunctions.h +++ b/tmva/tmva/test/DNN/TestLossFunctions.h @@ -328,3 +328,106 @@ auto testSoftmaxCrossEntropyGradients(size_t ntests) } return maximumError; } + +//______________________________________________________________________________ +// +// KL Divergence +//______________________________________________________________________________ + +template +auto testKLDivergence(size_t ntests) +-> typename Architecture::Scalar_t +{ + using Matrix_t = typename Architecture::Matrix_t; + using Scalar_t = typename Architecture::Scalar_t; + Double_t maximumError = 0.0; + + for (size_t i = 0; i < ntests; i++) { + size_t m = rand() % 100 + 1; + size_t n = rand() % 100 + 1; + + TMatrixT W(m, 1); + TMatrixT X(m, n); + TMatrixT Y(m, n); + TMatrixT Z(m, n); + + W = 1.0; + randomMatrix(X); + randomMatrix(Y); + + Matrix_t WArch(W); + Matrix_t XArch(X); + Matrix_t YArch(Y); + + Scalar_t ce = 
evaluate(ELossFunction::kKLDivergence, YArch, XArch, WArch); + + Scalar_t ceReference = 0.0; + for (size_t j = 0; j < m; j++) { + for (size_t k = 0; k < n; k++) { + ceReference += 1 + XArch(j, k) - pow(YArch(j, k), 2) - pow(exp(XArch(j, k)), 2); + } + } + ceReference /= (Scalar_t) m; + + Double_t error; + if (ceReference != 0.0) + error = std::fabs((ce - ceReference) / ceReference); + else + error = std::fabs(ce - ceReference); + maximumError = std::max(error, maximumError); + } + return maximumError; +} + +//______________________________________________________________________________ +template +auto testKLDivergenceGradients(size_t ntests) +-> typename Architecture::Scalar_t +{ + using Matrix_t = typename Architecture::Matrix_t; + using Scalar_t = typename Architecture::Scalar_t; + Double_t maximumError = 0.0; + + for (size_t i = 0; i < ntests; i++) { + size_t m = 8; //rand() % 100 + 1; + size_t n = 8; //rand() % 100 + 1; + + TMatrixT W(m, 1); + TMatrixT X(m, n); + TMatrixT Y(m, n); + TMatrixT MeanRef(m, n); + TMatrixT SDRef(m, n); + + randomMatrix(W); + randomMatrix(X); + randomMatrix(Y); + + Matrix_t WArch(W); + Matrix_t XArch(X); + Matrix_t YArch(Y); + Matrix_t MeanArch(Y); + Matrix_t SDArch(Y); + + evaluateGradients(MeanArch, SDArch, ELossFunction::kKLDivergence, YArch, XArch, WArch); + + Double_t norm = 1.0 / m; + + for (size_t j = 0; j < m; j++) { + for (size_t k = 0; k < n; k++) { + MeanRef(j, k) = -2.0 * Y(j, k) * W(j, 0) * norm; + SDRef(j, k) = (1.0 - (2.0 * exp(2.0 * X(j ,k)))) * norm; + SDRef(j, k) *= W(j, 0); + } + } + + TMatrixT dMean(MeanArch); + TMatrixT dSD(SDArch); + Double_t mean_error = maximumRelativeError(dMean, MeanRef); + Double_t sd_error = maximumRelativeError(dSD, SDRef); + Double_t error = (mean_error + sd_error) / 2; + maximumError = std::max(error, maximumError); + } + return maximumError; +} + +