Add KL Divergence and MethodAE #9

Open · wants to merge 3 commits into master
2 changes: 1 addition & 1 deletion tmva/tmva/CMakeLists.txt
@@ -16,7 +16,7 @@ set(headers1 Configurable.h Factory.h MethodBase.h MethodCompositeBase.h
MethodKNN.h MethodCFMlpANN.h MethodCFMlpANN_Utils.h MethodLikelihood.h
MethodHMatrix.h MethodPDERS.h MethodBDT.h MethodDT.h MethodSVM.h MethodBayesClassifier.h
MethodFDA.h MethodMLP.h MethodBoost.h
MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h
MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h MethodAE.h
MethodCrossValidation.h)
set(headers2 TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h BinarySearchTree.h
Timer.h RootFinder.h CrossEntropy.h DecisionTree.h DecisionTreeNode.h MisClassificationError.h
1 change: 1 addition & 0 deletions tmva/tmva/inc/LinkDef1.h
@@ -67,5 +67,6 @@
#pragma link C++ class TMVA::MethodDNN+;
#pragma link C++ class TMVA::MethodCrossValidation+;
#pragma link C++ class TMVA::MethodDL+;
#pragma link C++ class TMVA::MethodAE+;

#endif
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cpu.h
@@ -191,6 +191,15 @@ class TCpu
const TCpuMatrix<Scalar_t> &weights);
static void SoftmaxCrossEntropyGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static Scalar_t KLDivergence(const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
static void KLDivergenceGradients(TCpuMatrix<Scalar_t> &dMean, TCpuMatrix<Scalar_t> &dStandardDeviation,
const TCpuMatrix<Scalar_t> &Mean, const TCpuMatrix<Scalar_t> &StandardDeviation,
const TCpuMatrix<Scalar_t> &weights);
///@}

//____________________________________________________________________________
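For context, the quantity declared here is closely related to the KL divergence between a diagonal Gaussian N(mu, sigma^2) and a standard normal, the regularization term used in variational autoencoders. Its usual closed form is sketched below in LaTeX, under the convention stated in the comment above that StandardDeviation stores s = log sigma, so sigma^2 = e^{2s}; the implementations later in this diff accumulate the closely related weighted per-element quantity 1 + s - mu^2 - e^{2s} and average it over the batch dimension.

D_{\mathrm{KL}}\left(\mathcal{N}(\mu,\sigma^2)\,\|\,\mathcal{N}(0,1)\right)
    = -\frac{1}{2}\sum_{j}\left(1 + \log\sigma_j^{2} - \mu_j^{2} - \sigma_j^{2}\right),
\qquad \sigma_j^{2} = e^{2 s_j}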
8 changes: 8 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h
@@ -193,6 +193,14 @@ class TCuda
const TCudaMatrix<AFloat> &weights);
static void SoftmaxCrossEntropyGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static AFloat KLDivergence(const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
static void KLDivergenceGradients(TCudaMatrix<AFloat> &dMean, TCudaMatrix<AFloat> &dStandardDeviation,
const TCudaMatrix<AFloat> &Mean, const TCudaMatrix<AFloat> &StandardDeviation,
const TCudaMatrix<AFloat> &weights);
///@}

//____________________________________________________________________________
9 changes: 9 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Architectures/Reference.h
@@ -190,6 +190,15 @@ class TReference
const TMatrixT<AReal> &weights);
static void SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);

/** KL divergence between the distributions described by the mean and standard-deviation
* matrices. This is applied at the end of the encoder network. StandardDeviation is
* assumed to hold the log of the standard deviation, and the computation is done accordingly. */
static AReal KLDivergence(const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
static void KLDivergenceGradients(TMatrixT<AReal> &dMean, TMatrixT<AReal> &dStandardDeviation,
const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights);
///@}

//____________________________________________________________________________
22 changes: 20 additions & 2 deletions tmva/tmva/inc/TMVA/DNN/Functions.h
@@ -55,7 +55,8 @@ enum class ELossFunction
{
kCrossEntropy = 'C',
kMeanSquaredError = 'R',
kSoftmaxCrossEntropy = 'S'
kSoftmaxCrossEntropy = 'S',
kKLDivergence = 'D'
};

/*! Enum representing the regularization type applied for a given layer */
@@ -171,6 +172,7 @@ inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y
case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
case ELossFunction::kKLDivergence: return Architecture_t::KLDivergence(Y,output,weights);
}
return 0.0;
}
@@ -190,10 +192,26 @@ inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFuncti
case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
case ELossFunction::kSoftmaxCrossEntropy :
Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
break;
}
}

/*! Compute the gradients with respect to the two metrics used for the
* similarity measure. The gradients are returned in two separate matrices. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dMetricOne, typename Architecture_t::Matrix_t &dMetricTwo,
ELossFunction f, const typename Architecture_t::Matrix_t &MetricOne,
const typename Architecture_t::Matrix_t &MetricTwo,
const typename Architecture_t::Matrix_t &weights)
{
switch(f)
{
case ELossFunction::kKLDivergence: Architecture_t::KLDivergenceGradients(dMetricOne, dMetricTwo, MetricOne, MetricTwo, weights);
break;
}
}


//______________________________________________________________________________
//
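A minimal sketch of how the dispatch entries added above might be exercised directly, using the reference backend. The matrix shapes and values are illustrative only, and the argument order (mean in the first matrix slot, log standard deviation in the second) follows the loss-function test added later in this PR.

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/Architectures/Reference.h"
#include "TMatrixT.h"
#include <iostream>

int main()
{
   using Arch = TMVA::DNN::TReference<double>;

   // Mean and log-standard-deviation matrices produced by an encoder, plus per-event weights.
   TMatrixT<double> mean(4, 2), logSigma(4, 2), weights(4, 1);
   mean = 0.1;
   logSigma = -1.0;
   weights = 1.0;

   // Scalar loss value through the generic dispatcher extended in this diff.
   double loss = TMVA::DNN::evaluate<Arch>(TMVA::DNN::ELossFunction::kKLDivergence,
                                           mean, logSigma, weights);

   // Gradients with respect to both metrics through the new two-output overload.
   TMatrixT<double> dMean(4, 2), dLogSigma(4, 2);
   TMVA::DNN::evaluateGradients<Arch>(dMean, dLogSigma,
                                      TMVA::DNN::ELossFunction::kKLDivergence,
                                      mean, logSigma, weights);

   std::cout << "KL loss: " << loss << std::endl;
   return 0;
}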
256 changes: 256 additions & 0 deletions tmva/tmva/inc/TMVA/MethodAE.h
@@ -0,0 +1,256 @@
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Vladimir Ilievski, Saurav Shekhar, Siddhartha Rao Kamalakara

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : MethodAE *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Deep Neural Network Method *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <[email protected]> - CERN, Switzerland *
* Saurav Shekhar <[email protected]> - ETH Zurich, Switzerland *
* Siddhartha Rao Kamalakara <[email protected]> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#ifndef ROOT_TMVA_MethodAE
#define ROOT_TMVA_MethodAE

//////////////////////////////////////////////////////////////////////////
// //
// MethodAE //
// //
// Method class for creating Auto Encoders //
// //
//////////////////////////////////////////////////////////////////////////

#include "TString.h"

#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

#include "TMVA/DNN/Architectures/Reference.h"

#ifdef R__HAS_TMVACPU
#include "TMVA/DNN/Architectures/Cpu.h"
#endif

#ifdef R__HAS_TMVACUDA
#include "TMVA/DNN/Architectures/Cuda.h"
#endif

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"

#include <vector>

namespace TMVA {

/*! All of the options that can be specified in the training string */
struct TTrainingAESettings {
size_t batchSize;
size_t testInterval;
size_t convergenceSteps;
size_t maxEpochs;
DNN::ERegularization regularization;
Double_t learningRate;
Double_t momentum;
Double_t weightDecay;
std::vector<Double_t> dropoutProbabilities;
bool multithreading;
};

class MethodAE : public MethodBase {

private:
// Key-Value vector type, containing the values for the training options
using KeyValueVector_t = std::vector<std::map<TString, TString>>;
#ifdef R__HAS_TMVACPU
using ArchitectureImpl_t = TMVA::DNN::TCpu<Double_t>;
#else
using ArchitectureImpl_t = TMVA::DNN::TReference<Double_t>;
#endif
using DeepNetImpl_t = TMVA::DNN::TDeepNet<ArchitectureImpl_t>;
std::unique_ptr<DeepNetImpl_t> fNet;

/*! The option handling methods */
void DeclareOptions();
void ProcessOptions();

void Init();

// Function to parse the layout of the input
void ParseInputLayout();
void ParseBatchLayout();

/*! After calling ProcessOptions(), all of the options are parsed, so using
* the parsed options, and given the architecture and the type of the layers,
* we build the deep network passed as a reference to the function. */
template <typename Architecture_t, typename Layer_t>
void CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);

template <typename Architecture_t, typename Layer_t>
void CreateEncoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void CreateDecoder(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layoutString);

template <typename Architecture_t, typename Layer_t>
void ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

template <typename Architecture_t, typename Layer_t>
void ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);

size_t fInputDepth; ///< The depth of the input.
size_t fInputHeight; ///< The height of the input.
size_t fInputWidth; ///< The width of the input.

size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
size_t fBatchHeight; ///< The height of the batch used to train the deep net.
size_t fBatchWidth; ///< The width of the batch used to train the deep net.

DNN::EInitialization fWeightInitialization; ///< The initialization method
DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
DNN::ELossFunction fLossFunction; ///< The loss function

TString fInputLayoutString; ///< The string defining the layout of the input
TString fBatchLayoutString; ///< The string defining the layout of the batch
TString fLayoutString; ///< The string defining the layout of the deep net
TString fErrorStrategy; ///< The string defining the error strategy for training
TString fTrainingStrategyString; ///< The string defining the training strategy
TString fWeightInitializationString; ///< The string defining the weight initialization method
TString fArchitectureString; ///< The string defining the architecture: CPU or GPU
bool fResume;

KeyValueVector_t fSettings; ///< Map for the training strategy
std::vector<TTrainingAESettings> fTrainingSettings; ///< The vector defining each training strategy

ClassDef(MethodAE, 0);

protected:
// provide a help message
void GetHelpMessage() const;

public:
/*! Constructor */
MethodAE(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);

/*! Constructor */
MethodAE(DataSetInfo &theData, const TString &theWeightFile);

/*! Virtual Destructor */
virtual ~MethodAE();

/*! Function for parsing the training settings, provided as a string
* in a key-value form. */
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);

/*! Check the type of analysis the deep learning network can do */
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);

/*! Methods for training the deep learning network */
void Train();

Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);

/*! Methods for writing and reading weights */
using MethodBase::ReadWeightsFromStream;
void AddWeightsXMLTo(void *parent) const;
void ReadWeightsFromXML(void *wghtnode);
void ReadWeightsFromStream(std::istream &);

/* Create ranking */
const Ranking *CreateRanking();

/* Getters */
size_t GetInputDepth() const { return fInputDepth; }
size_t GetInputHeight() const { return fInputHeight; }
size_t GetInputWidth() const { return fInputWidth; }

size_t GetBatchDepth() const { return fBatchDepth; }
size_t GetBatchHeight() const { return fBatchHeight; }
size_t GetBatchWidth() const { return fBatchWidth; }

const DeepNetImpl_t & GetDeepNet() const { return *fNet; }

DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; }
DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; }
DNN::ELossFunction GetLossFunction() const { return fLossFunction; }

TString GetInputLayoutString() const { return fInputLayoutString; }
TString GetBatchLayoutString() const { return fBatchLayoutString; }
TString GetLayoutString() const { return fLayoutString; }
TString GetErrorStrategyString() const { return fErrorStrategy; }
TString GetTrainingStrategyString() const { return fTrainingStrategyString; }
TString GetWeightInitializationString() const { return fWeightInitializationString; }
TString GetArchitectureString() const { return fArchitectureString; }

const std::vector<TTrainingAESettings> &GetTrainingSettings() const { return fTrainingSettings; }
std::vector<TTrainingAESettings> &GetTrainingSettings() { return fTrainingSettings; }
const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; }
KeyValueVector_t &GetKeyValueSettings() { return fSettings; }

/** Setters */
void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }

void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }

void SetWeightInitialization(DNN::EInitialization weightInitialization)
{
fWeightInitialization = weightInitialization;
}
void SetOutputFunction(DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
void SetErrorStrategyString(TString errorStrategy) { fErrorStrategy = errorStrategy; }
void SetTrainingStrategyString(TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
void SetWeightInitializationString(TString weightInitializationString)
{
fWeightInitializationString = weightInitializationString;
}
void SetArchitectureString(TString architectureString) { fArchitectureString = architectureString; }
void SetLayoutString(TString layoutString) { fLayoutString = layoutString; }
};

} // namespace TMVA

#endif
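As an illustration of how one block of the TrainingStrategy option string maps onto the TTrainingAESettings struct declared above. The field names come from this header; the option keys and the mapping itself are inferred from the analogous MethodDL options and should be treated as assumptions, since MethodAE::ProcessOptions() is not shown in this diff.

#include "TMVA/MethodAE.h"

// Hand-filled, hypothetical equivalent of a training-strategy block such as
// "LearningRate=1e-2,Momentum=0.9,ConvergenceSteps=20,BatchSize=256,
//  TestRepetitions=10,WeightDecay=1e-4,Regularization=L2,
//  DropConfig=0.0+0.0+0.0+0.0,Multithreading=True"
TMVA::TTrainingAESettings MakeExampleSettings()
{
   TMVA::TTrainingAESettings s;
   s.batchSize            = 256;                             // BatchSize
   s.testInterval         = 10;                              // TestRepetitions
   s.convergenceSteps     = 20;                              // ConvergenceSteps
   s.maxEpochs            = 2000;                            // made-up upper bound on epochs
   s.regularization       = TMVA::DNN::ERegularization::kL2; // Regularization=L2
   s.learningRate         = 1e-2;                            // LearningRate
   s.momentum             = 0.9;                             // Momentum
   s.weightDecay          = 1e-4;                            // WeightDecay
   s.dropoutProbabilities = {0.0, 0.0, 0.0, 0.0};            // DropConfig
   s.multithreading       = true;                            // Multithreading
   return s;
}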
1 change: 1 addition & 0 deletions tmva/tmva/inc/TMVA/Types.h
@@ -99,6 +99,7 @@ namespace TMVA {
kCategory ,
kDNN ,
kDL ,
kAE ,
kPyRandomForest ,
kPyAdaBoost ,
kPyGTB ,
53 changes: 53 additions & 0 deletions tmva/tmva/src/DNN/Architectures/Cpu/LossFunctions.cxx
@@ -194,5 +194,58 @@ void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TC
Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
}

//______________________________________________________________________________
template <typename AFloat>
AFloat TCpu<AFloat>::KLDivergence(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
const TCpuMatrix<AFloat> &weights)
{
const AFloat *dataY = Y.GetRawDataPointer();
const AFloat *dataOutput = output.GetRawDataPointer();
const AFloat *dataWeights = weights.GetRawDataPointer();
std::vector<AFloat> temp(Y.GetNElements());
size_t m = Y.GetNrows();
AFloat norm = 1.0 / ((AFloat) m);

auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
AFloat dy = 1 + dataOutput[workerID] - std::pow(dataY[workerID], 2) - std::pow(std::exp(dataOutput[workerID]), 2);
temp[workerID] = dataWeights[workerID % m] * dy;
return 0;
};

auto reduction = [](const std::vector<AFloat> & v )
{
return std::accumulate(v.begin(),v.end(),AFloat{});
};

Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}

//______________________________________________________________________________
template <typename AFloat>
void TCpu<AFloat>::KLDivergenceGradients(TCpuMatrix<AFloat> &dY, TCpuMatrix<AFloat> &dSD, const TCpuMatrix<AFloat> &Y,
const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{

AFloat *dataDY = dY.GetRawDataPointer();
AFloat *dataDSD = dSD.GetRawDataPointer();
const AFloat *dataY = Y.GetRawDataPointer();
const AFloat *dataOutput = output.GetRawDataPointer();
const AFloat *dataWeights = weights.GetRawDataPointer();

size_t m = Y.GetNrows();
AFloat norm = 1.0 / ((AFloat) m);

auto f = [&dataDY, &dataDSD, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
dataDY[workerID] = -2.0 * norm * dataY[workerID];
dataDY[workerID] *= dataWeights[workerID % m];
dataDSD[workerID] = 1.0 - (2.0 * std::exp(2.0 * dataOutput[workerID]));
dataDSD[workerID] *= norm * dataWeights[workerID % m];
return 0;
};

Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
}

} // namespace DNN
} // namespace TMVA
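The kernel above applies, element by element, the derivatives of the weighted term w_i (1 + s - mu^2 - e^{2s}) accumulated by KLDivergence, with the same 1/m normalization:

\frac{\partial L}{\partial \mu_{ij}} = -\frac{2}{m}\, w_i\, \mu_{ij},
\qquad
\frac{\partial L}{\partial s_{ij}} = \frac{w_i}{m}\left(1 - 2\, e^{2 s_{ij}}\right)

where s denotes the stored log standard deviation, w_i the event weight, and m the number of rows (the batch size).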
42 changes: 42 additions & 0 deletions tmva/tmva/src/DNN/Architectures/Reference/LossFunctions.cxx
@@ -151,5 +151,47 @@ void TReference<AReal>::SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const
}
}

//______________________________________________________________________________
template <typename AReal>
AReal TReference<AReal>::KLDivergence(const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights)
{
size_t m,n;
m = Mean.GetNrows();
n = Mean.GetNcols();

AReal result = 0.0;

for(size_t i=0; i<m; i++){
for(size_t j=0; j<n; j++){
AReal sum = 1 + StandardDeviation(i, j) - (Mean(i, j) * Mean(i, j)) - std::pow(std::exp(StandardDeviation(i, j)) , 2);
result += weights(i,0) * sum;
}
}

result /= static_cast<AReal>(m);

return result;
}

template <typename AReal>
void TReference<AReal>::KLDivergenceGradients(TMatrixT<AReal> &dMean, TMatrixT<AReal> &dStandardDeviation,
const TMatrixT<AReal> &Mean, const TMatrixT<AReal> &StandardDeviation,
const TMatrixT<AReal> &weights)
{
size_t m,n;
m = Mean.GetNrows();
n = Mean.GetNcols();
AReal norm = 1.0 / m ;

for(size_t i = 0; i < m; i++){
for(size_t j = 0; j < n; j++){
dMean(i,j) = -2.0 * norm * Mean(i,j) * weights(i, 0);
dStandardDeviation(i,j) = 1.0 - (2.0 * std::exp(2.0 * StandardDeviation(i,j)));
dStandardDeviation(i,j) *= norm * weights(i, 0);
}
}
}

} // namespace DNN
} // namespace TMVA
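A quick sanity check of the reference implementation above: for a single element with unit weight, mean mu = 0 and stored log standard deviation s = 0 (that is, sigma = 1, the standard normal), the accumulated term is 1 + 0 - 0 - 1 = 0, so the loss vanishes exactly at the prior, as expected for a KL-style regularizer.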
1,616 changes: 1,616 additions & 0 deletions tmva/tmva/src/MethodAE.cxx

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tmva/tmva/test/DNN/CNN/CMakeLists.txt
@@ -75,5 +75,7 @@ ROOT_ADD_TEST(TMVA-DNN-CNN-Backpropagation-CPU COMMAND testConvBackpropagationCp
ROOT_EXECUTABLE(testMethodDLCpu TestMethodDL.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-CNN-MethodDL-CPU COMMAND testMethodDLCpu)

ROOT_EXECUTABLE(testMethodAECpu TestMethodAE.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-CNN-MethodAE-CPU COMMAND testMethodAECpu)

endif ()
38 changes: 38 additions & 0 deletions tmva/tmva/test/DNN/CNN/TestMethodAE.cxx
@@ -0,0 +1,38 @@
// @(#)root/tmva/tmva/cnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Testing MethodAE for the CPU backend *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#include "TestMethodAE.h"
#include "TString.h"

int main()
{
std::cout << "Testing Method AE for CPU backend: " << std::endl;

TString archCPU = "CPU";

testMethodAE_DNN(archCPU);

}
155 changes: 155 additions & 0 deletions tmva/tmva/test/DNN/CNN/TestMethodAE.h
@@ -0,0 +1,155 @@
// @(#)root/tmva/tmva/cnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* Testing MethodAE *
* *
* Authors (alphabetical): *
* Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
* *
* Copyright (c) 2005-2015: *
* CERN, Switzerland *
* U. of Victoria, Canada *
* MPI-K Heidelberg, Germany *
* U. of Bonn, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/

#ifndef TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H
#define TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_AE_H

#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TROOT.h"

#include "TMVA/MethodAE.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Config.h"

#include "MakeImageData.h"

#include <iostream>


/** Testing the entire pipeline of MethodAE, when only a multilayer perceptron
* is constructed. */
//______________________________________________________________________________
void testMethodAE_DNN(TString architectureStr)
{

ROOT::EnableImplicitMT(1);
TMVA::Config::Instance();

TFile *input(0);
// TString fname = "/Users/vladimirilievski/Desktop/Vladimir/GSoC/ROOT-CI/common-version/root/tmva/tmva/test/DNN/CNN/"
// "dataset/tmva_class_example.root";
/*
TString fname = "http://root.cern.ch/files/tmva_class_example.root";
TString fopt = "CACHEREAD";
input = TFile::Open(fname,fopt);
*/
input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD");


TString outfileName("TMVA_DNN.root");
TFile *outputFile = TFile::Open(outfileName, "RECREATE");

TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");

dataloader->AddVariable( "var1", "Variable 1", "units", 'F' );
dataloader->AddVariable( "var2", "Variable 2", "units", 'F' );
dataloader->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
dataloader->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

dataloader->AddTarget("var1");
dataloader->AddTarget("var2");

TTree *regTree = (TTree*)input->Get("TreeR");

Double_t regWeight = 1.0;

dataloader->AddRegressionTree( regTree, regWeight );

TCut mycut = "";

dataloader->PrepareTrainingAndTestTree( mycut,
"nTrain_Regression=1000:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

// Input Layout
TString inputLayoutString("InputLayout=1|1|2");

// Batch Layout
TString batchLayoutString("BatchLayout=256|1|2");

// General layout.
TString layoutString("Layout=Encoder={RESHAPE|1|1|2|FLAT,DENSE|128|TANH,DENSE|64|TANH}Decoder={DENSE|128|TANH,DENSE|2|LINEAR,LINEAR}");

// Training strategies.
TString training0("LearningRate=1e-1,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.5+0.5+0.5, Multithreading=True");
TString training1("LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
TString training2("LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
"ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
"WeightDecay=1e-4,Regularization=L2,"
"DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
TString trainingStrategyString("TrainingStrategy=");
trainingStrategyString += training0 + "|" + training1 + "|" + training2;

// General Options.
TString dnnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:"
"WeightInitialization=XAVIERUNIFORM");


// Concatenate all option strings
dnnOptions.Append(":");
dnnOptions.Append(inputLayoutString);

dnnOptions.Append(":");
dnnOptions.Append(batchLayoutString);

dnnOptions.Append(":");
dnnOptions.Append(layoutString);

dnnOptions.Append(":");
dnnOptions.Append(trainingStrategyString);

dnnOptions.Append(":Architecture=");
dnnOptions.Append(architectureStr);

// create factory
TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
"!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );

TString methodTitle = "AE_" + architectureStr;
factory->BookMethod(dataloader, TMVA::Types::kAE, methodTitle, dnnOptions);

// Train MVAs using the set of training events
factory->TrainAllMethods();

// Save the output
outputFile->Close();

std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
std::cout << "==> TMVAClassification is done!" << std::endl;

delete factory;
delete dataloader;
}

#endif
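A note on the Layout string used in the test above: it appears to follow the MethodDL layer grammar extended with named sub-networks, i.e. Encoder={...} and Decoder={...} blocks, layers separated by commas, and layer parameters separated by '|'. This reading is inferred from the string used here and from the CreateEncoder/CreateDecoder and Parse*Layer declarations in MethodAE.h; it is an interpretation, not documented behaviour. A purely dense variant might look like:

// Hypothetical alternative layout: a symmetric dense autoencoder with a
// 4-dimensional bottleneck (layer names and '|' field order taken from the
// string used in testMethodAE_DNN above).
TString layoutString("Layout=Encoder={DENSE|64|TANH,DENSE|16|TANH,DENSE|4|LINEAR}"
                     "Decoder={DENSE|16|TANH,DENSE|64|TANH,DENSE|2|LINEAR}");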
15 changes: 15 additions & 0 deletions tmva/tmva/test/DNN/TestLossFunctions.cxx
@@ -71,4 +71,19 @@ int main()
std::cout << "Testing softmax cross entropy gradient: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;


//
// KL Divergence
//

error = testKLDivergence<TReference<double>>(10);
std::cout << "Testing KL divergence loss: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;

error = testKLDivergenceGradients<TReference<double>>(10);
std::cout << "Testing KL divergence gradients: ";
std::cout << "maximum relative error = " << print_error(error) << std::endl;
if (error > 1e-3) return 1;
}
103 changes: 103 additions & 0 deletions tmva/tmva/test/DNN/TestLossFunctions.h
@@ -328,3 +328,106 @@ auto testSoftmaxCrossEntropyGradients(size_t ntests)
}
return maximumError;
}

//______________________________________________________________________________
//
// KL Divergence
//______________________________________________________________________________

template <typename Architecture>
auto testKLDivergence(size_t ntests)
-> typename Architecture::Scalar_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Scalar_t = typename Architecture::Scalar_t;
Double_t maximumError = 0.0;

for (size_t i = 0; i < ntests; i++) {
size_t m = rand() % 100 + 1;
size_t n = rand() % 100 + 1;

TMatrixT<Double_t> W(m, 1);
TMatrixT<Double_t> X(m, n);
TMatrixT<Double_t> Y(m, n);
TMatrixT<Double_t> Z(m, n);

W = 1.0;
randomMatrix(X);
randomMatrix(Y);

Matrix_t WArch(W);
Matrix_t XArch(X);
Matrix_t YArch(Y);

Scalar_t ce = evaluate<Architecture>(ELossFunction::kKLDivergence, YArch, XArch, WArch);

Scalar_t ceReference = 0.0;
for (size_t j = 0; j < m; j++) {
for (size_t k = 0; k < n; k++) {
ceReference += 1 + XArch(j, k) - pow(YArch(j, k), 2) - pow(exp(XArch(j, k)), 2);
}
}
ceReference /= (Scalar_t) m;

Double_t error;
if (ceReference != 0.0)
error = std::fabs((ce - ceReference) / ceReference);
else
error = std::fabs(ce - ceReference);
maximumError = std::max(error, maximumError);
}
return maximumError;
}

//______________________________________________________________________________
template <typename Architecture>
auto testKLDivergenceGradients(size_t ntests)
-> typename Architecture::Scalar_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Scalar_t = typename Architecture::Scalar_t;
Double_t maximumError = 0.0;

for (size_t i = 0; i < ntests; i++) {
size_t m = 8; //rand() % 100 + 1;
size_t n = 8; //rand() % 100 + 1;

TMatrixT<Double_t> W(m, 1);
TMatrixT<Double_t> X(m, n);
TMatrixT<Double_t> Y(m, n);
TMatrixT<Double_t> MeanRef(m, n);
TMatrixT<Double_t> SDRef(m, n);

randomMatrix(W);
randomMatrix(X);
randomMatrix(Y);

Matrix_t WArch(W);
Matrix_t XArch(X);
Matrix_t YArch(Y);
Matrix_t MeanArch(Y);
Matrix_t SDArch(Y);

evaluateGradients<Architecture>(MeanArch, SDArch, ELossFunction::kKLDivergence, YArch, XArch, WArch);

Double_t norm = 1.0 / m;

for (size_t j = 0; j < m; j++) {
for (size_t k = 0; k < n; k++) {
MeanRef(j, k) = -2.0 * Y(j, k) * W(j, 0) * norm;
SDRef(j, k) = (1.0 - (2.0 * exp(2.0 * X(j ,k)))) * norm;
SDRef(j, k) *= W(j, 0);
}
}

TMatrixT<Double_t> dMean(MeanArch);
TMatrixT<Double_t> dSD(SDArch);
Double_t mean_error = maximumRelativeError(dMean, MeanRef);
Double_t sd_error = maximumRelativeError(dSD, SDRef);
Double_t error = (mean_error + sd_error) / 2;
maximumError = std::max(error, maximumError);
}
return maximumError;
}