
Integrate nb nni #84


Open · wants to merge 16 commits into base: master
10 changes: 8 additions & 2 deletions LearningMachine.py
@@ -3,7 +3,6 @@

import torch
import torch.nn as nn

import os
import time
import numpy as np
@@ -29,7 +28,7 @@


class LearningMachine(object):
-    def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, **kwargs):
+    def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, automl=False, **kwargs):
        if initialize is True:
            assert vocab_info is not None
            self.model = Model(conf, problem, vocab_info, use_gpu)
@@ -54,6 +53,7 @@ def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_g
        self.problem = problem
        self.phase = phase
        self.use_gpu = use_gpu
+        self.automl = automl

        # if it is a 2-class classification problem, figure out the real positive label
        # CAUTION: multi-class classification
@@ -335,6 +335,9 @@ def train(self, optimizer, loss_fn):
            del data_batches, length_batches, target_batches
            lr_scheduler.step()
            epoch += 1
+        if self.automl:
+            import nni
+            nni.report_final_result(float(best_result))

    def test(self, loss_fn, test_data_path=None, predict_output_path=None):
        if test_data_path is None:
@@ -622,6 +625,9 @@ def evaluate(self, data, length, target, input_types, evaluator,

        if phase == 'valid':
            cur_result = evaluator.get_first_metric_result()
+            if self.automl:
+                import nni
+                nni.report_intermediate_result(cur_result)
            if self.evaluator.compare(cur_result, cur_best_result) == 1:
                logging.info(
                    'Cur result %f is better than previous best result %s, renew the best model now...' % (cur_result, "%f" % cur_best_result if cur_best_result else "None"))
18 changes: 17 additions & 1 deletion ModelConf.py
@@ -11,13 +11,14 @@
import torch
import logging
import shutil
from string import digits

from losses.BaseLossConf import BaseLossConf
#import traceback
from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields, ConstantStatic
from utils.common_utils import log_set, prepare_dir, md5, load_from_json, dump_to_json
from utils.exceptions import ConfigurationError
import numpy as np
import random

class ConstantStaticItems(ConstantStatic):
@staticmethod
@@ -174,6 +175,21 @@ def raise_configuration_error(self, key):
    def load_from_file(self, conf_path):
        # load file
        self.conf = load_from_json(conf_path, debug=False)

+        if self.params.automl:
+            import nni
+            # Fetch one sampled hyperparameter set from the NNI tuner and
+            # overwrite the corresponding entries of the loaded conf.
+            parameters = nni.get_next_parameter()
+            for para, value in parameters.items():
+                # A dotted key such as 'training_params.batch_size' is a path
+                # into self.conf; purely numeric components index into lists.
+                paths = [int(p) if p[0] in digits else p for p in para.split('.')]
+                it = self.conf
+                for path in paths[:-1]:
+                    try:
+                        it = it[path]
+                    except (KeyError, IndexError):
+                        raise KeyError('Cannot access {} in parameter {}. Please check parameter names in search space file.'
+                                       .format(path, para))
+                # Assign through the parent container so the change actually lands in self.conf.
+                it[paths[-1]] = value

        self = self.Conf.load_data(self, {'Conf' : self.conf}, key_prefix_desc='Conf')
        self.language = self.language.lower()
        self.configurate_outputs()
17 changes: 17 additions & 0 deletions Tutorial.md
@@ -26,6 +26,7 @@
* [Extra Feature Support](#extra-feature)
* [Learning Rate Decay](#lr-decay)
* [Fix Embedding Weight & Limit Vocabulary Size](#fix-embedding)
* [Auto Tuning Hyperparameters](#auto-ml)
* [Frequently Asked Questions](#faq)

## <span id="installation">Installation</span>
@@ -732,6 +733,22 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em
***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The max size of corpus vocabulary. If corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from every file to build the vocabulary.

## <span id="auto-ml">Auto Tuning Hyperparameters</span>

This feature integrates [NNI](https://github.com/microsoft/nni) so that NeuronBlocks can try hyperparameters generated by various tuning algorithms and run experiments on different training platforms. Install the dependency with `pip install nni`.

In an experiment, a set of hyperparameters is sampled from the search space (the space of hyperparameter configurations to search over), a trial is run with those values, and the result is collected. From these results, tuners can find parameter values within the search space that fit the task better. This process is known as Hyperparameter Optimization (HPO).
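To make the trial side of this loop concrete, here is a minimal sketch of the NNI reporting protocol that this PR wires into `LearningMachine.py`; `train_one_epoch` and its random metric are placeholders, not NeuronBlocks code.

```python
import random

import nni


def train_one_epoch(params):
    # Stand-in for a real training step; returns a fake validation metric.
    return random.uniform(0.5, 1.0)


if __name__ == '__main__':
    params = nni.get_next_parameter()           # one sample from search_space.json
    metric = 0.0
    for epoch in range(3):
        metric = train_one_epoch(params)
        nni.report_intermediate_result(metric)  # streamed to the tuner during training
    nni.report_final_result(metric)             # the trial's final score
```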

Before launching an AutoML experiment, specify the search space in `search_space.json` and the tuning algorithm in `config.yaml`, and modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original architecture as its parameter, and how the model uses the tuned hyperparameters is defined there. More details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). An example is available at `autoML-demo`.

Here are some instructions about the files mentioned above.

* `config.yaml`: This file specifies the basic experiment settings, including the experiment name, the author name, which tuner to use, and the command that starts a trial. Users can also make extra settings, such as how many GPUs are available for one trial.
* `search_space.json`: Tuners search for hyperparameter values within the ranges specified in this file. Users can set an empirical prior distribution for every searched hyperparameter to speed up the HPO process. The names of the tuning variables must follow a dotted-path pattern so that they align with the architecture described in the JSON file. For example, to tune `architecture['training_params']['batch_size']` automatically, the variable name in `search_space.json` should be `training_params.batch_size`. In short, concatenate the variable's path with dots to map from the search space onto the architecture JSON (see the sketch after this list).
* `exp.py`: The function `get_hyperparameters` accepts the model from `model.json` as a parameter and gets new hyperparameters from NNI; the corresponding hyperparameters in the model are then replaced by the new values.
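Since the PR text does not spell out `get_hyperparameters` itself, the sketch below shows one plausible implementation of the dotted-path merge described above, mirroring the resolution logic this PR adds to `ModelConf.py`; the tiny demo config is illustrative only.

```python
from string import digits

import nni


def get_hyperparameters(architecture):
    """Overwrite entries of the model dict with values sampled by NNI.

    A dotted key such as 'training_params.batch_size' is a path into the
    dict; purely numeric components index into lists.
    """
    for dotted_key, value in nni.get_next_parameter().items():
        paths = [int(p) if p[0] in digits else p for p in dotted_key.split('.')]
        it = architecture
        for path in paths[:-1]:
            it = it[path]
        it[paths[-1]] = value
    return architecture


if __name__ == '__main__':
    conf = {'training_params': {'batch_size': 128}}
    # Under a running NNI experiment this applies the sampled values,
    # e.g. {'training_params.batch_size': 64} makes batch_size 64.
    print(get_hyperparameters(conf))
```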

After setting up, an experiment can be launched simply by `python3 exp.py --config_file CONFIG_FILE --port PORT`.

## <span id="faq">Frequently Asked Questions</span>

26 changes: 26 additions & 0 deletions Tutorial_zh_CN.md
@@ -26,6 +26,7 @@
* [Extra Feature Support](#extra-feature)
* [Learning Rate Decay](#lr-decay)
* [Fix Embedding Weight & Vocabulary Size Settings](#fix-embedding)
* [Auto Tuning Hyperparameters](#auto-ml)
* [Frequently Asked Questions](#faq)

## <span id="installation">安装</span>
@@ -723,4 +724,29 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from every file to build the vocabulary.

## <span id="auto-ml">自动超参数调整</span>

This feature integrates [NNI](https://github.com/microsoft/nni) so that NeuronBlocks can try multiple sets of hyperparameters generated by various tuning algorithms and run experiments on different training platforms. Install the dependency with `pip install nni`.

In an experiment, a set of hyperparameters is first sampled from the search space, a trial is run with those values, and the result is collected. From these results, the tuning algorithm can find hyperparameter values within the search space that fit the task better. This process is known as Hyperparameter Optimization (HPO). The search space is the range of values the hyperparameters may take; the tuning algorithm searches for suitable values within this space.

Before launching an AutoML experiment, specify the search space in `search_space.json` and the tuning algorithm in `config.yaml`, and modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original model as its parameter, and how the model uses the tuned hyperparameters is defined there. Details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). A fully configured example is available in the `autoML-demo` directory.

Here are some notes on the files mentioned above.

* `config.yaml`: This file specifies the basic experiment settings, including the experiment name, the author name, which tuning algorithm to use, and the command that starts a trial. Users can also make extra settings, such as how many GPUs one trial may use.
* `search_space.json`: The tuning algorithm searches for hyperparameter values within the ranges specified in this file. Users can set an empirical prior distribution for every searched hyperparameter to speed up the search. The variables defined in the search space file are linked, through a fixed naming pattern, to the variables in the JSON file that describes the model structure. For example, to tune `architecture['training_params']['batch_size']` in the JSON file, the variable name in the search space should be `training_params.batch_size`. In short, concatenate the path of the variable you want to tune with `.` to map between the model-structure JSON file and the search-space variables.
* `exp.py`: The function `get_hyperparameters` accepts the model from `model.json` as a parameter and gets new hyperparameters from NNI; the corresponding hyperparameters in the model must be replaced by the new values.

After setting up, an experiment can be launched simply by `python3 exp.py --config_file CONFIG_FILE --port PORT`.

## <span id="faq">常见问题与答案</span>
24 changes: 24 additions & 0 deletions autoML-demo/config.yaml
@@ -0,0 +1,24 @@
authorName: default
experimentName: lstm
trialConcurrency: 1
maxExecDuration: 1000h
maxTrialNum: 500
# choices: local, remote, pai, kubeflow
trainingServicePlatform: local
searchSpacePath: ./search_space.json
# choices: true, false; default: false
useAnnotation: false
# choices: true, false; default: false
multiThread: false
tuner:
  builtinTunerName: TPE
  classArgs:
    optimize_mode: maximize
    parallel_optimize: True
    constant_liar_type: min
trial:
  command: pip install -r requirements.txt && python train.py --conf_path model.json --automl
  codeDir: .
  gpuNum: 1
localConfig:
  useActiveGpu: true
22 changes: 22 additions & 0 deletions autoML-demo/exp.py
@@ -0,0 +1,22 @@
from nnicli import Experiment
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', type=str, required=True, help='experiment config file')
    parser.add_argument('--port', type=int, default=8080, help='port on which to serve the NNI web UI')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Start an NNI experiment with the given config; the web UI becomes
    # available on the chosen port.
    exp = Experiment()
    exp.start_experiment(args.config_file, port=args.port)
190 changes: 190 additions & 0 deletions autoML-demo/model.json
@@ -0,0 +1,190 @@
{
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
"tool_version": "1.1.0",
"model_description": "This model is used for question answer matching task, and it achieved auc: 0.7854 in WikiQACorpus test set.",
"inputs": {
"use_cache": false,
"dataset_type": "classification",
"data_paths": {
"train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"valid_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"test_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"pre_trained_emb": "./dataset/Glove/glove.840B.300d.txt"
},
"file_with_col_header": true,
"add_start_end_for_seq": true,
"file_header": {
"question_text": 0,
"passage_text": 1,
"label": 2
},
"model_inputs": {
"question": ["question_text"],
"passage": ["passage_text"]
},
"target": ["label"]
},
"outputs":{
"save_base_dir": "models_wikiqa_matchatt/",
"model_name": "model.nb",
"train_log_name": "train.log",
"test_log_name": "test.log",
"predict_log_name": "predict.log",
"predict_fields": ["prediction"],
"predict_output_name": "predict.tsv"
},
"training_params": {
"vocabulary": {
"min_word_frequency": 1
},
"optimizer": {
"name": "Adam",
"params": {
"lr": 0.002
}
},
"lr_decay": 1,
"minimum_lr": 0.0001,
"epoch_start_lr_decay": 3,
"steps_per_validation": 50,
"use_gpu": true,
"batch_size": 128,
"batch_num_to_show_results": 100,
"max_epoch": 10,
"valid_times_per_epoch": 5,
"max_lengths": {
"query": 50,
"passage": 200
},
"cpu_num_workers": 4
},
"architecture":[
{
"layer": "Embedding",
"conf": {
"word": {
"cols": ["question_text", "passage_text"],
"dim": 300,
"fix_weight": true,
"dropout": 0
}
}
},
{
"layer_id": "query_dropout",
"layer": "Dropout",
"conf": {
"dropout": 0.1
},
"inputs": ["question"]
},
{
"layer_id": "passage_dropout",
"layer": "Dropout",
"conf": {
"dropout": 0.1
},
"inputs": ["passage"]
},
{
"layer_id": "query_1",
"layer": "BiLSTM",
"conf": {
"hidden_dim": 128,
"dropout": 0.1,
"num_layers": 2
},
"inputs": ["query_dropout"]
},
{
"layer_id": "passage_1",
"layer": "BiLSTM",
"conf": {
"hidden_dim": 128,
"dropout": 0.1,
"num_layers": 2
},
"inputs": ["passage_dropout"]
},
{
"layer_id": "query_matched",
"layer": "MatchAttention",
"conf": {
},
"inputs": ["query_dropout", "passage_dropout"]
},
{
"layer_id": "passage_matched",
"layer": "MatchAttention",
"conf": {
},
"inputs": ["passage_dropout", "query_dropout"]
},
{
"layer_id": "query_combined",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["query_1","query_matched"]
},
{
"layer_id": "passage_combined",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["passage_1","passage_matched"]
},
{
"layer_id": "query_linear_att",
"layer": "Pooling",
"conf": {
"pool_axis": 1,
"pool_type": "max"
},
"inputs": ["query_combined"]
},
{
"layer_id": "passage_linear_att",
"layer": "Pooling",
"conf": {
"pool_axis": 1,
"pool_type": "max"
},
"inputs": ["passage_combined"]
},
{
"layer_id": "comb",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["query_linear_att","passage_linear_att"]
},
{
"output_layer_flag": true,
"layer_id": "output",
"layer": "Linear",
"conf": {
"hidden_dim": [128,2],
"activation": "PReLU",
"last_hidden_activation": false
},
"inputs": ["comb"]
}
],
"loss": {
"losses": [
{
"type": "CrossEntropyLoss",
"conf": {
"weight": [0.4,0.6],
"size_average": true
},
"inputs": ["output","label"]
}
]
},
"metrics": ["auc","accuracy"]
}
8 changes: 8 additions & 0 deletions autoML-demo/search_space.json
@@ -0,0 +1,8 @@
{
"architecture.3.conf.dropout": { "_type": "uniform", "_value": [0, 0.999] },
"training_params.batch_size": { "_type": "choice", "_value": [32, 64, 128, 256] },
"training_params.optimizer.params.lr": { "_type": "loguniform", "_value": [0.0001, 0.01] },
"architecture.0.conf.word.dropout": { "_type": "uniform", "_value": [0, 0.999] },
"architecture.2.conf.dropout'": { "_type": "uniform", "_value": [0, 0.999] },
"training_params.lr_decay": { "_type": "uniform", "_value": [0.95, 1] }
}