
Integrate nb nni #84


Open · wants to merge 16 commits into base: master
10 changes: 8 additions & 2 deletions LearningMachine.py
@@ -3,7 +3,6 @@

import torch
import torch.nn as nn

import os
import time
import numpy as np
@@ -29,7 +28,7 @@


class LearningMachine(object):
-    def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, **kwargs):
+    def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, automl=False, **kwargs):
        if initialize is True:
            assert vocab_info is not None
            self.model = Model(conf, problem, vocab_info, use_gpu)
@@ -54,6 +53,7 @@ def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_g
        self.problem = problem
        self.phase = phase
        self.use_gpu = use_gpu
+        self.automl = automl

        # if it is a 2-class classification problem, figure out the real positive label
        # CAUTION: multi-class classification
@@ -335,6 +335,9 @@ def train(self, optimizer, loss_fn):
            del data_batches, length_batches, target_batches
            lr_scheduler.step()
            epoch += 1
+        if self.automl:
+            import nni
+            nni.report_final_result(float(best_result))

    def test(self, loss_fn, test_data_path=None, predict_output_path=None):
        if test_data_path is None:
@@ -622,6 +625,9 @@ def evaluate(self, data, length, target, input_types, evaluator,

        if phase == 'valid':
            cur_result = evaluator.get_first_metric_result()
+            if self.automl:
+                import nni
+                nni.report_intermediate_result(cur_result)
            if self.evaluator.compare(cur_result, cur_best_result) == 1:
                logging.info(
                    'Cur result %f is better than previous best result %s, renew the best model now...' % (cur_result, "%f" % cur_best_result if cur_best_result else "None"))
18 changes: 17 additions & 1 deletion ModelConf.py
@@ -11,13 +11,14 @@
import torch
import logging
import shutil
from string import digits

from losses.BaseLossConf import BaseLossConf
#import traceback
from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields, ConstantStatic
from utils.common_utils import log_set, prepare_dir, md5, load_from_json, dump_to_json
from utils.exceptions import ConfigurationError
import numpy as np
import random

class ConstantStaticItems(ConstantStatic):
@staticmethod
@@ -174,6 +175,21 @@ def raise_configuration_error(self, key):
    def load_from_file(self, conf_path):
        # load file
        self.conf = load_from_json(conf_path, debug=False)

+        if self.params.automl:
+            import nni
+            # Fetch one sampled hyperparameter set from the NNI tuner and
+            # overwrite the corresponding entries of the loaded conf.
+            parameters = nni.get_next_parameter()
+            for para, value in parameters.items():
+                # A dotted key such as 'training_params.batch_size' is a path
+                # into self.conf; purely numeric components index into lists.
+                paths = [int(p) if p[0] in digits else p for p in para.split('.')]
+                it = self.conf
+                for path in paths[:-1]:
+                    try:
+                        it = it[path]
+                    except (KeyError, IndexError):
+                        raise KeyError('Cannot access {} in parameter {}. Please check parameter names in search space file.'
+                                       .format(path, para))
+                # Assign through the parent container so the change actually lands in self.conf.
+                it[paths[-1]] = value

        self = self.Conf.load_data(self, {'Conf' : self.conf}, key_prefix_desc='Conf')
        self.language = self.language.lower()
        self.configurate_outputs()
17 changes: 17 additions & 0 deletions Tutorial.md
@@ -26,6 +26,7 @@
* [Extra Feature Support](#extra-feature)
* [Learning Rate Decay](#lr-decay)
* [Fix Embedding Weight & Limit Vocabulary Size](#fix-embedding)
* [Auto Tuning Hyperparameters](#auto-ml)
* [Frequently Asked Questions](#faq)

## <span id="installation">Installation</span>
@@ -732,6 +733,22 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em
***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The max size of corpus vocabulary. If corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from every file to build the vocabulary.

## <span id="auto-ml">Auto Tuning Hyperparameters</span>

This feature integrates [NNI](https://github.com/microsoft/nni) so that NeuronBlocks can try hyperparameters generated by various tuning algorithms and run experiments on different training platforms. Install the dependency with `pip install nni`.

In an experiment, a set of hyperparameters is sampled from the search space (the space of hyperparameter configurations to search over), a trial is run with those values, and the result is collected. From these results, tuners can find parameter values within the search space that fit the task better. This process is known as Hyperparameter Optimization (HPO).
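To make the trial side of this loop concrete, here is a minimal sketch of the NNI reporting protocol that this PR wires into `LearningMachine.py`; `train_one_epoch` and its random metric are placeholders, not NeuronBlocks code.

```python
import random

import nni


def train_one_epoch(params):
    # Stand-in for a real training step; returns a fake validation metric.
    return random.uniform(0.5, 1.0)


if __name__ == '__main__':
    params = nni.get_next_parameter()           # one sample from search_space.json
    metric = 0.0
    for epoch in range(3):
        metric = train_one_epoch(params)
        nni.report_intermediate_result(metric)  # streamed to the tuner during training
    nni.report_final_result(metric)             # the trial's final score
```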

Before launching an AutoML experiment, specify the search space in `search_space.json` and the tuning algorithm in `config.yaml`, and modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original architecture as its parameter, and how the model uses the tuned hyperparameters is defined there. More details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). An example is available at `autoML-demo`.

Here are some instructions about the files mentioned above.

* `config.yaml`: This file specifies the basic experiment settings, including the experiment name, the author name, which tuner to use, and the command that starts a trial. Users can also make extra settings, such as how many GPUs are available for one trial.
* `search_space.json`: Tuners search for hyperparameter values within the ranges specified in this file. Users can set an empirical prior distribution for every searched hyperparameter to speed up the HPO process. The names of the tuning variables must follow a dotted-path pattern so that they align with the architecture described in the JSON file. For example, to tune `architecture['training_params']['batch_size']` automatically, the variable name in `search_space.json` should be `training_params.batch_size`. In short, concatenate the variable's path with dots to map from the search space onto the architecture JSON (see the sketch after this list).
* `exp.py`: The function `get_hyperparameters` accepts the model from `model.json` as a parameter and gets new hyperparameters from NNI; the corresponding hyperparameters in the model are then replaced by the new values.
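Since the PR text does not spell out `get_hyperparameters` itself, the sketch below shows one plausible implementation of the dotted-path merge described above, mirroring the resolution logic this PR adds to `ModelConf.py`; the tiny demo config is illustrative only.

```python
from string import digits

import nni


def get_hyperparameters(architecture):
    """Overwrite entries of the model dict with values sampled by NNI.

    A dotted key such as 'training_params.batch_size' is a path into the
    dict; purely numeric components index into lists.
    """
    for dotted_key, value in nni.get_next_parameter().items():
        paths = [int(p) if p[0] in digits else p for p in dotted_key.split('.')]
        it = architecture
        for path in paths[:-1]:
            it = it[path]
        it[paths[-1]] = value
    return architecture


if __name__ == '__main__':
    conf = {'training_params': {'batch_size': 128}}
    # Under a running NNI experiment this applies the sampled values,
    # e.g. {'training_params.batch_size': 64} makes batch_size 64.
    print(get_hyperparameters(conf))
```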

After setting up, an experiment can be launched simply by `python3 exp.py --config_file CONFIG_FILE --port PORT`.

## <span id="faq">Frequently Asked Questions</span>

26 changes: 26 additions & 0 deletions Tutorial_zh_CN.md
@@ -26,6 +26,7 @@
* [Extra Feature Support](#extra-feature)
* [Learning Rate Decay](#lr-decay)
* [Fix Embedding Weight & Vocabulary Size Settings](#fix-embedding)
* [Auto Tuning Hyperparameters](#auto-ml)
* [Frequently Asked Questions](#faq)

## <span id="installation">安装</span>
@@ -723,4 +724,29 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from every file to build the vocabulary.

## <span id="auto-ml">自动超参数调整</span>

This feature integrates [NNI](https://github.com/microsoft/nni) so that NeuronBlocks can try multiple sets of hyperparameters generated by various tuning algorithms and run experiments on different training platforms. Install the dependency with `pip install nni`.

In an experiment, a set of hyperparameters is first sampled from the search space, a trial is run with those values, and the result is collected. From these results, the tuning algorithm can find hyperparameter values within the search space that fit the task better. This process is known as Hyperparameter Optimization (HPO). The search space is the range of values the hyperparameters may take; the tuning algorithm searches for suitable values within this space.

Before launching an AutoML experiment, specify the search space in `search_space.json` and the tuning algorithm in `config.yaml`, and modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original model as its parameter, and how the model uses the tuned hyperparameters is defined there. Details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). A fully configured example is available in the `autoML-demo` directory.

Here are some notes on the files mentioned above.

* `config.yaml`: This file specifies the basic experiment settings, including the experiment name, the author name, which tuning algorithm to use, and the command that starts a trial. Users can also make extra settings, such as how many GPUs one trial may use.
* `search_space.json`: The tuning algorithm searches for hyperparameter values within the ranges specified in this file. Users can set an empirical prior distribution for every searched hyperparameter to speed up the search. The variables defined in the search space file are linked, through a fixed naming pattern, to the variables in the JSON file that describes the model structure. For example, to tune `architecture['training_params']['batch_size']` in the JSON file, the variable name in the search space should be `training_params.batch_size`. In short, concatenate the path of the variable you want to tune with `.` to map between the model-structure JSON file and the search-space variables.
* `exp.py`: The function `get_hyperparameters` accepts the model from `model.json` as a parameter and gets new hyperparameters from NNI; the corresponding hyperparameters in the model must be replaced by the new values.

After setting up, an experiment can be launched simply by `python3 exp.py --config_file CONFIG_FILE --port PORT`.

## <span id="faq">常见问题与答案</span>
24 changes: 24 additions & 0 deletions autoML-demo/config.yaml
@@ -0,0 +1,24 @@
authorName: default
experimentName: lstm
trialConcurrency: 1
maxExecDuration: 1000h
maxTrialNum: 500
# choices: local, remote, pai, kubeflow
trainingServicePlatform: local
searchSpacePath: ./search_space.json
# choices: true, false; default: false
useAnnotation: false
# choices: true, false; default: false
multiThread: false
tuner:
  builtinTunerName: TPE
  classArgs:
    optimize_mode: maximize
    parallel_optimize: True
    constant_liar_type: min
trial:
  command: pip install -r requirements.txt && python train.py --conf_path model.json --automl
  codeDir: .
  gpuNum: 1
localConfig:
  useActiveGpu: true
22 changes: 22 additions & 0 deletions autoML-demo/exp.py
@@ -0,0 +1,22 @@
from nnicli import Experiment
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', type=str, required=True, help='experiment config file')
    parser.add_argument('--port', type=int, default=8080, help='port on which to serve the NNI web UI')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Start an NNI experiment with the given config; the web UI becomes
    # available on the chosen port.
    exp = Experiment()
    exp.start_experiment(args.config_file, port=args.port)
190 changes: 190 additions & 0 deletions autoML-demo/model.json
@@ -0,0 +1,190 @@
{
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
"tool_version": "1.1.0",
"model_description": "This model is used for question answer matching task, and it achieved auc: 0.7854 in WikiQACorpus test set.",
"inputs": {
"use_cache": false,
"dataset_type": "classification",
"data_paths": {
"train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"valid_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"test_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
"pre_trained_emb": "./dataset/Glove/glove.840B.300d.txt"
},
"file_with_col_header": true,
"add_start_end_for_seq": true,
"file_header": {
"question_text": 0,
"passage_text": 1,
"label": 2
},
"model_inputs": {
"question": ["question_text"],
"passage": ["passage_text"]
},
"target": ["label"]
},
"outputs":{
"save_base_dir": "models_wikiqa_matchatt/",
"model_name": "model.nb",
"train_log_name": "train.log",
"test_log_name": "test.log",
"predict_log_name": "predict.log",
"predict_fields": ["prediction"],
"predict_output_name": "predict.tsv"
},
"training_params": {
"vocabulary": {
"min_word_frequency": 1
},
"optimizer": {
"name": "Adam",
"params": {
"lr": 0.002
}
},
"lr_decay": 1,
"minimum_lr": 0.0001,
"epoch_start_lr_decay": 3,
"steps_per_validation": 50,
"use_gpu": true,
"batch_size": 128,
"batch_num_to_show_results": 100,
"max_epoch": 10,
"valid_times_per_epoch": 5,
"max_lengths": {
"query": 50,
"passage": 200
},
"cpu_num_workers": 4
},
"architecture":[
{
"layer": "Embedding",
"conf": {
"word": {
"cols": ["question_text", "passage_text"],
"dim": 300,
"fix_weight": true,
"dropout": 0
}
}
},
{
"layer_id": "query_dropout",
"layer": "Dropout",
"conf": {
"dropout": 0.1
},
"inputs": ["question"]
},
{
"layer_id": "passage_dropout",
"layer": "Dropout",
"conf": {
"dropout": 0.1
},
"inputs": ["passage"]
},
{
"layer_id": "query_1",
"layer": "BiLSTM",
"conf": {
"hidden_dim": 128,
"dropout": 0.1,
"num_layers": 2
},
"inputs": ["query_dropout"]
},
{
"layer_id": "passage_1",
"layer": "BiLSTM",
"conf": {
"hidden_dim": 128,
"dropout": 0.1,
"num_layers": 2
},
"inputs": ["passage_dropout"]
},
{
"layer_id": "query_matched",
"layer": "MatchAttention",
"conf": {
},
"inputs": ["query_dropout", "passage_dropout"]
},
{
"layer_id": "passage_matched",
"layer": "MatchAttention",
"conf": {
},
"inputs": ["passage_dropout", "query_dropout"]
},
{
"layer_id": "query_combined",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["query_1","query_matched"]
},
{
"layer_id": "passage_combined",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["passage_1","passage_matched"]
},
{
"layer_id": "query_linear_att",
"layer": "Pooling",
"conf": {
"pool_axis": 1,
"pool_type": "max"
},
"inputs": ["query_combined"]
},
{
"layer_id": "passage_linear_att",
"layer": "Pooling",
"conf": {
"pool_axis": 1,
"pool_type": "max"
},
"inputs": ["passage_combined"]
},
{
"layer_id": "comb",
"layer": "Combination",
"conf": {
"operations": ["origin"]
},
"inputs": ["query_linear_att","passage_linear_att"]
},
{
"output_layer_flag": true,
"layer_id": "output",
"layer": "Linear",
"conf": {
"hidden_dim": [128,2],
"activation": "PReLU",
"last_hidden_activation": false
},
"inputs": ["comb"]
}
],
"loss": {
"losses": [
{
"type": "CrossEntropyLoss",
"conf": {
"weight": [0.4,0.6],
"size_average": true
},
"inputs": ["output","label"]
}
]
},
"metrics": ["auc","accuracy"]
}
8 changes: 8 additions & 0 deletions autoML-demo/search_space.json
@@ -0,0 +1,8 @@
{
"architecture.3.conf.dropout": { "_type": "uniform", "_value": [0, 0.999] },
"training_params.batch_size": { "_type": "choice", "_value": [32, 64, 128, 256] },
"training_params.optimizer.params.lr": { "_type": "loguniform", "_value": [0.0001, 0.01] },
"architecture.0.conf.word.dropout": { "_type": "uniform", "_value": [0, 0.999] },
"architecture.2.conf.dropout'": { "_type": "uniform", "_value": [0, 0.999] },
"training_params.lr_decay": { "_type": "uniform", "_value": [0.95, 1] }
}