-
Notifications
You must be signed in to change notification settings - Fork 0
/
finetune.sh
164 lines (147 loc) · 5.89 KB
/
finetune.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
##################################################################################################
# Fine-tuning launcher: parses positional CLI arguments, prepares a clean output
# directory and dispatches to finetune.py.
#
# Example usage:
# bash finetune.sh model_name_or_path save_dir seed [optional args (see below)]
##################################################################################################
# -u: error on unset variables; pipefail: make `python ... | tee` report python's
# exit status instead of tee's. (-e is deliberately omitted to keep the original
# continue-on-error behavior of the script body.)
set -uo pipefail

model_name_or_path=${1:-}
save_dir=${2:-}
seed=${3:-42}
is_encoder_decoder=${4:-False}
tie_encoder_decoder=${5:-False}
max_train_samples=${6:-1.0}
eval_runs_per_epoch=${7:-1} # for ablations
init_as_random=${8:-False}
data_dir=${9:-"resources/data/Topical-Chat/KGD"}
log_file=${10:-"$save_dir/finetune.log"}

# check if required args are provided; usage goes to stderr
if [[ -z "$model_name_or_path" || -z "$save_dir" ]]; then
    echo "Usage: bash finetune.sh model_name_or_path save_dir [seed] [is_encoder_decoder] [tie_encoder_decoder]" >&2
    exit 1
fi

# create save dir (will overwrite if exists); ${save_dir:?} aborts rather than
# ever expanding `rm -rf` against an empty path
rm -rf -- "${save_dir:?}"
mkdir -p -- "$save_dir"
# Print the effective run configuration at the top of the console output.
echo ""
printf 'model_name_or_path:\t%s\n' "$model_name_or_path"
printf 'seed:\t\t\t%s\n' "$seed"
printf 'data_dir:\t\t%s\n' "$data_dir"
printf 'save_dir:\t\t%s\n' "$save_dir"
printf 'log_file:\t\t%s\n' "$log_file"
printf 'is_encoder_decoder:\t%s\n' "$is_encoder_decoder"
printf 'tie_encoder_decoder:\t%s\n' "$tie_encoder_decoder"
printf 'max_train_samples:\t%s\n' "$max_train_samples"
printf 'eval_runs_per_epoch:\t%s\n' "$eval_runs_per_epoch"
printf 'init_as_random:\t\t%s\n' "$init_as_random"
# Resolve train/validation/test files and the knowledge column from the dataset
# family encoded in $data_dir (KGD / TC / CSD / DD). Unknown dirs abort the run.
if [[ $data_dir == *"KGD"* ]]; then
    # alternative training sets for investigations, selected by save_dir suffix
    if [[ $save_dir == *"_sub" ]]; then
        train_file="$data_dir/train_39572.json"
    elif [[ $save_dir =~ _max([0-5])qus$ ]]; then
        # one pattern replaces the former _max0qus.._max5qus elif ladder; the
        # captured digit picks the matching train_maxNqus.json file
        train_file="$data_dir/train_max${BASH_REMATCH[1]}qus.json"
    else
        train_file="$data_dir/train.json"
    fi
    validation_file="$data_dir/valid_freq.json"
    test_file="$data_dir/test_freq.json"
    knowledge_column="knowledge"
elif [[ $data_dir == *"TC"* ]]; then
    train_file="$data_dir/train.json"
    validation_file="$data_dir/valid_freq.json"
    test_file="$data_dir/test_freq.json"
    knowledge_column="none"
elif [[ $data_dir == *"CSD"* ]]; then
    train_file="$data_dir/train.json"
    validation_file="$data_dir/valid.json"
    test_file="$data_dir/test.json"
    knowledge_column="context"
elif [[ $data_dir == *"DD"* ]]; then
    train_file="$data_dir/train.json"
    validation_file="$data_dir/valid.json"
    test_file="$data_dir/test.json"
    knowledge_column="none"
else
    # diagnostics belong on stderr
    echo "Invalid data_dir: $data_dir" >&2 && exit 1
fi
# Echo the resolved dataset paths (note: knowledge_column keeps the original's
# double tab).
printf 'train_file:\t\t%s\n' "$train_file"
printf 'validation_file:\t%s\n' "$validation_file"
printf 'test_file:\t\t%s\n' "$test_file"
printf 'knowledge_column:\t\t%s\n' "$knowledge_column"
echo ""
# Assemble the full finetune.py argument list in an array, then invoke once.
# All hyperparameters are fixed here except those forwarded from the CLI.
finetune_args=(
    --model_name_or_path "$model_name_or_path"
    --output_dir "$save_dir"
    --is_encoder_decoder "$is_encoder_decoder"
    --tie_encoder_decoder "$tie_encoder_decoder"
    --overwrite_output_dir True
    # dataset
    --train_file "$train_file"
    --validation_file "$validation_file"
    --test_file "$test_file"
    --text_column "turns"
    --summary_column "target"
    --knowledge_column "$knowledge_column"
    --overwrite_cache True
    --preprocessing_num_workers 1
    --max_source_length 256 --max_target_length 64
    # optimization
    --learning_rate 0.0000625
    --num_beams 4
    --num_train_epochs 10
    --per_device_train_batch_size 20
    --gradient_accumulation_steps 1
    --seed "$seed"
    --fp16
    # training/evaluation schedule
    --do_train --do_eval
    --save_strategy "steps"
    --evaluation_strategy "steps" --per_device_eval_batch_size 20
    --eval_runs_per_epoch "$eval_runs_per_epoch"
    --write_intermediate_eval_results False --include_inputs_for_metrics False --predict_with_generate False
    --early_stopping False
    --max_train_samples "$max_train_samples"
    --load_best_model_at_end True --metric_for_best_model "loss"
    --init_as_random "$init_as_random"
    --report_to "wandb"
)
# mirror console output into the log file
python finetune.py "${finetune_args[@]}" | tee "$log_file"
# NOTE: the block below is a retained, fully commented-out alternative run using
# the 'epoch' save/evaluation strategy; the dangling '# else' / '# fi' lines are
# remnants of a removed conditional around the invocation above.
# else
# echo "Running with validation strategy 'epoch'"
# echo -e "eval_runs_per_epoch:\t$eval_runs_per_epoch"
# echo ""
# # default fine-tuning settings
# python finetune.py \
# --model_name_or_path "$model_name_or_path" \
# --output_dir "$save_dir" \
# --is_encoder_decoder "$is_encoder_decoder" \
# --tie_encoder_decoder "$tie_encoder_decoder" \
# --overwrite_output_dir True \
# --train_file "$data_dir/train.json" \
# --validation_file "$data_dir/valid_freq.json" \
# --test_file "$data_dir/test_freq.json" \
# --text_column "turns" \
# --summary_column "target" \
# --knowledge_column "knowledge" \
# --overwrite_cache True \
# --preprocessing_num_workers 16 \
# --max_source_length 256 --max_target_length 64 \
# --learning_rate 0.0000625 \
# --num_beams 4 \
# --num_train_epochs 10 \
# --per_device_train_batch_size 20 \
# --gradient_accumulation_steps 1 \
# --seed "$seed" \
# --fp16 \
# --do_train --do_eval --do_predict \
# --save_strategy "epoch" \
# --evaluation_strategy "epoch" --per_device_eval_batch_size 20 \
# --eval_runs_per_epoch "$eval_runs_per_epoch" --write_intermediate_eval_results False \
# --include_inputs_for_metrics False \
# --early_stopping True \
# --predict_with_generate False \
# --max_train_samples "$max_train_samples" \
# --load_best_model_at_end True --metric_for_best_model "loss" \
# --report_to "wandb" | tee "$log_file"
# fi