# benchmarks.yml
# Benchmark definitions: shared option anchors first, then one entry per
# benchmark.
---
# Result-extraction settings shared by the benchmark entries in this file,
# which pull them in through the `<<` merge key.
common_options: &common_options
  # Each entry pairs a name with a quoted string that matches a key under
  # `data` below.
  # NOTE(review): presumably [column label, data key] — confirm against the
  # benchmark runner.
  output:
    - [throughput, 'throughput']
    - [loss, 'loss']
  data:
    throughput:
      # Captures the number following `tput=` on a line that also contains
      # `loss`. The class includes `-` as well as `+` so that values printed
      # with a negative exponent (e.g. 5.0e-01) are captured whole instead of
      # being cut off at the sign.
      regexp: 'loss.*tput=([e\d\+\-\.]+)'
      # NOTE(review): presumably the number of leading matches to discard —
      # confirm against the benchmark runner.
      skip: 1
    loss:
      # NOTE(review): presumably reports only the last matched value — confirm.
      reduction_type: 'final'
      # Captures the number following `loss=`; `-` is accepted so small
      # losses such as 1.2e-05 are not truncated to `1.2e`.
      regexp: 'loss=([\d\+\-\.e]+)'
      skip: 1
# Dependency manifests shared by the benchmark entries in this file (merged in
# through the `<<` key).
# NOTE(review): paths are presumably resolved relative to this file's
# directory — confirm against the benchmark runner.
config_options: &config_options
  requirements_path: 'requirements.txt'
  required_apt_packages_path: 'required_apt_packages.txt'
# Throughput benchmark on generated data: runs run.py directly (no poprun) for
# 5 epochs with the `synthetic` dataset selected for both the training and the
# inference loader, and with empty checkpoint and data-dir arguments.
# NOTE(review): the description says "n-frame" while the config passed is
# webvid2m-8ipu-1f.json — confirm which is intended.
pytorch_fit_pretrain_gen_pod16:
  <<: [*common_options, *config_options]
  description: |
    Frozen in time n-frame pretraining throughput test
    using host generated data on 8IPUs.
  # `>-` folds the equally indented lines below into a single space-separated
  # command line with no trailing newline.
  cmd: >-
    python run.py --config_name configs/webvid2m-8ipu-1f.json
    --trainer.epochs 5 --timestamp_ckpt False
    --arch.args.load_checkpoint ""
    --data_loader.training.dataset_name synthetic
    --data_loader.training.data_dir ""
    --data_loader.inference.dataset_name synthetic
    --data_loader.inference.data_dir ""
    --trainer.wandb False
# 1-frame pretraining run on real data, launched through poprun as a single
# instance with one 8-IPU replica. No checkpoint is loaded (empty
# --arch.args.load_checkpoint).
# Environment variables read by the command: IPUOF_VIPU_API_PARTITION_ID,
# VIPU_ALLOCATION_ID, VIPU_CLI_API_HOST and DATASETS_DIR (which must contain
# the WebVid and MSRVTT directories).
pytorch_fit_1f_pretrain_real_pod16_conv:
  <<: [*common_options, *config_options]
  description: |
    Frozen in time 1-frame pretraining on 8IPUs.
  # `>-` folds the equally indented lines below into a single space-separated
  # command line with no trailing newline.
  cmd: >-
    poprun -vv
    --num-instances 1 --num-replicas 1 --ipus-per-replica 8
    --vipu-partition $IPUOF_VIPU_API_PARTITION_ID
    --vipu-allocation=$VIPU_ALLOCATION_ID
    --vipu-server-host $VIPU_CLI_API_HOST
    --executable-cache-path ./exps/exe_cache
    python run.py --config_name configs/webvid2m-8ipu-1f.json
    --timestamp_ckpt False --arch.args.load_checkpoint ""
    --data_loader.training.data_dir $DATASETS_DIR/WebVid
    --data_loader.inference.data_dir $DATASETS_DIR/MSRVTT
    --trainer.run_name pytorch_fit_1f_pretrain_real_pod16_conv
# 2-frame pretraining run on real data, launched through poprun as a single
# instance with one 8-IPU replica. Training starts from the checkpoint at
# exps/models/WebVid2M-IPU/webvid2m-8ipu-1f_model_best.pth.
# NOTE(review): that checkpoint is presumably written by the 1-frame
# benchmark, which would then have to run first — confirm.
# Environment variables read by the command: IPUOF_VIPU_API_PARTITION_ID,
# VIPU_ALLOCATION_ID, VIPU_CLI_API_HOST and DATASETS_DIR (which must contain
# the WebVid and MSRVTT directories).
pytorch_fit_2f_pretrain_real_pod16_conv:
  <<: [*common_options, *config_options]
  description: |
    Frozen in time 2-frame pretraining on 8IPUs.
  # `>-` folds the equally indented lines below into a single space-separated
  # command line, so the checkpoint path may sit on its own line.
  cmd: >-
    poprun -vv
    --num-instances 1 --num-replicas 1 --ipus-per-replica 8
    --vipu-partition $IPUOF_VIPU_API_PARTITION_ID
    --vipu-allocation=$VIPU_ALLOCATION_ID
    --vipu-server-host $VIPU_CLI_API_HOST
    --executable-cache-path ./exps/exe_cache
    python run.py --config_name configs/webvid2m-8ipu-2f.json
    --timestamp_ckpt False
    --arch.args.load_checkpoint
    exps/models/WebVid2M-IPU/webvid2m-8ipu-1f_model_best.pth
    --data_loader.training.data_dir $DATASETS_DIR/WebVid
    --data_loader.inference.data_dir $DATASETS_DIR/MSRVTT
    --trainer.run_name pytorch_fit_2f_pretrain_real_pod16_conv
# 4-frame pretraining run on real data, launched through poprun as a single
# instance with one 8-IPU replica. Training starts from the checkpoint at
# exps/models/WebVid2M-IPU/webvid2m-8ipu-2f_model_best.pth.
# NOTE(review): that checkpoint is presumably written by the 2-frame
# benchmark, which would then have to run first — confirm.
# Environment variables read by the command: IPUOF_VIPU_API_PARTITION_ID,
# VIPU_ALLOCATION_ID, VIPU_CLI_API_HOST and DATASETS_DIR (which must contain
# the WebVid and MSRVTT directories).
pytorch_fit_4f_pretrain_real_pod16_conv:
  <<: [*common_options, *config_options]
  description: |
    Frozen in time 4-frame pretraining on 8IPUs.
  # `>-` folds the equally indented lines below into a single space-separated
  # command line, so the checkpoint path may sit on its own line.
  cmd: >-
    poprun -vv
    --num-instances 1 --num-replicas 1 --ipus-per-replica 8
    --vipu-partition $IPUOF_VIPU_API_PARTITION_ID
    --vipu-allocation=$VIPU_ALLOCATION_ID
    --vipu-server-host $VIPU_CLI_API_HOST
    --executable-cache-path ./exps/exe_cache
    python run.py --config_name configs/webvid2m-8ipu-4f.json
    --timestamp_ckpt False
    --arch.args.load_checkpoint
    exps/models/WebVid2M-IPU/webvid2m-8ipu-2f_model_best.pth
    --data_loader.training.data_dir $DATASETS_DIR/WebVid
    --data_loader.inference.data_dir $DATASETS_DIR/MSRVTT
    --trainer.run_name pytorch_fit_4f_pretrain_real_pod16_conv