-
Notifications
You must be signed in to change notification settings - Fork 40
/
optimization_driver.m
120 lines (106 loc) · 4.77 KB
/
optimization_driver.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
%% driver for statistical and systems heterogeneity experiments
% Loads a dataset, splits it into train/test parts, and sets the options
% shared by the solver sections that follow in this script.
%% load dataset
datarepo = 'data/'; % location of data folder
dataset = 'small'; % small test dataset
% Load into a struct instead of straight into the workspace: this keeps any
% extra variables stored in the .mat file from silently overwriting script
% variables (e.g. opts, lambda) and lets us report a missing X/Y clearly.
data = load([datarepo dataset]); % load data
required_vars = {'X', 'Y'};
missing_vars = required_vars(~isfield(data, required_vars));
if ~isempty(missing_vars)
    error('optimization_driver:missingData', ...
        'Dataset "%s" does not define required variable(s): %s', ...
        [datarepo dataset], strjoin(missing_vars, ', '));
end
X = data.X;
Y = data.Y;
clear data required_vars missing_vars % keep only X and Y from the load step
%% set parameters
addpath('opt/'); addpath('util/'); % add helper functions
training_percent = 0.75; % percentage of data for training
[Xtrain, Ytrain, Xtest, Ytest] = split_data(X, Y, training_percent);
opts.obj = 'C'; % classification
opts.avg = 1; % compute average error
opts.sys_het = 0; % run systems (1) or stats heterogeneity exps (0)
% NOTE(review): opts.top is used further down as a multiplier on the total
% training-set size when estimating per-round time for the systems
% heterogeneity plot; how the solvers interpret top/bottom is not visible here.
opts.top = 1.0; % highest number of rounds
opts.bottom = 0.1; % lowest number of rounds
lambda = 1e-4; % passed unchanged to every solver below (presumably the regularization weight - confirm)
%% mocha [need to tune sdca_frac]
% MOCHA settings: iteration counts, the sdca_frac knob, and the w-update flag.
opts.mocha_outer_iters = 1;
opts.mocha_inner_iters = 2000;
opts.mocha_sdca_frac = 1.0;
opts.w_update = 1; % just do a single w-update
% Run MOCHA on the train/test split; primal_mocha is what the plot below uses.
[rmse_mocha, primal_mocha, dual_mocha] = run_mocha( ...
    Xtrain, Ytrain, ...
    Xtest, Ytest, ...
    lambda, opts);
%% cocoa [need to tune sdca_frac]
% CoCoA settings: iteration counts and theta.
% NOTE(review): the section title mentions sdca_frac, but the only tunable
% set here is opts.theta - confirm which knob is meant.
opts.cocoa_outer_iters = 1;
opts.cocoa_inner_iters = 500;
opts.theta = 0.5;
% Run CoCoA; max_its is the extra output used later to build cocoa_time.
[rmse_cocoa, primal_cocoa, dual_cocoa, max_its] = run_cocoa( ...
    Xtrain, Ytrain, ...
    Xtest, Ytest, ...
    lambda, opts);
%% mbsdca [need to tune sdca_frac, scaling]
% Mini-batch SDCA settings: iteration counts, sdca_frac and scaling.
opts.mbsdca_outer_iters = 1;
opts.mbsdca_inner_iters = 5000;
opts.mbsdca_sdca_frac = 0.5;
opts.mbsdca_scaling = 10;
% Run mini-batch SDCA on the same split and lambda as the other methods.
[rmse_mbsdca, primal_mbsdca, dual_mbsdca] = run_mbsdca( ...
    Xtrain, Ytrain, ...
    Xtest, Ytest, ...
    lambda, opts);
%% mbsgd [need to tune sgd_frac, scaling]
% Mini-batch SGD settings: iteration counts, sgd_frac and scaling.
opts.mbsgd_outer_iters = 1;
opts.mbsgd_inner_iters = 5000;
opts.mbsgd_sgd_frac = 0.5;
opts.mbsgd_scaling = 0.1;
% Run mini-batch SGD; only rmse and primal values are requested (no dual output).
[rmse_mbsgd, primal_mbsgd] = run_mbsgd( ...
    Xtrain, Ytrain, ...
    Xtest, Ytest, ...
    lambda, opts);
%% plot primal sub-optimality against estimated time
% Plots (primal objective - best observed primal objective) for MOCHA, CoCoA,
% Mb-SDCA and Mb-SGD on a log-scaled y axis, against an estimated-time axis
% built from a per-round cost plus a fixed communication cost.
% opts.sys_het selects the systems (1) or statistical (0) heterogeneity plot;
% the two variants differ only in the settings chosen in the branch below.

% note: ensure methods have reached global optimal or enter manually
% (:) forces column orientation so the concatenation works whether the
% solvers return row or column vectors (their lengths differ).
optimal = min([primal_mocha(:); primal_cocoa(:); primal_mbsdca(:); primal_mbsgd(:)]);

%% settings that differ between the two experiments
step = 100; % plot every step-th iterate of the subsampled curves
if(opts.sys_het)
    % systems heterogeneity: every method is charged opts.top per round
    mocha_frac = opts.top;
    mbsdca_frac = opts.top;
    mbsgd_frac = opts.top;
    cocoa_step = step; % CoCoA curve is subsampled like the others
    plot_title = [dataset ': Systems Heterogeneity'];
    axis_limits = [0 8000000 .001 100]; % set manually
else
    % statistical heterogeneity: each method is charged its own data fraction
    mocha_frac = opts.mocha_sdca_frac;
    mbsdca_frac = opts.mbsdca_sdca_frac;
    mbsgd_frac = opts.mbsgd_sgd_frac;
    cocoa_step = 1; % CoCoA curve is plotted in full
    plot_title = [dataset ': Statistical Heterogeneity'];
    axis_limits = [0 7000000 .001 100]; % set manually
end

%% calculate time based on flops and communication cost
comm_cost = 100; % communication cost: Wifi=10, LTE=100, 3G=1000
% total first-dimension size over all cells of Xtrain
% (presumably one row per training example - confirm against split_data)
train_n = sum(cellfun(@(Xt) size(Xt, 1), Xtrain(:)));
% per-round cost = multiplier * fraction * train_n + communication cost
% (the multipliers 8/6/4 are presumably per-example flop estimates - confirm)
local_mocha_time = 8 * mocha_frac * train_n + comm_cost;
local_mbsdca_time = 6 * mbsdca_frac * train_n + comm_cost;
local_mbsgd_time = 4 * mbsgd_frac * train_n + comm_cost;
% one time stamp per recorded primal value, spaced by the per-round cost
mocha_time = 1:local_mocha_time:local_mocha_time*length(primal_mocha);
mbsdca_time = 1:local_mbsdca_time:local_mbsdca_time*length(primal_mbsdca);
mbsgd_time = 1:local_mbsgd_time:local_mbsgd_time*length(primal_mbsgd);
% CoCoA time accumulates a per-round cost derived from max_its.
% NOTE(review): this scales by length(Xtrain) (the number of cells), not by
% train_n as the other methods do - confirm that this is intended.
cocoa_time = cumsum(max_its .* (8 * length(Xtrain)) + comm_cost);

%% plot results
figure;
semilogy(mocha_time(1:step:end), primal_mocha(1:step:end) - optimal, 'LineWidth', 6)
hold on; % a single hold keeps all following curves on the same axes
semilogy(cocoa_time(1:cocoa_step:end), primal_cocoa(1:cocoa_step:end) - optimal, 'LineWidth', 6)
semilogy(mbsdca_time(1:step:end), primal_mbsdca(1:step:end) - optimal, 'LineWidth', 6)
semilogy(mbsgd_time(1:step:end), primal_mbsgd(1:step:end) - optimal, 'LineWidth', 6)
title(plot_title)
xlabel('Estimated Time')
ylabel('Primal Sub-Optimality')
set(gca, 'fontsize', 16)
legend({'MOCHA', 'CoCoA', 'Mb-SDCA', 'Mb-SGD'})
axis(axis_limits)