Skip to content

Commit 2b29972

Browse files
committed
[SYSTEMDS-3840] Builtin scripts parameter consolidation
This commit consolidates the parameters of the builtin DML scripts and introduces a formatting scheme for the DML builtin parameters. Additionally, the changes were also applied to the Python API using the auto generator script, where I added a small fix, because some builtins were not parsed correctly. The changes in the builtin scripts arguments: order_rand -> Order search_iterations -> iter batch_size -> batchSize n_bins -> n ytest -> Ytest max_iter -> maxIter max_func_invoc, max_iterations -> maxIter include_mean -> includeMean icpt -> intercept maxi -> maxIteration maxii -> maxInnerIteration cMask -> ctypes is_verbose -> verbose k_max -> maxK kmax -> iter (start/end)_stepsize -> (start/end)Stepsize (start/end)_vicinity -> (start/end)Vicinity sim_seed -> seed k_value -> k CL_T -> ctypes trans_continuous -> tranCont select_k -> selectK k_min -> minK k_max -> maxK select_feature -> selectFeature feature_max -> maxFeatures feature_importance -> featureImportance predict_con_tg -> predictCont START_SELECTED -> initSelectFeature frequency_threshold -> frequencyThreshold distance_threshold -> distanceThreshold is_verbose -> verbose thresh -> threshold reduced_dims -> reducedDims max_iter -> maxIter is_verbose -> verbose print_iter -> printIter min_leaf -> minLeaf min_split -> minSplit num_trees -> numTrees max_depth -> maxDepth max_features -> maxFeatures max_values -> maxValues max_dataratio -> maxDataRatio sample_frac -> sampleFrac feature_frac -> featureFrac window_size -> windowSize sample_percent -> sampleFrac is_verbose -> verbose alpha -> lr batch_size -> batchSize learning_rate -> lr out_activation -> outActivation loss_fcn -> lossFn validation_split -> validationSplit types -> ctypes sml_type -> smlType num_trees -> numTrees learning_rate -> lr max_depth -> maxDepth lambda -> reg icpt -> intercept maxi, moi, iter -> maxIter mii -> maxInnerIter epsilon -> tol maxIterations -> maxIter maxii -> maxInnerIter eps -> tol max_iter, iterations -> maxIter is_verbose 
-> verbose avg_sample_size_per_centroid -> avgSampleSizePerCentroid space_decomp -> spaceDecomp n_components -> nComponents init_param -> initParams reg_covar -> reg rnk -> rank eps -> tolerance maxi -> maxIter dth, thr -> threshold maxi -> maxIter dth, thr -> threshold maxi -> maxIter
1 parent 1609470 commit 2b29972

File tree

353 files changed

+2676
-2170
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

353 files changed

+2676
-2170
lines changed

scripts/builtin/abstain.dml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ return (Matrix[Double] Xout, Matrix[Double] Yout)
4343
Yout = Y
4444
if(min(Y) != max(Y) & max(Y) <= 2)
4545
{
46-
betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose)
46+
betas = multiLogReg(X=X, Y=Y, intercept=1, reg=1e-4, maxIter=100, maxInnerIter=0, verbose=verbose)
4747
[prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE)
4848

4949
inc = ((yhat != Y) & (rowMaxs(prob) > threshold))

scripts/builtin/adasyn.dml

+10-10
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,22 @@
2424
#
2525
# INPUT:
2626
# --------------------------------------------------------------------------------------
27-
# X Feature matrix [shape: n-by-m]
28-
# Y Class labels [shape: n-by-1]
29-
# k Number of nearest neighbors
30-
# beta Desired balance level after generation of synthetic data [0, 1]
31-
# dth Distribution threshold
32-
# seed Seed for randomized data point selection
27+
# X Feature matrix [shape: n-by-m]
28+
# Y Class labels [shape: n-by-1]
29+
# k Number of nearest neighbors
30+
# beta Desired balance level after generation of synthetic data [0, 1]
31+
# threshold Distribution threshold
32+
# seed Seed for randomized data point selection
3333
# --------------------------------------------------------------------------------------
3434
#
3535
# OUTPUT:
3636
# -------------------------------------------------------------------------------------
37-
# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows
38-
# Yp Class labels aligned with output X
37+
# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows
38+
# Yp Class labels aligned with output X
3939
# -------------------------------------------------------------------------------------
4040

4141
m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2,
42-
Double beta = 1.0, Double dth = 0.9, Integer seed = -1)
42+
Double beta = 1.0, Double threshold = 0.9, Integer seed = -1)
4343
return (Matrix[Double] Xp, Matrix[Double] Yp)
4444
{
4545
if(k < 1) {
@@ -60,7 +60,7 @@ m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2,
6060
# Check if imbalance is lower than predefined threshold
6161
print("ADASYN: class imbalance: " + d)
6262

63-
if(d >= dth) {
63+
if(d >= threshold) {
6464
stop("ADASYN: Class imbalance not large enough.")
6565
}
6666

scripts/builtin/als.dml

+24-24
Original file line numberDiff line numberDiff line change
@@ -25,41 +25,41 @@
2525
#
2626
# INPUT:
2727
# -------------------------------------------------------------------------------------------
28-
# X Location to read the input matrix X to be factorized
29-
# rank Rank of the factorization
30-
# regType Regularization:
31-
# "L2" = L2 regularization;
32-
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
33-
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
34-
# "wL2" = weighted L2 regularization
35-
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
36-
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
37-
# + sum (V ^ 2 * col_nonzeros))
38-
# reg Regularization parameter, no regularization if 0.0
39-
# maxi Maximum number of iterations
40-
# check Check for convergence after every iteration, i.e., updating U and V once
41-
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
42-
# if the decrease in loss in any two consecutive iterations falls below this threshold;
43-
# if check is FALSE thr is ignored
44-
# seed The seed to random parts of the algorithm
45-
# verbose If the algorithm should run verbosely
28+
# X Location to read the input matrix X to be factorized
29+
# rank Rank of the factorization
30+
# regType Regularization:
31+
# "L2" = L2 regularization;
32+
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
33+
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
34+
# "wL2" = weighted L2 regularization
35+
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
36+
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
37+
# + sum (V ^ 2 * col_nonzeros))
38+
# reg Regularization parameter, no regularization if 0.0
39+
# maxIter Maximum number of iterations
40+
# check Check for convergence after every iteration, i.e., updating U and V once
41+
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
42+
# if the decrease in loss in any two consecutive iterations falls below this threshold;
43+
# if check is FALSE threshold is ignored
44+
# seed The seed to random parts of the algorithm
45+
# verbose If the algorithm should run verbosely
4646
# -------------------------------------------------------------------------------------------
4747
#
4848
# OUTPUT:
4949
# -------------------------------------------------------------------------------------------
50-
# U An m x r matrix where r is the factorization rank
51-
# V An m x r matrix where r is the factorization rank
50+
# U An m x r matrix where r is the factorization rank
51+
# V An m x r matrix where r is the factorization rank
5252
# -------------------------------------------------------------------------------------------
5353

5454
m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
55-
Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
55+
Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
5656
return (Matrix[Double] U, Matrix[Double] V)
5757
{
5858
N = 10000; # for large problems, use scalable alsCG
5959
if( reg != "L2" | nrow(X) > N | ncol(X) > N )
6060
[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg,
61-
maxi=maxi, check=check, thr=thr, seed = seed, verbose=verbose);
61+
maxIter=maxIter, check=check, threshold=threshold, seed = seed, verbose=verbose);
6262
else
63-
[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi,
64-
check=check, thr=thr, seed =seed, verbose=verbose);
63+
[U, V] = alsDS(X=X, rank=rank, reg=reg, maxIter=maxIter,
64+
check=check, threshold=threshold, seed =seed, verbose=verbose);
6565
}

scripts/builtin/alsCG.dml

+25-24
Original file line numberDiff line numberDiff line change
@@ -25,38 +25,39 @@
2525
#
2626
# INPUT:
2727
# --------------------------------------------------------------------------------------------
28-
# X Location to read the input matrix X to be factorized
29-
# rank Rank of the factorization
30-
# regType Regularization:
31-
# "L2" = L2 regularization;
32-
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
33-
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
34-
# "wL2" = weighted L2 regularization
35-
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
36-
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
37-
# + sum (V ^ 2 * col_nonzeros))
38-
# reg Regularization parameter, no regularization if 0.0
39-
# maxi Maximum number of iterations
40-
# check Check for convergence after every iteration, i.e., updating U and V once
41-
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
42-
# if the decrease in loss in any two consecutive iterations falls below this threshold;
43-
# if check is FALSE thr is ignored
44-
# seed The seed to random parts of the algorithm
45-
# verbose If the algorithm should run verbosely
28+
# X Location to read the input matrix X to be factorized
29+
# rank Rank of the factorization
30+
# regType Regularization:
31+
# "L2" = L2 regularization;
32+
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
33+
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
34+
# "wL2" = weighted L2 regularization
35+
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
36+
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
37+
# + sum (V ^ 2 * col_nonzeros))
38+
# reg Regularization parameter, no regularization if 0.0
39+
# maxIter Maximum number of iterations
40+
# check Check for convergence after every iteration, i.e., updating U and V once
41+
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
42+
# if the decrease in loss in any two consecutive iterations falls below this threshold;
43+
# if check is FALSE threshold is ignored
44+
# seed The seed to random parts of the algorithm
45+
# verbose If the algorithm should run verbosely
4646
# --------------------------------------------------------------------------------------------
4747
#
4848
# OUTPUT:
4949
# --------------------------------------------------------------------------------------------
50-
# U An m x r matrix where r is the factorization rank
51-
# V An m x r matrix where r is the factorization rank
50+
# U An m x r matrix where r is the factorization rank
51+
# V An m x r matrix where r is the factorization rank
5252
# --------------------------------------------------------------------------------------------
5353

54-
m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50,
55-
Boolean check = TRUE, Double thr = 0.0001, Integer seed = 132521, Boolean verbose = TRUE)
54+
m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
55+
Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 132521,
56+
Boolean verbose = TRUE)
5657
return (Matrix[Double] U, Matrix[Double] V)
5758
{
5859
r = rank;
59-
max_iter = maxi;
60+
max_iter = maxIter;
6061

6162
###### MAIN PART ######
6263
m = nrow (X);
@@ -149,7 +150,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", D
149150
loss_dec = (loss_init - loss_cur) / loss_init;
150151
if( verbose )
151152
print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
152-
if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
153+
if( loss_dec >= 0 & loss_dec < threshold | loss_init == 0 ) {
153154
if( verbose )
154155
print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
155156
converged = TRUE;

scripts/builtin/alsDS.dml

+15-15
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,30 @@
2626
#
2727
# INPUT:
2828
# -------------------------------------------------------------------------------------------
29-
# X Location to read the input matrix V to be factorized
30-
# rank Rank of the factorization
31-
# reg Regularization parameter, no regularization if 0.0
32-
# maxi Maximum number of iterations
33-
# check Check for convergence after every iteration, i.e., updating L and R once
34-
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
35-
# if the decrease in loss in any two consecutive iterations falls below this threshold;
36-
# if check is FALSE thr is ignored
37-
# seed The seed to random parts of the algorithm
38-
# verbose If the algorithm should run verbosely
29+
# X Location to read the input matrix V to be factorized
30+
# rank Rank of the factorization
31+
# reg Regularization parameter, no regularization if 0.0
32+
# maxIter Maximum number of iterations
33+
# check Check for convergence after every iteration, i.e., updating L and R once
34+
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
35+
# if the decrease in loss in any two consecutive iterations falls below this threshold;
36+
# if check is FALSE threshold is ignored
37+
# seed The seed to random parts of the algorithm
38+
# verbose If the algorithm should run verbosely
3939
# -------------------------------------------------------------------------------------------
4040
#
4141
# OUTPUT:
4242
# -------------------------------------------------------------------------------------------
43-
# U An m x r matrix where r is the factorization rank
44-
# V An m x r matrix where r is the factorization rank
43+
# U An m x r matrix where r is the factorization rank
44+
# V An m x r matrix where r is the factorization rank
4545
# -------------------------------------------------------------------------------------------
4646

4747
m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
48-
Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
48+
Integer maxIter = 50, Boolean check = FALSE, Double threshold = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
4949
return (Matrix[Double] U, Matrix[Double] V)
5050
{
5151
r = rank;
52-
max_iter = maxi;
52+
max_iter = maxIter;
5353

5454
# check the input matrix V, if some rows or columns contain only zeros remove them from V
5555
X_nonzero_ind = X != 0;
@@ -128,7 +128,7 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
128128
loss_dec = (loss_init - loss_cur) / loss_init;
129129
if( verbose )
130130
print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
131-
if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
131+
if (loss_dec >= 0 & loss_dec < threshold | loss_init == 0) {
132132
if( verbose )
133133
print ("----- ALS converged after " + it + " iterations!");
134134
converged = TRUE;

scripts/builtin/arima.dml

+8-8
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@
2424
# INPUT:
2525
# ------------------------------------------------------------------------------------------
2626
# X The input Matrix to apply Arima on.
27-
# max_func_invoc ---
27+
# maxIter Maximum number of function invocations (formerly max_func_invoc)
2828
# p non-seasonal AR order
2929
# d non-seasonal differencing order
3030
# q non-seasonal MA order
3131
# P seasonal AR order
3232
# D seasonal differencing order
3333
# Q seasonal MA order
3434
# s period in terms of number of time-steps
35-
# include_mean center to mean 0, and include in result
35+
# includeMean center to mean 0, and include in result
3636
# solver solver, is either "cg" or "jacobi"
3737
# ------------------------------------------------------------------------------------------
3838
#
@@ -41,23 +41,23 @@
4141
# best_point The calculated coefficients
4242
# ----------------------------------------------------------------------------------------
4343

44-
m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
44+
m_arima = function(Matrix[Double] X, Integer maxIter=1000, Integer p=0,
4545
Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
46-
Boolean include_mean=FALSE, String solver="jacobi")
46+
Boolean includeMean=FALSE, String solver="jacobi")
4747
return (Matrix[Double] best_point)
4848
{
4949
totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
5050
#print ("totcols=" + totcols)
5151

52-
#TODO: check max_func_invoc < totcols --> print warning (stop here ??)
52+
#TODO: check maxIter < totcols --> print warning (stop here ??)
5353

5454
num_rows = nrow(X)
5555
#print("nrows of X: " + num_rows)
5656
if(num_rows <= d)
5757
print("non-seasonal differencing order should be smaller than length of the time-series")
5858

5959
mu = 0.0
60-
if(include_mean == 1){
60+
if(includeMean == 1){
6161
mu = mean(X)
6262
X = X - mu
6363
}
@@ -117,7 +117,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
117117
tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])
118118

119119
continue = TRUE
120-
while(continue & num_func_invoc <= max_func_invoc){
120+
while(continue & num_func_invoc <= maxIter){
121121
best_index = as.scalar(rowIndexMin(objvals))
122122
worst_index = as.scalar(rowIndexMax(objvals))
123123

@@ -170,7 +170,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
170170
}
171171

172172
best_point = simplex[,best_index]
173-
if(include_mean)
173+
if(includeMean)
174174
best_point = rbind(best_point, as.matrix(mu))
175175
}
176176

0 commit comments

Comments
 (0)