Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYSTEMDS-3840] Builtin scripts parameter consolidation #2228

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion scripts/builtin/abstain.dml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ return (Matrix[Double] Xout, Matrix[Double] Yout)
Yout = Y
if(min(Y) != max(Y) & max(Y) <= 2)
{
betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose)
betas = multiLogReg(X=X, Y=Y, intercept=1, reg=1e-4, maxIter=100, maxInnerIter=0, verbose=verbose)
[prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE)

inc = ((yhat != Y) & (rowMaxs(prob) > threshold))
Expand Down
20 changes: 10 additions & 10 deletions scripts/builtin/adasyn.dml
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,22 @@
#
# INPUT:
# --------------------------------------------------------------------------------------
# X Feature matrix [shape: n-by-m]
# Y Class labels [shape: n-by-1]
# k Number of nearest neighbors
# beta Desired balance level after generation of synthetic data [0, 1]
# dth Distribution threshold
# seed Seed for randomized data point selection
# X Feature matrix [shape: n-by-m]
# Y Class labels [shape: n-by-1]
# k Number of nearest neighbors
# beta Desired balance level after generation of synthetic data [0, 1]
# threshold Distribution threshold
# seed Seed for randomized data point selection
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------
# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows
# Yp Class labels aligned with output X
# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows
# Yp Class labels aligned with output Xp
# -------------------------------------------------------------------------------------

m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2,
Double beta = 1.0, Double dth = 0.9, Integer seed = -1)
Double beta = 1.0, Double threshold = 0.9, Integer seed = -1)
return (Matrix[Double] Xp, Matrix[Double] Yp)
{
if(k < 1) {
Expand All @@ -60,7 +60,7 @@ m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2,
# Check if imbalance is lower than predefined threshold
print("ADASYN: class imbalance: " + d)

if(d >= dth) {
if(d >= threshold) {
stop("ADASYN: Class imbalance not large enough.")
}

Expand Down
48 changes: 24 additions & 24 deletions scripts/builtin/als.dml
Original file line number Diff line number Diff line change
Expand Up @@ -25,41 +25,41 @@
#
# INPUT:
# -------------------------------------------------------------------------------------------
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# regType Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
# reg Regularization parameter, no regularization if 0.0
# maxi Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating U and V once
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# regType Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
# reg Regularization parameter, no regularization if 0.0
# maxIter Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating U and V once
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE threshold is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# -------------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# -------------------------------------------------------------------------------------------

# Entry point for ALS factorization: forwards its arguments to either alsCG or
# alsDS, chosen by the size/regularization check on the first line of the body.
m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
N = 10000; # for large problems, use scalable alsCG
# NOTE(review): reg is declared as a Double, yet it is compared against the
# string "L2" here; this looks like it was meant to test regType - confirm.
if( reg != "L2" | nrow(X) > N | ncol(X) > N )
[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg,
maxi=maxi, check=check, thr=thr, seed = seed, verbose=verbose);
maxIter=maxIter, check=check, threshold=threshold, seed = seed, verbose=verbose);
else
# alsDS takes no regType argument, so only reg is forwarded on this path
[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi,
check=check, thr=thr, seed =seed, verbose=verbose);
[U, V] = alsDS(X=X, rank=rank, reg=reg, maxIter=maxIter,
check=check, threshold=threshold, seed =seed, verbose=verbose);
}
49 changes: 25 additions & 24 deletions scripts/builtin/alsCG.dml
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,39 @@
#
# INPUT:
# --------------------------------------------------------------------------------------------
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# regType Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
# reg Regularization parameter, no regularization if 0.0
# maxi Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating U and V once
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# regType Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
# reg Regularization parameter, no regularization if 0.0
# maxIter Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating U and V once
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE threshold is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# --------------------------------------------------------------------------------------------
#
# OUTPUT:
# --------------------------------------------------------------------------------------------
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# --------------------------------------------------------------------------------------------

m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50,
Boolean check = TRUE, Double thr = 0.0001, Integer seed = 132521, Boolean verbose = TRUE)
m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 132521,
Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
r = rank;
max_iter = maxi;
max_iter = maxIter;

###### MAIN PART ######
m = nrow (X);
Expand Down Expand Up @@ -149,7 +150,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", D
loss_dec = (loss_init - loss_cur) / loss_init;
if( verbose )
print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
if( loss_dec >= 0 & loss_dec < threshold | loss_init == 0 ) {
if( verbose )
print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
converged = TRUE;
Expand Down
30 changes: 15 additions & 15 deletions scripts/builtin/alsDS.dml
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,30 @@
#
# INPUT:
# -------------------------------------------------------------------------------------------
# X Location to read the input matrix V to be factorized
# rank Rank of the factorization
# reg Regularization parameter, no regularization if 0.0
# maxi Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating L and R once
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# reg Regularization parameter, no regularization if 0.0
# maxIter Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating L and R once
# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE threshold is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# -------------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# -------------------------------------------------------------------------------------------

m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
Integer maxIter = 50, Boolean check = FALSE, Double threshold = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
r = rank;
max_iter = maxi;
max_iter = maxIter;

# check the input matrix X; if some rows or columns contain only zeros, remove them from X
X_nonzero_ind = X != 0;
Expand Down Expand Up @@ -128,7 +128,7 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
loss_dec = (loss_init - loss_cur) / loss_init;
if( verbose )
print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
if (loss_dec >= 0 & loss_dec < threshold | loss_init == 0) {
if( verbose )
print ("----- ALS converged after " + it + " iterations!");
converged = TRUE;
Expand Down
16 changes: 8 additions & 8 deletions scripts/builtin/arima.dml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
# INPUT:
# ------------------------------------------------------------------------------------------
# X The input Matrix to apply Arima on.
# max_func_invoc ---
# maxIter Maximum number of function invocations (formerly max_func_invoc)
# p non-seasonal AR order
# d non-seasonal differencing order
# q non-seasonal MA order
# P seasonal AR order
# D seasonal differencing order
# Q seasonal MA order
# s period in terms of number of time-steps
# include_mean center to mean 0, and include in result
# includeMean center to mean 0, and include in result
# solver solver, is either "cg" or "jacobi"
# ------------------------------------------------------------------------------------------
#
Expand All @@ -41,23 +41,23 @@
# best_point The calculated coefficients
# ----------------------------------------------------------------------------------------

m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
m_arima = function(Matrix[Double] X, Integer maxIter=1000, Integer p=0,
Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
Boolean include_mean=FALSE, String solver="jacobi")
Boolean includeMean=FALSE, String solver="jacobi")
return (Matrix[Double] best_point)
{
totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
#print ("totcols=" + totcols)

#TODO: check max_func_invoc < totcols --> print warning (stop here ??)
#TODO: check maxIter < totcols --> print warning (stop here ??)

num_rows = nrow(X)
#print("nrows of X: " + num_rows)
if(num_rows <= d)
print("non-seasonal differencing order should be smaller than length of the time-series")

mu = 0.0
if(include_mean == 1){
if(includeMean == 1){
mu = mean(X)
X = X - mu
}
Expand Down Expand Up @@ -117,7 +117,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])

continue = TRUE
while(continue & num_func_invoc <= max_func_invoc){
while(continue & num_func_invoc <= maxIter){
best_index = as.scalar(rowIndexMin(objvals))
worst_index = as.scalar(rowIndexMax(objvals))

Expand Down Expand Up @@ -170,7 +170,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
}

best_point = simplex[,best_index]
if(include_mean)
if(includeMean)
best_point = rbind(best_point, as.matrix(mu))
}

Expand Down
Loading
Loading