diff --git a/scripts/builtin/abstain.dml b/scripts/builtin/abstain.dml index 6e0cb516343..87d835525c8 100644 --- a/scripts/builtin/abstain.dml +++ b/scripts/builtin/abstain.dml @@ -43,7 +43,7 @@ return (Matrix[Double] Xout, Matrix[Double] Yout) Yout = Y if(min(Y) != max(Y) & max(Y) <= 2) { - betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose) + betas = multiLogReg(X=X, Y=Y, intercept=1, reg=1e-4, maxIter=100, maxInnerIter=0, verbose=verbose) [prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE) inc = ((yhat != Y) & (rowMaxs(prob) > threshold)) diff --git a/scripts/builtin/adasyn.dml b/scripts/builtin/adasyn.dml index 6424e5b1938..68b59a0c7e5 100644 --- a/scripts/builtin/adasyn.dml +++ b/scripts/builtin/adasyn.dml @@ -24,22 +24,22 @@ # # INPUT: # -------------------------------------------------------------------------------------- -# X Feature matrix [shape: n-by-m] -# Y Class labels [shape: n-by-1] -# k Number of nearest neighbors -# beta Desired balance level after generation of synthetic data [0, 1] -# dth Distribution threshold -# seed Seed for randomized data point selection +# X Feature matrix [shape: n-by-m] +# Y Class labels [shape: n-by-1] +# k Number of nearest neighbors +# beta Desired balance level after generation of synthetic data [0, 1] +# threshold Distribution threshold +# seed Seed for randomized data point selection # -------------------------------------------------------------------------------------- # # OUTPUT: # ------------------------------------------------------------------------------------- -# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows -# Yp Class labels aligned with output X +# Xp Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows +# Yp Class labels aligned with output X # ------------------------------------------------------------------------------------- m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2, - Double beta = 1.0, Double dth = 0.9, Integer seed = -1) + Double beta = 1.0, Double threshold = 0.9, Integer seed = -1) return (Matrix[Double] Xp, Matrix[Double] Yp) { if(k < 1) { @@ -60,7 +60,7 @@ m_adasyn = function(Matrix[Double] X, Matrix[Double] Y, Integer k = 2, # Check if imbalance is lower than predefined threshold print("ADASYN: class imbalance: " + d) - if(d >= dth) { + if(d >= threshold) { stop("ADASYN: Class imbalance not large enough.") } diff --git a/scripts/builtin/als.dml b/scripts/builtin/als.dml index e7ab63f06b2..6c46d14d4be 100644 --- a/scripts/builtin/als.dml +++ b/scripts/builtin/als.dml @@ -25,41 +25,41 @@ # # INPUT: # ------------------------------------------------------------------------------------------- -# X Location to read the input matrix X to be factorized -# rank Rank of the factorization -# regType Regularization: -# "L2" = L2 regularization; -# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2)) -# "wL2" = weighted L2 regularization -# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) -# + sum (V ^ 2 * col_nonzeros)) -# reg Regularization parameter, no regularization if 0.0 -# maxi Maximum number of iterations -# check Check for convergence after every iteration, i.e., updating U and V once -# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared -# if the decrease in loss in any two consecutive iterations falls below this threshold; -# if check is FALSE thr is ignored -# seed The seed to 
random parts of the algorithm
-# verbose If the algorithm should run verbosely
+# X Location to read the input matrix X to be factorized
+# rank Rank of the factorization
+# regType Regularization:
+# "L2" = L2 regularization;
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+# "wL2" = weighted L2 regularization
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
+# + sum (V ^ 2 * col_nonzeros))
+# reg Regularization parameter, no regularization if 0.0
+# maxIter Maximum number of iterations
+# check Check for convergence after every iteration, i.e., updating U and V once
+# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE threshold is ignored
+# seed The seed for the random parts of the algorithm
+# verbose If the algorithm should run verbosely
# -------------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
-# U An m x r matrix where r is the factorization rank
-# V An m x r matrix where r is the factorization rank
+# U An m x r matrix where r is the factorization rank
+# V An m x r matrix where r is the factorization rank
# -------------------------------------------------------------------------------------------

m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
- Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
+ Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
 return (Matrix[Double] U, Matrix[Double] V)
{
 N = 10000; # for large problems, use scalable alsCG
- if( reg != "L2" | nrow(X) > N | ncol(X) > N )
+ if( regType != "L2" | nrow(X) > N | ncol(X) > N )
 [U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg,
- maxi=maxi, check=check, thr=thr, seed = seed, verbose=verbose);
+ maxIter=maxIter, check=check, threshold=threshold, seed=seed, verbose=verbose);
 else
- [U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi,
- check=check, thr=thr, seed =seed, verbose=verbose);
+ [U, V] = alsDS(X=X, rank=rank, reg=reg, maxIter=maxIter,
+ check=check, threshold=threshold, seed=seed, verbose=verbose);
}
diff --git a/scripts/builtin/alsCG.dml b/scripts/builtin/alsCG.dml
index 7c4f546656d..ac9e0391422 100644
--- a/scripts/builtin/alsCG.dml
+++ b/scripts/builtin/alsCG.dml
@@ -25,38 +25,39 @@
#
# INPUT:
# --------------------------------------------------------------------------------------------
-# X Location to read the input matrix X to be factorized
-# rank Rank of the factorization
-# regType Regularization:
-# "L2" = L2 regularization;
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
-# "wL2" = weighted L2 regularization
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
-# + sum (V ^ 2 * col_nonzeros))
-# reg Regularization parameter, no regularization if 0.0
-# maxi Maximum number of iterations
-# check Check for convergence after every iteration, i.e., updating U and V once
-# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# seed The seed to random parts of the algorithm
-# verbose If the algorithm should run verbosely
+# X Location to read the input matrix X to be factorized
+# rank Rank of the factorization
+# regType Regularization:
+# "L2" = L2 regularization;
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+# "wL2" = weighted L2 regularization
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
+# + sum (V ^ 2 * col_nonzeros))
+# reg Regularization parameter, no regularization if 0.0
+# maxIter Maximum number of iterations
+# check Check for convergence after every iteration, i.e., updating U and V once
+# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE threshold is ignored
+# seed The seed for the random parts of the algorithm
+# verbose If the algorithm should run verbosely
# --------------------------------------------------------------------------------------------
#
# OUTPUT:
# --------------------------------------------------------------------------------------------
-# U An m x r matrix where r is the factorization rank
-# V An m x r matrix where r is the factorization rank
+# U An m x r matrix where r is the factorization rank
+# V An m x r matrix where r is the factorization rank
# --------------------------------------------------------------------------------------------

-m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50,
- Boolean check = TRUE, Double thr = 0.0001, Integer seed = 132521, Boolean verbose = TRUE)
+m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
+ Integer maxIter = 50, Boolean check = TRUE, Double threshold = 0.0001, Integer seed = 132521,
+ Boolean verbose = TRUE)
 return (Matrix[Double] U, Matrix[Double] V)
{
 r = rank;
- max_iter = maxi;
+ max_iter = maxIter;

 ###### MAIN PART ######
 m = nrow (X);
@@ -149,7 +150,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", D
 loss_dec = (loss_init - loss_cur) / loss_init;
 if( verbose )
 print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
- if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
+ if( loss_dec >= 0 & loss_dec < threshold | loss_init == 0 ) {
 if( verbose )
 print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
 converged = TRUE;
diff --git a/scripts/builtin/alsDS.dml b/scripts/builtin/alsDS.dml
index 636f0ef4880..a3faac93515 100644
--- a/scripts/builtin/alsDS.dml
+++ b/scripts/builtin/alsDS.dml
@@ -26,30 +26,30 @@
#
# INPUT:
# -------------------------------------------------------------------------------------------
-# X Location to read the input matrix V to be factorized
-# rank Rank of the factorization
-# reg Regularization parameter, no regularization if 0.0
-# maxi Maximum number of iterations
-# check Check for convergence after every iteration, i.e., updating L and R once
-# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# seed The seed to random parts of the algorithm
-# verbose If the algorithm should run verbosely
+# X Location to read the input matrix V to be factorized
+# rank Rank of the factorization
+# reg Regularization parameter, no regularization if 0.0
+# maxIter Maximum number of iterations
+# check Check for convergence after every iteration, i.e., updating L and R once
+# threshold Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE threshold is ignored
+# seed The seed for the random parts of the algorithm
+# verbose If the algorithm should run verbosely
# -------------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
-# U An m x r matrix where r is the factorization rank
-# V An m x r matrix where r is the factorization rank
+# U An m x r matrix where r is the factorization rank
+# V An m x r matrix where r is the factorization rank
# -------------------------------------------------------------------------------------------

m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
- Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
+ Integer maxIter = 50, Boolean check = FALSE, Double threshold = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
 return (Matrix[Double] U, Matrix[Double] V)
{
 r = rank;
- max_iter = maxi;
+ max_iter = maxIter;

 # check the input matrix V, if some rows or columns contain only zeros remove them from V
 X_nonzero_ind = X != 0;
@@ -128,7 +128,7 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
 loss_dec = (loss_init - loss_cur) / loss_init;
 if( verbose )
 print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
- if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+ if (loss_dec >= 0 & loss_dec < threshold | loss_init == 0) {
 if( verbose )
 print ("----- ALS converged after " + it + " iterations!");
 converged = TRUE;
diff --git a/scripts/builtin/arima.dml b/scripts/builtin/arima.dml
index f2645fcbc4c..26cf395c265 100644
--- a/scripts/builtin/arima.dml
+++ b/scripts/builtin/arima.dml
@@ -24,7 +24,7 @@
# INPUT:
# ------------------------------------------------------------------------------------------
# X The input Matrix to apply Arima on.
-# max_func_invoc ---
+# maxIter Maximum number of objective function invocations
# p non-seasonal AR order
# d non-seasonal differencing order
# q non-seasonal MA order
@@ -32,7 +32,7 @@
# D seasonal differencing order
# Q seasonal MA order
# s period in terms of number of time-steps
-# include_mean center to mean 0, and include in result
+# includeMean Center the series to mean 0 and include the mean in the result
# solver solver, is either "cg" or "jacobi"
# ------------------------------------------------------------------------------------------
#
# OUTPUT:
#
@@ -41,15 +41,15 @@
# best_point The calculated coefficients
# ----------------------------------------------------------------------------------------

-m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
+m_arima = function(Matrix[Double] X, Integer maxIter=1000, Integer p=0,
 Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
- Boolean include_mean=FALSE, String solver="jacobi")
+ Boolean includeMean=FALSE, String solver="jacobi")
 return (Matrix[Double] best_point)
{
 totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
 #print ("totcols=" + totcols)

- #TODO: check max_func_invoc < totcols --> print warning (stop here ??)
+ #TODO: check maxIter < totcols --> print warning (stop here ??)
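+ # A minimal sketch of the guard the TODO asks for (assuming a warning is
+ # preferred over stopping, which the TODO leaves open):
+ if( maxIter < totcols )
+   print("WARNING: maxIter (" + maxIter + ") < totcols (" + totcols + "); the simplex search may stop before convergence")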
num_rows = nrow(X) #print("nrows of X: " + num_rows) @@ -57,7 +57,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0, print("non-seasonal differencing order should be smaller than length of the time-series") mu = 0.0 - if(include_mean == 1){ + if(includeMean == 1){ mu = mean(X) X = X - mu } @@ -117,7 +117,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0, tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1]) continue = TRUE - while(continue & num_func_invoc <= max_func_invoc){ + while(continue & num_func_invoc <= maxIter){ best_index = as.scalar(rowIndexMin(objvals)) worst_index = as.scalar(rowIndexMax(objvals)) @@ -170,7 +170,7 @@ m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0, } best_point = simplex[,best_index] - if(include_mean) + if(includeMean) best_point = rbind(best_point, as.matrix(mu)) } diff --git a/scripts/builtin/autoencoder_2layer.dml b/scripts/builtin/autoencoder_2layer.dml index ae2d30e67a8..e5822849d83 100644 --- a/scripts/builtin/autoencoder_2layer.dml +++ b/scripts/builtin/autoencoder_2layer.dml @@ -30,20 +30,21 @@ # INPUT: # --------------------------------------------------------------------------------------------- # X Filename where the input is stored -# num_hidden1 Number of neurons in the 1st hidden layer -# num_hidden2 Number of neurons in the 2nd hidden layer -# max_epochs Number of epochs to train for -# full_obj If TRUE, Computes objective function value (squared-loss) +# numHidden1 Number of neurons in the 1st hidden layer +# numHidden2 Number of neurons in the 2nd hidden layer +# epochs Number of epochs to train for +# fullObj If TRUE, Computes objective function value (squared-loss) # at the end of each epoch. Note that, computing the full # objective can take a lot of time. 
-# batch_size Mini-batch size (training parameter) +# batchSize Mini-batch size (training parameter) # step Initial step size (training parameter) # decay Decays step size after each epoch (training parameter) # mu Momentum parameter (training parameter) -# W1_rand Weights might be initialized via input matrices -# W2_rand --- -# W3_rand --- -# W4_rand --- +# W1 Weights might be initialized via input matrices +# W2 --- +# W3 --- +# W4 --- +# Order --- # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -59,21 +60,21 @@ # HIDDEN Matrix storing the hidden (2nd) layer representation if needed # ---------------------------------------------------------------------------------------------------- -m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer num_hidden2, Integer max_epochs, - Boolean full_obj = FALSE, Integer batch_size = 256, Double step = 1e-5, Double decay = 0.95, Double mu = 0.9, - Matrix[Double] W1_rand = matrix(0, rows=0, cols=0), Matrix[Double] W2_rand = matrix(0, rows=0, cols=0), - Matrix[Double] W3_rand = matrix(0, rows=0, cols=0), Matrix[Double] W4_rand = matrix(0, rows=0, cols=0), - Matrix[Double] order_rand = matrix(0, rows=0, cols=0)) +m_autoencoder_2layer = function(Matrix[Double] X, Integer numHidden1, Integer numHidden2, Integer epochs, + Boolean fullObj = FALSE, Integer batchSize = 256, Double step = 1e-5, Double decay = 0.95, Double mu = 0.9, + Matrix[Double] W1 = matrix(0, rows=0, cols=0), Matrix[Double] W2 = matrix(0, rows=0, cols=0), + Matrix[Double] W3 = matrix(0, rows=0, cols=0), Matrix[Double] W4 = matrix(0, rows=0, cols=0), + Matrix[Double] Order = matrix(0, rows=0, cols=0)) return(Matrix[Double] W1, Matrix[Double] b1, Matrix[Double] W2, Matrix[Double] b2, Matrix[Double] W3, Matrix[Double] b3, Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] reordered_H) { n = nrow(X) m = ncol(X) #randomly reordering rows - if(nrow(order_rand) == 0 & ncol(order_rand) == 0) + if(nrow(Order) == 0 & ncol(Order) == 0) permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n) else - permut = table(seq(1,n,1), order(target=order_rand, by=1, index.return=TRUE), n, n) + permut = table(seq(1,n,1), order(target=Order, by=1, index.return=TRUE), n, n) X = permut %*% X #z-transform, whitening operator is better @@ -81,22 +82,22 @@ m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer n stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17 X = (X - means)/stds - if(nrow(W1_rand) == 0 & ncol(W1_rand) == 0) - W1_rand = Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform") - if(nrow(W2_rand) == 0 & ncol(W2_rand) == 0) - W2_rand = Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform") - if(nrow(W3_rand) == 0 & ncol(W3_rand) == 0) - W3_rand = Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform") - if(nrow(W4_rand) == 0 & ncol(W4_rand) == 0) - W4_rand = Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform") - - W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand - b1 = matrix(0, rows=num_hidden1, cols=1) - W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand - b2 = matrix(0, rows=num_hidden2, cols=1) - W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand - b3 = matrix(0, rows=num_hidden1, cols=1) - W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand + if(nrow(W1) == 0 & ncol(W1) == 0) + W1 = Rand(rows=numHidden1, cols=m, min=-1, max=1, pdf="uniform") + if(nrow(W2) == 0 & ncol(W2) == 
0) + W2 = Rand(rows=numHidden2, cols=numHidden1, min=-1, max=1, pdf="uniform") + if(nrow(W3) == 0 & ncol(W3) == 0) + W3 = Rand(rows=numHidden1, cols=numHidden2, min=-1, max=1, pdf="uniform") + if(nrow(W4) == 0 & ncol(W4) == 0) + W4 = Rand(rows=m, cols=numHidden1, min=-1, max=1, pdf="uniform") + + W1 = sqrt(6)/sqrt(m + numHidden1) * W1 + b1 = matrix(0, rows=numHidden1, cols=1) + W2 = sqrt(6)/sqrt(numHidden1 + numHidden2) * W2 + b2 = matrix(0, rows=numHidden2, cols=1) + W3 = sqrt(6)/sqrt(numHidden2 + numHidden1) * W3 + b3 = matrix(0, rows=numHidden1, cols=1) + W4 = sqrt(6)/sqrt(numHidden2 + m) * W4 b4 = matrix(0, rows=m, cols=1) upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1)) @@ -108,7 +109,7 @@ m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer n upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4)) upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4)) - if( full_obj ){ + if( fullObj ){ [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X) full_o = obj(full_E) @@ -116,12 +117,12 @@ m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer n } iter = 0 - num_iters_per_epoch = ceil(n / batch_size) - max_iterations = max_epochs * num_iters_per_epoch + num_iters_per_epoch = ceil(n / batchSize) + max_iterations = epochs * num_iters_per_epoch #print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations) beg = 1 while( iter < max_iterations ){ - end = beg + batch_size - 1 + end = beg + batchSize - 1 if(end > n) end = n X_batch = X[beg:end,] @@ -158,7 +159,7 @@ m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer n if( iter %% num_iters_per_epoch == 0 ) step = step * decay - if( full_obj & iter %% num_iters_per_epoch == 0 ){ + if( fullObj & iter %% num_iters_per_epoch == 0 ){ [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X) full_o = obj(full_E) diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml index 3699eb0c6dd..4c484a1d0aa 100644 --- a/scripts/builtin/bandit.dml +++ b/scripts/builtin/bandit.dml @@ -24,10 +24,10 @@ # # INPUT: # ----------------------------------------------------------------------------------- -# X_train --- -# Y_train --- -# X_test --- -# Y_test --- +# X --- +# Y --- +# Xtest --- +# Ytest --- # metaList --- # evaluationFunc --- # evalFunHp --- @@ -40,7 +40,6 @@ # cv --- # cvk --- # verbose --- -# output --- # ----------------------------------------------------------------------------------- # # OUTPUT: @@ -48,7 +47,7 @@ # perf --- # -------------------------------------- -m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, Matrix[Double] X_test, Matrix[Double] Y_test, List[Unknown] metaList, +m_bandit = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, List[Unknown] metaList, String evaluationFunc, Matrix[Double] evalFunHp, Frame[Unknown] lp, Matrix[Double] lpHp, Frame[Unknown] primitives, Frame[Unknown] param, Integer k = 3, Integer R=50, Double baseLineScore, Boolean cv, Integer cvk = 2, Double ref = 0, Integer seed = -1, Boolean enablePruning = FALSE, Boolean verbose = TRUE) return (Frame[Unknown] bestPipeline, Matrix[Double] bestHyperparams, Matrix[Double] bestAccuracy, Frame[String] applyFunc) @@ -110,7 +109,7 @@ m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, Matrix[Doubl } 
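# successive-halving bookkeeping: keep the first n_i candidate configurations
# and charge their n_i * r_i pipeline evaluations against the overall budget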
configurations = configurations[1:n_i, ] pipelines_executed = pipelines_executed + (n_i * r_i) - [outPip,outHp, pruneCount] = run_with_hyperparam(ph_pip=configurations, r_i=r_i, X=X_train, Y=Y_train, Xtest=X_test, Ytest=Y_test, metaList=metaList, + [outPip,outHp, pruneCount] = run_with_hyperparam(ph_pip=configurations, r_i=r_i, X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList, evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, param=param, cv=cv, cvk=cvk, ref=ref, seed = seed, enablePruning=enablePruning) totalPruneCount = totalPruneCount + pruneCount # sort the pipelines by order of accuracy decreasing @@ -310,7 +309,7 @@ run_with_hyperparam = function(Frame[Unknown] ph_pip, Integer r_i = 1, Matrix[Do else { [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp, changesByPip] = executePipeline(pipeline=op, - Xtrain=X, Ytrain=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList2, hyperParameters=hp_matrix, hpForPruning=hpForPruning, + X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList2, hyperParameters=hp_matrix, hpForPruning=hpForPruning, changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE) if(max(eYtrain) == min(eYtrain)) print("Y contains only one class") @@ -539,7 +538,7 @@ return (Double accuracy, Matrix[Double] evalFunHp, Matrix[Double] hpForPruning, if(as.scalar(pipList['flags']) != 0) # this flag is zero when CV is called from the dirtyScore function, means only accuracy calculation but no pipeline execution { [trainX, trainy, testX, testy, Tr, hpForPruning, changesByOp, changesByPip] = executePipeline(pipeline=as.frame(pipList['ph']), - Xtrain=trainX, Ytrain=trainy, Xtest= testX, Ytest=testy, metaList=metaList, hyperParameters=as.matrix(pipList['hp']), hpForPruning=hpForPruning, + X=trainX, Y=trainy, Xtest= testX, Ytest=testy, metaList=metaList, hyperParameters=as.matrix(pipList['hp']), hpForPruning=hpForPruning, changesByOp=changesByOp, flagsCount=as.scalar(pipList['flags']), test=TRUE, verbose=FALSE) #TODO double check why this is necessary mincol = min(ncol(cvChanges),ncol(changesByOp)) @@ -669,7 +668,7 @@ run_with_hyperparamNested = function(Frame[Unknown] ph_pip, Integer r_i = 1, Mat else { [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp, changesByPip] = executePipeline(pipeline=op, - Xtrain=X, Ytrain=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList2, hyperParameters=hp_matrix, hpForPruning=hpForPruning, + X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList2, hyperParameters=hp_matrix, hpForPruning=hpForPruning, changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE) if(max(eYtrain) == min(eYtrain)) print("Y contains only one class") diff --git a/scripts/builtin/components.dml b/scripts/builtin/components.dml index dfbe8bd700e..78f28f2c772 100644 --- a/scripts/builtin/components.dml +++ b/scripts/builtin/components.dml @@ -26,16 +26,16 @@ # # INPUT: # ----------------------------------------------------------------------------------------------- -# X Location to read the matrix of feature vectors -# Y Location to read the matrix with category labels -# icpt Intercept presence, shifting and rescaling X columns: 0 = no intercept, -# no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X; -# 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 -# tol tolerance ("epsilon") -# reg regularization parameter (lambda = 1/C); intercept is not regularized -# maxi max. number of outer (Newton) iterations -# maxii max. 
number of inner (conjugate gradient) iterations, 0 = no max
-# verbose flag specifying if logging information should be printed
+# G Adjacency matrix of the (undirected, symmetric) graph
+# maxIter Maximum number of iterations (0 = no maximum)
+# verbose flag specifying if logging information should be printed
# -----------------------------------------------------------------------------------------------
#
# OUTPUT:
#
@@ -43,7 +43,7 @@
-# betas regression betas as output for prediction
+# C Vector with the connected component ID per vertex
# ----------------------------------------------------------------------------------------------------

-m_components = function(Matrix[Double] G, Integer maxi = 0, Boolean verbose = TRUE)
+m_components = function(Matrix[Double] G, Integer maxIter = 0, Boolean verbose = TRUE)
 return (Matrix[Double] C)
{
 # best effort check for symmetry (not exact but fast)
@@ -58,7 +58,7 @@ m_components = function(Matrix[Double] G, Integer maxi = 0, Boolean verbose = TR
 iter = 1;
 # iterative computation of connected components
- while( diff > 0 & (maxi==0 | iter<=maxi) ) {
+ while( diff > 0 & (maxIter==0 | iter<=maxIter) ) {
 u = max(rowMaxs(G * t(c)), c);
 diff = sum(u != c)
 c = u; # update assignment
diff --git a/scripts/builtin/correctTypos.dml b/scripts/builtin/correctTypos.dml
index f772d8ecf08..25aa39f1ec0 100644
--- a/scripts/builtin/correctTypos.dml
+++ b/scripts/builtin/correctTypos.dml
@@ -35,9 +35,9 @@
# INPUT:
# ----------------------------------------------------------------------------------------
# strings The nx1 input frame of corrupted strings
-# frequency_threshold Strings that occur above this frequency level will not be corrected
-# distance_threshold Max distance at which strings are considered similar
-# is_verbose Print debug information
+# frequencyThreshold Strings that occur above this frequency level will not be corrected
+# distanceThreshold Max distance at which strings are considered similar
+# verbose Print debug information
# ----------------------------------------------------------------------------------------
#
# OUTPUT:
@@ -47,14 +47,14 @@

# TODO: future: add parameter for list of words that are sure to be correct

-f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05, Integer distance_threshold=2, Boolean is_verbose=FALSE)
- return (Frame[String] Y, Double frequency_threshold, Integer distance_threshold, Matrix[Double] distance_matrix, Frame[Unknown] dict)
+f_correctTypos = function(Frame[String] strings, Double frequencyThreshold=0.05, Integer distanceThreshold=2, Boolean verbose=FALSE)
+ return (Frame[String] Y, Double frequencyThreshold, Integer distanceThreshold, Matrix[Double] distance_matrix, Frame[Unknown] dict)
{
- if(is_verbose)
+ if(verbose)
 print ("BEGIN CORRECT-TYPOS SCRIPT");

 num_strings = length(strings);
- if(is_verbose)
+ if(verbose)
 print("num strings: " + num_strings + "\n")

 strings = map(strings, "s -> s.toLowerCase()");
@@ -67,7 +67,7 @@ f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05
 lengths = as.matrix(map(strings, "s -> s.length()"));
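# the steps below build an ASCII matrix of the distinct strings and then fill
# the upper triangle of a pairwise Damerau-Levenshtein distance matrix, bounded
# by distanceThreshold so that clearly dissimilar pairs are skipped early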
num_different_strings = nrow(strings); - if (is_verbose) { + if (verbose) { print("dict:" ) print(toString(dict)); print("lengths:") @@ -76,7 +76,7 @@ f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05 # generate ascii matrix max_len = max(lengths); - if (is_verbose) { + if (verbose) { print("max_len: " + max_len + "\n"); } # TODO: when proper lambda expressions are supported: rewrite in not so hacky @@ -87,7 +87,7 @@ f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05 ascii_matrix[j, i] = tmp[1, 1]; } } - if (is_verbose) { + if (verbose) { print("ascii_matrix: ") print(toString(ascii_matrix)); } @@ -97,12 +97,12 @@ f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05 parfor (i in 1:num_different_strings) { parfor (j in i:num_different_strings) { if (i != j) { - if(abs(as.scalar(lengths[i, 1]) - as.scalar(lengths[j , 1])) >= distance_threshold) { + if(abs(as.scalar(lengths[i, 1]) - as.scalar(lengths[j , 1])) >= distanceThreshold) { distance_matrix[i, j] = 42000; } else { A = ascii_matrix[1:as.scalar(lengths[i,1]), i]; B = ascii_matrix[1:as.scalar(lengths[j,1]), j]; - d = damerauLevenshteinDistanceBound(A, B, distance_threshold, FALSE); + d = damerauLevenshteinDistanceBound(A, B, distanceThreshold, FALSE); distance_matrix[i, j] = ifelse(d == -1, 42000, d); } } @@ -110,7 +110,7 @@ f_correctTypos = function(Frame[String] strings, Double frequency_threshold=0.05 } upper_triangle = upper.tri(target=distance_matrix, values=TRUE); distance_matrix = distance_matrix + t(upper_triangle) + diag(matrix(42000, num_different_strings, 1)); - Y = correctTyposApply(Y, frequency_threshold, distance_threshold, distance_matrix, dict) + Y = correctTyposApply(Y, frequencyThreshold, distanceThreshold, distance_matrix, dict) } replaceStrings = function(String replacement, String to_replace, Frame[String] strings) @@ -129,7 +129,7 @@ buildDictionary = function(Frame[String] S) dict = cbind(dstr, as.frame(frequencies)); } -damerauLevenshteinDistanceBound = function(matrix[double] A, matrix[double] B, double bound, Boolean is_verbose) +damerauLevenshteinDistanceBound = function(matrix[double] A, matrix[double] B, double bound, Boolean verbose) return(double dl_distance) { dl_matrix = matrix(0, rows = length(A) + 1, cols = length(B) + 1); @@ -176,7 +176,7 @@ damerauLevenshteinDistanceBound = function(matrix[double] A, matrix[double] B, d break_condition = min(dl_matrix[i - 1, ]) > bound & min(dl_matrix[i, ]) > bound; } - if (is_verbose){ + if (verbose){ print("dl distance matrix:") print(toString(dl_matrix)); } diff --git a/scripts/builtin/correctTyposApply.dml b/scripts/builtin/correctTyposApply.dml index 8d5870caef8..e34164aa5db 100644 --- a/scripts/builtin/correctTyposApply.dml +++ b/scripts/builtin/correctTyposApply.dml @@ -38,9 +38,9 @@ # ---------------------------------------------------------------------------------------- # strings The nx1 input frame of corrupted strings # nullMask --- -# frequency_threshold Strings that occur above this frequency level will not be corrected -# distance_threshold Max distance at which strings are considered similar -# distance matrix --- +# frequencyThreshold Strings that occur above this frequency level will not be corrected +# distanceThreshold Max distance at which strings are considered similar +# distances --- # dict --- # ---------------------------------------------------------------------------------------- # @@ -50,7 +50,8 @@ # 
--------------------------------------------------------------------------------------------- -f_correctTyposApply = function(Frame[String] strings, Double frequency_threshold = 0.05, Integer distance_threshold = 2, Matrix[Double] distance_matrix, Frame[Unknown] dict) +f_correctTyposApply = function(Frame[String] strings, Double frequencyThreshold = 0.05, Integer distanceThreshold = 2, + Matrix[Double] distances, Frame[Unknown] dict) return (Frame[String] Y) { strings = map(strings, "s -> s.toLowerCase()"); @@ -63,18 +64,18 @@ f_correctTyposApply = function(Frame[String] strings, Double frequency_threshold for (i in 1:num_different_strings) { idx = as.integer(as.scalar(sorted_frequency_idxs[i])); # lowest frequency idx frequency = as.scalar(frequencies[idx]); - if (frequency < frequency_threshold) { - min_idxs = t(order(target=t(distance_matrix[idx,]), index.return=TRUE)); + if (frequency < frequencyThreshold) { + min_idxs = t(order(target=t(distances[idx,]), index.return=TRUE)); j = 1; break=FALSE; while (j <= num_different_strings & !break) { min_idx = as.integer(as.scalar(min_idxs[,j])); - min = as.integer(as.scalar(distance_matrix[idx, min_idx])); + min = as.integer(as.scalar(distances[idx, min_idx])); replacement_frequency = as.scalar(frequencies[min_idx]); # TODO: additional parameter for replacement_frequency? - if (min < distance_threshold & replacement_frequency > frequency_threshold/2) { + if (min < distanceThreshold & replacement_frequency > frequencyThreshold/2) { to_replace = as.scalar(strings[idx,]); replacement = as.scalar(strings[min_idx,]); Y = replaceStrings1(replacement, to_replace, Y); diff --git a/scripts/builtin/cox.dml b/scripts/builtin/cox.dml index 779d1930823..5af59db9ada 100644 --- a/scripts/builtin/cox.dml +++ b/scripts/builtin/cox.dml @@ -25,28 +25,28 @@ # # INPUT: # ------------------------------------------------------------------------------------------------- -# X Location to read the input matrix X containing the survival data -# containing the following information -# 1: timestamps -# 2: whether an event occurred (1) or data is censored (0) -# 3: feature vectors -# TE Column indices of X as a column vector which contain timestamp -# (first row) and event information (second row) -# F Column indices of X as a column vector which are to be used for -# fitting the Cox model -# R If factors (categorical variables) are available in the input matrix -# X, location to read matrix R containing the start and end indices of -# the factors in X -# R[,1]: start indices -# R[,2]: end indices -# Alternatively, user can specify the indices of the baseline level of -# each factor which needs to be removed from X; in this case the start -# and end indices corresponding to the baseline level need to be the same; -# if R is not provided by default all variables are considered to be continuous -# alpha Parameter to compute a 100*(1-alpha)% confidence interval for the betas -# tol Tolerance ("epsilon") -# moi Max. number of outer (Newton) iterations -# mii Max. 
number of inner (conjugate gradient) iterations, 0 = no max +# X Location to read the input matrix X containing the survival data +# containing the following information +# 1: timestamps +# 2: whether an event occurred (1) or data is censored (0) +# 3: feature vectors +# TE Column indices of X as a column vector which contain timestamp +# (first row) and event information (second row) +# F Column indices of X as a column vector which are to be used for +# fitting the Cox model +# R If factors (categorical variables) are available in the input matrix +# X, location to read matrix R containing the start and end indices of +# the factors in X +# R[,1]: start indices +# R[,2]: end indices +# Alternatively, user can specify the indices of the baseline level of +# each factor which needs to be removed from X; in this case the start +# and end indices corresponding to the baseline level need to be the same; +# if R is not provided by default all variables are considered to be continuous +# alpha Parameter to compute a 100*(1-alpha)% confidence interval for the betas +# tol Tolerance ("epsilon") +# maxIter Max. number of outer (Newton) iterations +# maxInnerIter Max. number of inner (conjugate gradient) iterations, 0 = no max # # ------------------------------------------------------------------------------------------------- # @@ -80,7 +80,7 @@ # ------------------------------------------------------------------------------------------------------ m_cox = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] F, Matrix[Double] R, - Double alpha = 0.05, Double tol = 0.000001, Integer moi = 100, Integer mii = 0) + Double alpha = 0.05, Double tol = 0.000001, Integer maxIter = 100, Integer maxInnerIter = 0) return (Matrix[Double] M, Matrix[Double] S, Matrix[Double] T, Matrix[Double] COV, Matrix[Double] RT, Matrix[Double] XO) { X_orig = X; @@ -201,7 +201,7 @@ m_cox = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] F, Matrix[D delta = 0.5 * sqrt (D) / max (sqrt (rowSums (X ^ 2))); initial_g2 = sum (g ^ 2); exit_g2 = initial_g2 * tol ^ 2; - maxiter = 100; + maxiter = maxIter; maxinneriter = min (D, 100); i = 0; sum_g2 = sum (g ^ 2); diff --git a/scripts/builtin/cspline.dml b/scripts/builtin/cspline.dml index eb8e1fdc5db..9ce7b6d3326 100644 --- a/scripts/builtin/cspline.dml +++ b/scripts/builtin/cspline.dml @@ -26,32 +26,32 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# X 1-column matrix of x values knots. It is assumed that x values are -# monotonically increasing and there is no duplicates points in X -# Y 1-column matrix of corresponding y values knots -# inp_x the given input x, for which the cspline will find predicted y -# mode Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient) -# tol Tolerance (epsilon); conjugate graduent procedure terminates early if -# L2 norm of the beta-residual is less than tolerance * its initial norm -# maxi Maximum number of conjugate gradient iterations, 0 = no maximum +# X 1-column matrix of x values knots. 
It is assumed that x values are
-# monotonically increasing and there is no duplicates points in X
-# Y 1-column matrix of corresponding y values knots
-# inp_x the given input x, for which the cspline will find predicted y
-# mode Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient)
-# tol Tolerance (epsilon); conjugate graduent procedure terminates early if
-# L2 norm of the beta-residual is less than tolerance * its initial norm
-# maxi Maximum number of conjugate gradient iterations, 0 = no maximum
+# X 1-column matrix of x values knots. It is assumed that x values are
+# monotonically increasing and there are no duplicate points in X
+# Y 1-column matrix of corresponding y values knots
+# xPred the given input x, for which the cspline will find predicted y
+# mode Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient)
+# tol Tolerance (epsilon); conjugate gradient procedure terminates early if
+# L2 norm of the beta-residual is less than tolerance * its initial norm
+# maxIter Maximum number of conjugate gradient iterations, 0 = no maximum
# ---------------------------------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------------------------------------------
-# pred_Y Predicted value
-# K Matrix of k parameters
+# pred_Y Predicted value
+# K Matrix of k parameters
# ---------------------------------------------------------------------------------------------------

-m_cspline = function(Matrix[Double] X, Matrix[Double] Y, Double inp_x,
-String mode = "DS", Double tol = -1.0, Integer maxi = -1)
+m_cspline = function(Matrix[Double] X, Matrix[Double] Y, Double xPred,
+String mode = "DS", Double tol = -1.0, Integer maxIter = -1)
 return (Matrix[Double] pred_Y, Matrix[Double] K)
{
- if( mode == "CG" & maxi != -1 & tol != -1.0)
+ if( mode == "CG" & maxIter != -1 & tol != -1.0)
 {
- [pred_Y, K] = csplineCG(X=X, Y=Y, inp_x=inp_x, tol=tol, maxi=maxi);
+ [pred_Y, K] = csplineCG(X=X, Y=Y, xPred=xPred, tol=tol, maxIter=maxIter);
 }
 else {
- [pred_Y, K] = csplineDS(X=X, Y=Y, inp_x=inp_x);
+ [pred_Y, K] = csplineDS(X=X, Y=Y, xPred=xPred);
 }
}
diff --git a/scripts/builtin/csplineCG.dml b/scripts/builtin/csplineCG.dml
index a6e8b2077ed..741d817a36c 100644
--- a/scripts/builtin/csplineCG.dml
+++ b/scripts/builtin/csplineCG.dml
@@ -26,7 +26,7 @@
# X 1-column matrix of x values knots. It is assumed that x values are
# monotonically increasing and there is no duplicates points in X
# Y 1-column matrix of corresponding y values knots
-# inp_x the given input x, for which the cspline will find predicted y.
+# xPred the given input x, for which the cspline will find predicted y.
# tol Tolerance (epsilon); conjugate gradient procedure terminates early if
# L2 norm of the beta-residual is less than tolerance * its initial norm
-# maxi Maximum number of conjugate gradient iterations, 0 = no maximum
+# maxIter Maximum number of conjugate gradient iterations, 0 = no maximum
@@ -38,12 +38,12 @@
# K Matrix of k parameters
# ------------------------------------------------------------------------------------------------------

-m_csplineCG = function (Matrix[Double] X, Matrix[Double] Y, Double inp_x, Double tol = 0.000001, Integer maxi = 0)
+m_csplineCG = function (Matrix[Double] X, Matrix[Double] Y, Double xPred, Double tol = 0.000001, Integer maxIter = 0)
 return (Matrix[Double] pred_Y, Matrix[Double] K)
{
- K = calcKnotsDerivKsCG(X, Y, maxi, tol)
+ K = calcKnotsDerivKsCG(X, Y, maxIter, tol)

- y = interpSplineCG(inp_x, X, Y, K)
+ y = interpSplineCG(xPred, X, Y, K)
 pred_Y = matrix(y, 1, 1)
}
diff --git a/scripts/builtin/csplineDS.dml b/scripts/builtin/csplineDS.dml
index 647be16d0ff..b72cfc92e19 100644
--- a/scripts/builtin/csplineDS.dml
+++ b/scripts/builtin/csplineDS.dml
@@ -26,7 +26,7 @@
# X 1-column matrix of x values knots. It is assumed that x values are
# monotonically increasing and there is no duplicates points in X
# Y 1-column matrix of corresponding y values knots
-# inp_x the given input x, for which the cspline will find predicted y.
+# xPred the given input x, for which the cspline will find predicted y.
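+# Example (an illustrative sketch; assumes monotonically increasing knots):
+#   [pred_Y, K] = csplineDS(X=seq(1,5), Y=2*seq(1,5), xPred=2.5)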
# --------------------------------------------------------------------------------------------
#
# OUTPUT:
@@ -35,12 +35,12 @@
# K Matrix of k parameters
# ------------------------------------------------------------------------------------------------------

-m_csplineDS = function (Matrix[Double] X, Matrix[Double] Y, Double inp_x)
+m_csplineDS = function (Matrix[Double] X, Matrix[Double] Y, Double xPred)
 return (Matrix[Double] pred_Y, Matrix[Double] K)
{
 K = calcKnotsDerivKs(X, Y)

- y = interpSpline(inp_x, X, Y, K)
+ y = interpSpline(xPred, X, Y, K)
 pred_Y = matrix(y, 1, 1)
}
diff --git a/scripts/builtin/cvlm.dml b/scripts/builtin/cvlm.dml
index b30dccb0a70..505b71b03a8 100644
--- a/scripts/builtin/cvlm.dml
+++ b/scripts/builtin/cvlm.dml
@@ -25,12 +25,12 @@
#
# INPUT:
# ----------------------------------------------------------------------------------
-# X Recorded Data set into matrix
-# y 1-column matrix of response values.
-# k Number of subsets needed, It should always be more than 1 and less than nrow(X)
-# icpt Intercept presence, shifting and rescaling the columns of X
-# reg Regularization constant (lambda) for L2-regularization. set to nonzero for
-# highly dependant/sparse/numerous features
+# X Input data set, recorded as a matrix
+# y 1-column matrix of response values.
+# k Number of subsets (folds); must be greater than 1 and less than nrow(X)
+# intercept Intercept presence, shifting and rescaling the columns of X
+# reg Regularization constant (lambda) for L2 regularization; set to nonzero for
+# highly dependent/sparse/numerous features
# ----------------------------------------------------------------------------------
#
# OUTPUT:
@@ -39,7 +39,7 @@
# allbeta Validated data set
# --------------------------------------------------------------------------------------------

-m_cvlm = function(Matrix[Double] X, Matrix[Double] y, Integer k, Integer icpt = 0, Double reg = 1e-7)
+m_cvlm = function(Matrix[Double] X, Matrix[Double] y, Integer k, Integer intercept = 0, Double reg = 1e-7)
 return (Matrix[Double] y_predict, Matrix[Double] allbeta)
{
 M = nrow(X);
@@ -67,8 +67,8 @@ m_cvlm = function(Matrix[Double] X, Matrix[Double] y, Integer k, Integer icpt =
 trainRes = rbind(y[1:testS-1,], y[testE+1:M,]);
 }

- beta = lm(X=trainSet, y=trainRes, icpt=icpt, reg=reg);
- pred = lmPredict(X=testSet, B=beta, ytest=matrix(0,1,1), icpt=icpt);
+ beta = lm(X=trainSet, y=trainRes, intercept=intercept, reg=reg);
+ pred = lmPredict(X=testSet, B=beta, Ytest=matrix(0,1,1), intercept=intercept);
 y_predict[testS:testE,] = pred;
 allbeta[i,] = t(beta);
}
diff --git a/scripts/builtin/decisionTree.dml b/scripts/builtin/decisionTree.dml
index 69bf12af90c..ecf64ff94a3 100644
--- a/scripts/builtin/decisionTree.dml
+++ b/scripts/builtin/decisionTree.dml
@@ -47,15 +47,15 @@
# y Label matrix in recoded/binned representation
# ctypes Row-Vector of column types [1 scale/ordinal, 2 categorical]
# of shape 1-by-(ncol(X)+1), where the last entry is the y type
-# max_depth Maximum depth of the learned tree (stopping criterion)
-# min_leaf Minimum number of samples in leaf nodes (stopping criterion),
-# odd number recommended to avoid 50/50 leaf label decisions
-# min_split Minimum number of samples in leaf for attempting a split
-# max_features Parameter controlling the number of features used as split
-# candidates at tree nodes: m = ceil(num_features^max_features)
-# max_values
Parameter controlling the number of values per feature used -# as split candidates: nb = ceil(num_values^max_values) -# max_dataratio Parameter in [0,1] controlling when to materialize data +# minSplit Minimum number of samples in leaf for attempting a split +# maxFeatures Parameter controlling the number of features used as split +# candidates at tree nodes: m = ceil(numFeatures^maxFeatures) +# maxValues Parameter controlling the number of values per feature used +# as split candidates: nb = ceil(numValues^maxValues) +# maxDataRatio Parameter in [0,1] controlling when to materialize data # subsets of X and y on node splits. When set to 0, we always # scan the original X and y, which has the benefit of avoiding # the allocation and maintenance of data for all active nodes. @@ -72,16 +72,16 @@ # ------------------------------------------------------------------------------ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] ctypes, - Int max_depth = 10, Int min_leaf = 20, Int min_split = 50, - Double max_features = 0.5, Double max_values = 1.0, Double max_dataratio = 0.25, + Int maxDepth = 10, Int minLeaf = 20, Int minSplit = 50, + Double maxFeatures = 0.5, Double maxValues = 1.0, Double maxDataRatio = 0.25, String impurity = "gini", Int seed = -1, Boolean verbose = FALSE) return(Matrix[Double] M) { t1 = time(); # validation checks - if( max_depth > 32 ) - stop("decisionTree: invalid max_depth > 32: "+max_depth); + if( maxDepth > 32 ) + stop("decisionTree: invalid maxDepth > 32: "+maxDepth); if( sum(X<=0) != 0 ) stop("decisionTree: feature matrix X is not properly recoded/binned (values <= 0): "+sum(X<=0)); if( sum(abs(X-round(X))>1e-14) != 0 ) @@ -105,8 +105,8 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty I = matrix(1, rows=1, cols=nrow(X)); if( verbose ) { - print("decisionTree: initialize with max_depth=" + max_depth + ", max_features=" - + max_features +", max_dataratio=" + max_dataratio + ", impurity=" + print("decisionTree: initialize with maxDepth=" + maxDepth + ", maxFeatures=" + + maxFeatures +", maxDataRatio=" + maxDataRatio + ", impurity=" + impurity + ", seed=" + seed + "."); print("decisionTree: basic statistics:"); print("-- impurity: " + as.scalar(computeImpurity(y2, I, impurity))); @@ -115,7 +115,7 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty } # queue-based node splitting - M = matrix(0, rows=1, cols=2*(2^max_depth-1)) + M = matrix(0, rows=1, cols=2*(2^maxDepth-1)) queue = list(list(1,I,X2,y2)); # node IDs / data indicators maxPath = 1; while( length(queue) > 0 ) { @@ -130,7 +130,7 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty print("decisionTree: attempting split of node "+nID+" ("+sum(nI)+" rows)"); # optional rematerialization of data per node - if( sum(nI) < max_dataratio*ncol(nI) ) { + if( sum(nI) < maxDataRatio*ncol(nI) ) { if(verbose) print("-- compacting data: "+ncol(nI)+" --> "+sum(nI)); X2 = removeEmpty(target=X2, margin="rows", select=t(nI)); @@ -141,8 +141,8 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty # find best split attribute nSeed = ifelse(seed==-1, seed, seed*nID); [f, v, IDleft, Ileft, IDright, Iright] = findBestSplit( - X2, y2, foffb, foffe, nID, nI, min_leaf, max_features, max_values, impurity, nSeed); - validSplit = sum(Ileft) >= min_leaf & sum(Iright) >= min_leaf; + X2, y2, foffb, foffe, nID, nI, minLeaf, maxFeatures, maxValues, impurity, nSeed); + validSplit = sum(Ileft) >= 
minLeaf & sum(Iright) >= minLeaf; if(verbose) print("-- best split: f"+f+" <= "+v+" --> valid="+validSplit); if( validSplit ) @@ -153,11 +153,11 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty # split data, finalize or recurse if( validSplit ) { - if( sum(Ileft) >= min_split & floor(log(IDleft,2))+2 < max_depth ) + if( sum(Ileft) >= minSplit & floor(log(IDleft,2))+2 < maxDepth ) queue = append(queue, list(IDleft,Ileft,X2,y2)); else M[,2*IDleft] = computeLeafLabel(y2, Ileft, classify, verbose) - if( sum(Iright) >= min_split & floor(log(IDright,2))+2 < max_depth ) + if( sum(Iright) >= minSplit & floor(log(IDright,2))+2 < maxDepth ) queue = append(queue, list(IDright,Iright,X2,y2)); else M[,2*IDright] = computeLeafLabel(y2, Iright, classify, verbose) @@ -175,15 +175,15 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty } findBestSplit = function(Matrix[Double] X2, Matrix[Double] y2, Matrix[Double] foffb, Matrix[Double] foffe, - Int ID, Matrix[Double] I, Int min_leaf, Double max_features, Double max_values, String impurity, Int seed) + Int ID, Matrix[Double] I, Int minLeaf, Double maxFeatures, Double maxValues, String impurity, Int seed) return(Int f, Int v, Int IDleft, Matrix[Double] Ileft, Int IDright, Matrix[Double] Iright) { - # sample features iff max_features < 1 + # sample features iff maxFeatures < 1 n = ncol(foffb); numI = sum(I); feat = seq(1,n); - if( max_features < 1.0 ) { - rI = rand(rows=n, cols=1, seed=seed) <= (n^max_features/n); + if( maxFeatures < 1.0 ) { + rI = rand(rows=n, cols=1, seed=seed) <= (n^maxFeatures/n); feat = removeEmpty(target=feat, margin="rows", select=rI); if( sum(feat) == 0 ) #sample at least one feat[1,1] = round(rand(rows=1, cols=1, min=1, max=n)); @@ -205,8 +205,8 @@ findBestSplit = function(Matrix[Double] X2, Matrix[Double] y2, Matrix[Double] fo # (vectorized evaluation, each column in P is a split candidate) fP = upper.tri(target=matrix(1,belen,belen), diag=TRUE); vI = seq(1,belen); - if( max_values < 1.0 & ncol(fP)>10 ) { - rI2 = rand(rows=ncol(fP),cols=1,seed=seed) <= (ncol(fP)^max_values/ncol(fP)); + if( maxValues < 1.0 & ncol(fP)>10 ) { + rI2 = rand(rows=ncol(fP),cols=1,seed=seed) <= (ncol(fP)^maxValues/ncol(fP)); fP = removeEmpty(target=fP, margin="cols", select=t(rI2)); vI = removeEmpty(target=vI, margin="rows", select=rI2); } @@ -222,7 +222,7 @@ findBestSplit = function(Matrix[Double] X2, Matrix[Double] y2, Matrix[Double] fo ig = replace(target=ig, pattern=NaN, replacement=0); # track best split value and index, incl validity - valid = (rowSums(Ileft)>=min_leaf) & (rowSums(Iright)>=min_leaf); + valid = (rowSums(Ileft)>=minLeaf) & (rowSums(Iright)>=minLeaf); bestig = max(valid*ig); bestv = ifelse(bestig>0, nrow(valid)-as.scalar(rowIndexMax(t(rev(valid*ig))))+beg, -1); if( bestv >= 0 ) diff --git a/scripts/builtin/deepWalk.dml b/scripts/builtin/deepWalk.dml index 3579c3f83a1..e891e7a8ea1 100644 --- a/scripts/builtin/deepWalk.dml +++ b/scripts/builtin/deepWalk.dml @@ -28,7 +28,7 @@ # d embedding size # gamma walks per vertex # t walk length -# alpha learning rate +# lr learning rate # beta factor for decreasing learning rate # ------------------------------------------------------------------------------------ # @@ -40,7 +40,7 @@ source("scripts/staging/entity-resolution/primitives/postprocessing.dml") as post; m_deepWalk = function(Matrix[Double] Graph, Integer w, Integer d, - Integer gamma, Integer t, Double alpha=0.025, Double beta=0.9) + Integer gamma, Integer t, Double 
lr=0.025, Double beta=0.9) return(Matrix[Double] Phi) { word_count = nrow(Graph) @@ -60,10 +60,10 @@ m_deepWalk = function(Matrix[Double] Graph, Integer w, Integer d, vocab_shuffled = sample(word_count, word_count, FALSE) for (node_idx in 1:length(vocab_shuffled)) { random_walk = randomWalk(Graph, as.scalar(vocab_shuffled[node_idx]), t) - [Phi, Theta] = skipGram(Phi, Theta, T, random_walk, w, alpha) + [Phi, Theta] = skipGram(Phi, Theta, T, random_walk, w, lr) } # decreasing learning rate - alpha = alpha * beta + lr = lr * beta } } @@ -93,7 +93,7 @@ randomWalk = function(Matrix[Double] Graph, Integer start_vertex, Integer walk_l } skipGram = function(Matrix[double] Phi, Matrix[double] Theta, - Matrix[double] Tree, Matrix[double] walk, Integer window_size, Double alpha) + Matrix[double] Tree, Matrix[double] walk, Integer window_size, Double lr) return(Matrix[double] Phi_new, Matrix[double] Theta_new) { Phi_new = Phi @@ -110,21 +110,21 @@ skipGram = function(Matrix[double] Phi, Matrix[double] Theta, left_neighbors = walk[min_val:(w_i-1)] for (u_k in 1:nrow(left_neighbors)) { [Phi_new, Theta_new] = update(Phi_new, Theta_new, - Tree, tree_depth, as.scalar(left_neighbors[u_k]), as.scalar(walk[w_i]), alpha) + Tree, tree_depth, as.scalar(left_neighbors[u_k]), as.scalar(walk[w_i]), lr) } } if (w_i != nrow(walk)) { right_neighbors = walk[(w_i+1):max_val] for (u_k in 1:nrow(right_neighbors)) { [Phi_new, Theta_new] = update(Phi_new, Theta_new, - Tree, tree_depth, as.scalar(right_neighbors[u_k]), as.scalar(walk[w_i]), alpha) + Tree, tree_depth, as.scalar(right_neighbors[u_k]), as.scalar(walk[w_i]), lr) } } } } update = function(Matrix[double] Phi, Matrix[double] Theta, - Matrix[double] Tree, Integer tree_depth, Integer u, Integer v, Double alpha) + Matrix[double] Tree, Integer tree_depth, Integer u, Integer v, Double lr) return (Matrix[double] Phi_new, Matrix[double] Theta_new) { Phi_new = Phi @@ -138,13 +138,13 @@ update = function(Matrix[double] Phi, Matrix[double] Theta, # compute negative gradient for Theta update neg_gradient = gradients * Phi[v] for (i in 1:nrow(gradients)) - Theta_new[as.scalar(path_to_u[i])] = Theta_new[as.scalar(path_to_u[i])] + alpha * neg_gradient[i] + Theta_new[as.scalar(path_to_u[i])] = Theta_new[as.scalar(path_to_u[i])] + lr * neg_gradient[i] # compute negative gradient for Phi update P = table(seq(1,tree_depth), path_to_u[1:tree_depth], tree_depth, nrow(Theta)); target_theta = P %*% Theta; neg_gradient = t(gradients) %*% target_theta - Phi_new[v] = Phi_new[v] + alpha * neg_gradient + Phi_new[v] = Phi_new[v] + lr * neg_gradient } computeGradients = function(Integer u, Integer v, Matrix[double] Theta, diff --git a/scripts/builtin/differenceStatistics.dml b/scripts/builtin/differenceStatistics.dml index 0e9019f0963..102bd10aa8f 100644 --- a/scripts/builtin/differenceStatistics.dml +++ b/scripts/builtin/differenceStatistics.dml @@ -28,7 +28,10 @@ # X First Matrix to compare # Y Second Matrix to compare # -------------------------------------------------------------------------------- - +# OUTPUT: +# ---------------------------------------------------------------------------------------- +# +# ---------------------------------------------------------------------------------------- m_differenceStatistics = function(Matrix[Double] X, Matrix[Double] Y) { P = matrix("0.0 0.01 0.1 0.25 0.5 0.75 0.90 0.99 1.0", rows= 9, cols=1) diff --git a/scripts/builtin/ema.dml b/scripts/builtin/ema.dml index a1811f64481..800524cfe56 100644 --- a/scripts/builtin/ema.dml +++ 
b/scripts/builtin/ema.dml @@ -24,7 +24,7 @@ # INPUT: # ------------------------------------------------------------------------------------------ # X Frame that contains time series data that needs to be imputed -# search_iterations Integer -- Budget iterations for parameter optimization, +# iter Integer -- Budget iterations for parameter optimization, # used if parameters weren't set # mode Type of EMA method. Either "single", "double" or "triple" # freq Seasonality when using triple EMA. @@ -40,9 +40,9 @@ # TODO: convert to DML builtin using cumsumprod(data, alpha) -f_ema = function(Frame[Double] X, Integer search_iterations, String mode, Integer freq, +f_ema = function(Frame[Double] X, Integer iter, String mode, Integer freq, Double alpha, Double beta, Double gamma) return (Frame[Double] Y) { - M = map(X, "UtilFunctions.exponentialMovingAverageImputation(" + search_iterations + ";" + M = map(X, "UtilFunctions.exponentialMovingAverageImputation(" + iter + ";" + mode + ";" + freq + ";" + alpha + ";" + beta + ";" + gamma + ")") Y = as.frame(as.matrix(M)) } diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml index 4b098e7a302..970ee640208 100644 --- a/scripts/builtin/executePipeline.dml +++ b/scripts/builtin/executePipeline.dml @@ -49,12 +49,14 @@ # changesByOp --- # -------------------------------------------------------------------------------------- -f_executePipeline = function(Frame[String] pipeline, Matrix[Double] Xtrain, Matrix[Double] Ytrain, +f_executePipeline = function(Frame[String] pipeline, Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, List[Unknown] metaList, Matrix[Double] hyperParameters, Matrix[Double] hpForPruning = as.matrix(0), Matrix[Double] changesByOp = as.matrix(0), Integer flagsCount, Boolean test = FALSE, Boolean verbose) return (Matrix[Double] Xtrain, Matrix[Double] Ytrain, Matrix[Double] Xtest, Matrix[Double] Ytest, Double t2, Matrix[Double] hpForPruning, Matrix[Double] changesByOp, Double changesAll, List[Unknown] internalStates) { + Xtrain = X + Ytrain = Y internalStates = list() mask=as.matrix(metaList['mask']) applyFunc = as.frame(metaList['applyFunc']) @@ -92,8 +94,8 @@ f_executePipeline = function(Frame[String] pipeline, Matrix[Double] Xtrain, Mat # dataFlag 0 = only on numeric, 1 = on whole data if(yFlag) { - [L, Y] = remove(L, 1); - Ytrain = as.matrix(Y) + [L, Y_] = remove(L, 1); + Ytrain = as.matrix(Y_) } # Xtrain = confirmMeta(Xtrain, mask) # Xtest = confirmMeta(Xtest, mask) @@ -423,7 +425,7 @@ return (Matrix[Double] X, Matrix[Double] Y) classes1 = table(Y, 1) if(min(Y) != max(Y) & nrow(Y) > 1 & max(Y) <= 2) { - betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=FALSE) + betas = multiLogReg(X=X, Y=Y, intercept=1, reg=1e-4, maxIter=100, maxInnerIter=0, verbose=FALSE) [prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE) inc = ((yhat != Y) & (rowMaxs(prob) > threshold)) while(sum(inc) > 0 & maxIter > 0 & min(Y) != max(Y) & nrow(Y) > 1) @@ -442,7 +444,7 @@ return (Matrix[Double] X, Matrix[Double] Y) Y = rbind(Ycor, Yinc) maxIter = maxIter - 1 if(min(Y) != max(Y) & nrow(Y) > 1) { - betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=FALSE) + betas = multiLogReg(X=X, Y=Y, intercept=1, reg=1e-4, maxIter=100, maxInnerIter=0, verbose=FALSE) [prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE) inc = ((yhat != Y) & (rowMaxs(prob) > threshold)) } diff --git a/scripts/builtin/ffPredict.dml b/scripts/builtin/ffPredict.dml index 
4a3b49617db..1dbcdb8bd9f 100644 --- a/scripts/builtin/ffPredict.dml +++ b/scripts/builtin/ffPredict.dml @@ -25,7 +25,7 @@ # -------------------------------------------------------------------------------------------- # Model Trained ff neural network model # X Data used for making predictions -# batch_size Batch size +# batchSize Batch size # -------------------------------------------------------------------------------------------- # # OUTPUT: @@ -35,7 +35,7 @@ source("nn/layers/feedForward.dml") as ff_pass -s_ffPredict = function(List[unknown] model, Matrix[Double] X, Integer batch_size = 128) +s_ffPredict = function(List[unknown] model, Matrix[Double] X, Integer batchSize = 128) return (Matrix[Double] pred) { rows = nrow(X) @@ -43,9 +43,9 @@ s_ffPredict = function(List[unknown] model, Matrix[Double] X, Integer batch_size cols = ncol(out) pred = matrix(0, rows, cols) - iters = ceil(rows / batch_size) + iters = ceil(rows / batchSize) - batch = batch_size + batch = batchSize for(i in 1:iters) { begin = (i-1)*batch+1 end = min(rows, begin + batch - 1) diff --git a/scripts/builtin/ffTrain.dml b/scripts/builtin/ffTrain.dml index e3e7833008f..c1a86a4187b 100644 --- a/scripts/builtin/ffTrain.dml +++ b/scripts/builtin/ffTrain.dml @@ -27,15 +27,15 @@ # ------------------------------------------------------------------------------------------ # X Training data # Y Labels/Target values -# batch_size Batch size +# batchSize Batch size # epochs Number of epochs -# learning_rate Learning rate -# out_activation User specified output activation function. Possible values: +# lr Learning rate +# outActivation User specified output activation function. Possible values: # "sigmoid", "relu", "lrelu", "tanh", "softmax", "logits" (no activation). -# loss_fcn User specified loss function. Possible values: +# lossFn User specified loss function. Possible values: # "l1", "l2", "log_loss", "logcosh_loss", "cel" (cross-entropy loss). 
# shuffle Flag which indicates if dataset should be shuffled or not -# validation_split Fraction of training set used as validation set +# validationSplit Fraction of training set used as validation set # seed Seed for model initialization # verbose Flag which indicates if function should print to stdout # ------------------------------------------------------------------------------------------ @@ -65,9 +65,9 @@ source("nn/layers/softmax.dml") as softmax source("nn/optim/sgd_nesterov.dml") as sgd_nesterov -m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, - Integer epochs=20, Double learning_rate=0.003, String out_activation, - String loss_fcn, Boolean shuffle=FALSE, Double validation_split = 0.0, +m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batchSize=64, + Integer epochs=20, Double lr=0.003, String outActivation, + String lossFn, Boolean shuffle=FALSE, Double validationSplit = 0.0, Integer seed=-1, Boolean verbose=FALSE) return (List[unknown] model) { @@ -81,9 +81,9 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, } validation = FALSE - if(validation_split > 0.0) { + if(validationSplit > 0.0) { validation = TRUE - [X_train, Y_train, X_val, Y_val] = val_split(X, Y, validation_split) + [X_train, Y_train, X_val, Y_val] = val_split(X, Y, validationSplit) N = nrow(X_train) } else { X_train = X @@ -97,7 +97,6 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, [W2, b2] = affine::init(H1, t, seed) # Initialize SGD - lr = learning_rate mu = 0 decay = 0.99 vW1 = sgd_nesterov::init(W1) @@ -105,9 +104,9 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, vW2 = sgd_nesterov::init(W2) vb2 = sgd_nesterov::init(b2) - iters = ceil(N / batch_size) + iters = ceil(N / batchSize) - batch = batch_size + batch = batchSize for (e in 1:epochs) { loss = 0 val_loss = 0 @@ -125,17 +124,17 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, # such that appropriate functions are applied. This is # advantage for the user which does not have to pass model # and activation function as two arguments in predict method. - layers = list(W1=W1, b1=b1, W2=W2, b2=b2, activation=out_activation) + layers = list(W1=W1, b1=b1, W2=W2, b2=b2, activation=outActivation) cache = ff_pass::feedForward(X=X_batch, layers=layers) # Distinguish two cases when loss is calculated from the raw output # or from the output of the activation function. 
- if (out_activation != "logits") { - loss = loss + loss_forward(as.matrix(cache["outs2"]), Y_batch, loss_fcn) - dout2 = loss_backward(as.matrix(cache["outs2"]), Y_batch, loss_fcn) + if (outActivation != "logits") { + loss = loss + loss_forward(as.matrix(cache["outs2"]), Y_batch, lossFn) + dout2 = loss_backward(as.matrix(cache["outs2"]), Y_batch, lossFn) } else { - loss = loss + loss_forward(as.matrix(cache["out2"]), Y_batch, loss_fcn) - dout2 = loss_backward(as.matrix(cache["out2"]), Y_batch, loss_fcn) + loss = loss + loss_forward(as.matrix(cache["out2"]), Y_batch, lossFn) + dout2 = loss_backward(as.matrix(cache["out2"]), Y_batch, lossFn) } [dW1, db1, dW2, db2] = feed_backward(X_batch, layers, cache, dout2) @@ -147,10 +146,10 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, if(validation) { cache = ff_pass::feedForward(X=X_val, layers=layers) - if (out_activation != "logits") - val_loss = val_loss + loss_forward(as.matrix(cache["outs2"]), Y_val, loss_fcn) + if (outActivation != "logits") + val_loss = val_loss + loss_forward(as.matrix(cache["outs2"]), Y_val, lossFn) else - val_loss = val_loss + loss_forward(as.matrix(cache["out2"]), Y_val, loss_fcn) + val_loss = val_loss + loss_forward(as.matrix(cache["out2"]), Y_val, lossFn) } } @@ -163,7 +162,7 @@ m_ffTrain = function(Matrix[double] X, Matrix[double] Y, Integer batch_size=64, print("Epoch: " + e + ", Train loss: " + loss/iters) } } - model = list(W1=W1, b1=b1, W2=W2, b2=b2, activation=out_activation) + model = list(W1=W1, b1=b1, W2=W2, b2=b2, activation=outActivation) } @@ -198,32 +197,32 @@ apply_activation_backward = function(Matrix[double] dout, Matrix[double] X, Stri } } -loss_forward = function(Matrix[double] prediction, Matrix[double] target, String loss_fcn) +loss_forward = function(Matrix[double] prediction, Matrix[double] target, String lossFn) return(Double loss) { - if (loss_fcn == "l1") { + if (lossFn == "l1") { loss = l1_loss::forward(prediction, target) - } else if(loss_fcn == "l2") { + } else if(lossFn == "l2") { loss = l2_loss::forward(prediction, target) - } else if(loss_fcn == "log_loss") { + } else if(lossFn == "log_loss") { loss = log_loss::forward(prediction, target) - } else if(loss_fcn == "logcosh_loss") { + } else if(lossFn == "logcosh_loss") { loss = logcosh_loss::forward(prediction, target) } else { loss = cel::forward(prediction, target) } } -loss_backward = function(Matrix[double] prediction, Matrix[double] target, String loss_fcn) +loss_backward = function(Matrix[double] prediction, Matrix[double] target, String lossFn) return(Matrix[Double] dout) { - if (loss_fcn == "l1") { + if (lossFn == "l1") { dout = l1_loss::backward(prediction, target) - } else if(loss_fcn == "l2") { + } else if(lossFn == "l2") { dout = l2_loss::backward(prediction, target) - } else if(loss_fcn == "log_loss") { + } else if(lossFn == "log_loss") { dout = log_loss::backward(prediction, target) - } else if(loss_fcn == "logcosh_loss") { + } else if(lossFn == "logcosh_loss") { dout = logcosh_loss::backward(prediction, target) } else { dout = cel::backward(prediction, target)
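For quick reference, a minimal usage sketch of the renamed ffTrain/ffPredict interface (X and Y are placeholder training data; the argument values are the defaults from the signatures above or illustrative choices, not part of this diff):

# train a two-layer feed-forward net with the renamed arguments
model = ffTrain(X=X, Y=Y, batchSize=64, epochs=20, lr=0.003, outActivation="sigmoid", lossFn="log_loss", shuffle=TRUE, validationSplit=0.2, seed=42, verbose=TRUE)
# score with the renamed batchSize argument
pred = ffPredict(model=model, X=X, batchSize=128)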
diff --git a/scripts/builtin/fit_pipeline.dml index 3c81b6bc969..997e9efc70e 100644 --- a/scripts/builtin/fit_pipeline.dml +++ b/scripts/builtin/fit_pipeline.dml @@ -98,7 +98,7 @@ return (Matrix[Double] scores, Matrix[Double] cleanTrain, Matrix[Double] cleanTe # # # now test accuracy - [eXtrain, eYtrain, eXtest, eYtest, a, b, c, d, iState] = executePipeline(pipeline=pip, Xtrain=eXtrain, Ytrain=eYtrain, + [eXtrain, eYtrain, eXtest, eYtest, a, b, c, d, iState] = executePipeline(pipeline=pip, X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, metaList=metaList, hyperParameters=hp_matrix, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE) if(max(eYtrain) == min(eYtrain)) diff --git a/scripts/builtin/fixInvalidLengthsApply.dml index c6b30279064..245787e28ba 100644 --- a/scripts/builtin/fixInvalidLengthsApply.dml +++ b/scripts/builtin/fixInvalidLengthsApply.dml @@ -24,9 +24,9 @@ # INPUT: # ------------------------ # X --- -# mask --- -# ql --- -# qu --- +# Mask --- +# QL --- +# QU --- # ------------------------ # # OUTPUT: # ------------------------ # # M --- # ------------------------ -f_fixInvalidLengthsApply = function(Frame[Unknown] X, Matrix[Double] mask, Matrix[Double] qLow, Matrix[Double] qUp) +f_fixInvalidLengthsApply = function(Frame[Unknown] X, Matrix[Double] Mask, Matrix[Double] QL, Matrix[Double] QU) return (Frame[Unknown] X) { length = map(X, "x -> x.length()") length = as.matrix(length) - length = replace(target = (length * mask), pattern = NaN, replacement = 0) - M = ( length < qLow | length > qUp) - # # # check if mask vector has 1 in more than one column + length = replace(target = (length * Mask), pattern = NaN, replacement = 0) + M = ( length < QL | length > QU) + # # # check if Mask vector has 1 in more than one column # # # this indicates that two values are being swapped and can be fixed rowCountSwap = rowSums(M) >= 2 rowCountDangling = rowSums(M) > 0 & rowSums(M) < 2 @@ -64,7 +64,7 @@ return (Frame[Unknown] X) tmp = X[rowIdx, id1] X[rowIdx, id1] = X[rowIdx, id2] X[rowIdx, id2] = tmp - # # remove the mask for fixed entries + # # remove the Mask for fixed entries M[rowIdx, id1] = 0 M[rowIdx, id2] = 0 } @@ -82,7 +82,7 @@ return (Frame[Unknown] X) colIdx = removeEmpty(target = colIds[rowIdx], margin="cols") id1 = as.scalar(colIdx[1, 1]) X[rowIdx, id1] = "" - # # remove the mask for fixed entries + # # remove the Mask for fixed entries M[rowIdx, id1] = 0 } } diff --git a/scripts/builtin/garch.dml index 58375de551c..7ece836c61e 100644 --- a/scripts/builtin/garch.dml +++ b/scripts/builtin/garch.dml @@ -30,13 +30,13 @@ # INPUT: # ----------------------------------------------------------------------------------------- # X The input Matrix to apply GARCH on. 
-# kmax Number of iterations +# iter Number of iterations # momentum Momentum for momentum-gradient descent (set to 0 to deactivate) -# start_stepsize Initial gradient-descent stepsize -# end_stepsize gradient-descent stepsize at end (linear descent) -# start_vicinity proportion of randomness of restart-location for gradient descent at beginning -# end_vicinity same at end (linear decay) -# sim_seed seed for simulation of process on fitted coefficients +# startStepsize Initial gradient-descent stepsize +# endStepsize gradient-descent stepsize at end (linear descent) +# startVicinity proportion of randomness of restart-location for gradient descent at beginning +# endVicinity same at end (linear decay) +# seed seed for simulation of process on fitted coefficients # verbose verbosity, comments during fitting # ----------------------------------------------------------------------------------------- # @@ -49,8 +49,8 @@ # best_var_coef 1-st garch-coefficient of fitted process # -------------------------------------------------------------------------------------------------- -m_garch = function(Matrix[Double] X, Integer kmax, Double momentum, Double start_stepsize, Double end_stepsize, Double start_vicinity, - Double end_vicinity, Integer sim_seed, Boolean verbose) +m_garch = function(Matrix[Double] X, Integer iter, Double momentum, Double startStepsize, Double endStepsize, Double startVicinity, + Double endVicinity, Integer seed, Boolean verbose) return (Matrix[Double] fitted_X, Matrix[Double] fitted_var_hist, Double best_a0, Double best_arch_coef, Double best_var_coef) { [a0, arch_coef, var_coef] = sample_feasible_params() # initialize startpoint @@ -68,20 +68,20 @@ return (Matrix[Double] fitted_X, Matrix[Double] fitted_var_hist, Double best_a0, last_change_var_coef = 0 # initialize stepsize (linear decay) - stepsize = start_stepsize + stepsize = startStepsize # initialize vicinity (linear decay) - vicinity = start_vicinity + vicinity = startVicinity # all coeffs need be >0 to provide a feasible solution; clip at this constant clip_at = 0.00001 # do gradient descent - for (k in 1:kmax-1) { + for (k in 1:iter-1) { # update vicinity and stepsize - progress = k/kmax - stepsize = (1-progress) * start_stepsize + progress*end_stepsize - vicinity = (1-progress) * start_vicinity + progress*end_vicinity + progress = k/iter + stepsize = (1-progress) * startStepsize + progress*endStepsize + vicinity = (1-progress) * startVicinity + progress*endVicinity # get gradient [d_a0, d_arch_coef, d_var_coef] = gradient(X, a0, arch_coef, var_coef) @@ -168,7 +168,7 @@ return (Matrix[Double] fitted_X, Matrix[Double] fitted_var_hist, Double best_a0, # simulate process from best solution sim_steps = nrow(X) - [fitted_X, fitted_var_hist] = sim_garch(best_a0, best_arch_coef, best_var_coef, sim_steps, sim_seed) + [fitted_X, fitted_var_hist] = sim_garch(best_a0, best_arch_coef, best_var_coef, sim_steps, seed) # logging: report output if (verbose) { diff --git a/scripts/builtin/getAccuracy.dml b/scripts/builtin/getAccuracy.dml index 6738d36e98f..14f1b427a9b 100644 --- a/scripts/builtin/getAccuracy.dml +++ b/scripts/builtin/getAccuracy.dml @@ -23,8 +23,8 @@ # # INPUT: # -------------------------------------------------------------------------------------- -# y Ground truth (Actual Labels) -# yhat Predictions (Predicted labels) +# Y Predictions (Predicted labels) +# Ytest Ground truth (Actual Labels) # isWeighted Flag for weighted or non-weighted accuracy calculation # 
-------------------------------------------------------------------------------------- # @@ -33,23 +33,23 @@ # accuracy accuracy of the predicted labels # -------------------------------------------------------------------------------------------- -m_getAccuracy = function(Matrix[Double] y, Matrix[Double] yhat, Boolean isWeighted = FALSE) +m_getAccuracy = function(Matrix[Double] Y, Matrix[Double] Ytest, Boolean isWeighted = FALSE) return (Double accuracy) { if(!isWeighted) { - sum = sum(y == yhat) - accuracy = (sum/nrow(y)) * 100 + sum = sum(Ytest == Y) + accuracy = (sum/nrow(Ytest)) * 100 } else { - n = nrow(y) - classes = table(y, 1, max(y), 1) - resp = matrix(0, nrow(y), nrow(classes)) + n = nrow(Ytest) + classes = table(Ytest, 1, max(Ytest), 1) + resp = matrix(0, nrow(Ytest), nrow(classes)) resp = resp + t(seq(1, nrow(classes))) - respY = resp == y - respYhat = resp == yhat + respY = resp == Ytest + respYhat = resp == Y pred = respY * respYhat classes = replace(target = classes, pattern = 0, replacement = 1) diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml index c9512295648..b50c58edd04 100644 --- a/scripts/builtin/glm.dml +++ b/scripts/builtin/glm.dml @@ -35,7 +35,7 @@ # BETA_MIN_INDEX Column index for the smallest beta value # BETA_MAX Largest beta value (regression coefficient), excluding the intercept # BETA_MAX_INDEX Column index for the largest beta value -# INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0) +# INTERCEPT Intercept value, or NaN if there is no intercept (if intercept=0) # DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter # or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0 # DISPERSION_EST Dispersion estimated from the dataset @@ -93,39 +93,39 @@ # # INPUT: # -------------------------------------------------------------------------------------------- -# X matrix X of feature vectors -# Y matrix Y with either 1 or 2 columns: -# if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg) -# dfam Distribution family code: 1 = Power, 2 = Binomial -# vpow Power for Variance defined as (mean)^power (ignored if dfam != 1): -# 0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian -# link Link function code: 0 = canonical (depends on distribution), -# 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit -# lpow Power for Link function defined as (mean)^power (ignored if link != 1): -# -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity -# yneg Response value for Bernoulli "No" label, usually 0.0 or -1.0 -# icpt Intercept presence, X columns shifting and rescaling: -# 0 = no intercept, no shifting, no rescaling; -# 1 = add intercept, but neither shift nor rescale X; -# 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 -# reg Regularization parameter (lambda) for L2 regularization -# tol Tolerance (epsilon) -# disp (Over-)dispersion value, or 0.0 to estimate it from data -# moi Maximum number of outer (Newton / Fisher Scoring) iterations -# mii Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum -# verbose if the Algorithm should be verbose +# X matrix X of feature vectors +# Y matrix Y with either 1 or 2 columns: +# if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg) +# dfam Distribution family code: 1 = Power, 2 = Binomial +# vpow Power for Variance defined as (mean)^power (ignored if dfam != 1): +# 0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian +# link Link 
function code: 0 = canonical (depends on distribution), +# 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit +# lpow Power for Link function defined as (mean)^power (ignored if link != 1): +# -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity +# yneg Response value for Bernoulli "No" label, usually 0.0 or -1.0 +# intercept Intercept presence, X columns shifting and rescaling: +# 0 = no intercept, no shifting, no rescaling; +# 1 = add intercept, but neither shift nor rescale X; +# 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 +# reg Regularization parameter (lambda) for L2 regularization +# tol Tolerance (epsilon) +# disp (Over-)dispersion value, or 0.0 to estimate it from data +# maxIter Maximum number of outer (Newton / Fisher Scoring) iterations +# maxInnerIter Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum +# verbose if the Algorithm should be verbose # ------------------------------------------------------------------------------------------ # # OUTPUT: # -------------------------------------------------------------------------------------------- -# beta Matrix beta, whose size depends on icpt: -# icpt=0: ncol(X) x 1; icpt=1: (ncol(X) + 1) x 1; icpt=2: (ncol(X) + 1) x 2 +# beta Matrix beta, whose size depends on intercept: +# intercept=0: ncol(X) x 1; intercept=1: (ncol(X) + 1) x 1; intercept=2: (ncol(X) + 1) x 2 #--------------------------------------------------------------------------------------------- m_glm = function(Matrix[Double] X, Matrix[Double] Y, Integer dfam=1, Double vpow=0.0, Integer link=0, Double lpow=1.0, Double yneg=0.0, - Integer icpt = 0, Double disp=0.0, Double reg=0.0, Double tol=0.000001, - Integer moi=200, Integer mii=0, Boolean verbose=TRUE) + Integer intercept = 0, Double disp=0.0, Double reg=0.0, Double tol=0.000001, + Integer maxIter=200, Integer maxInnerIter=0, Boolean verbose=TRUE) return(Matrix[Double] betas) { distribution_type = dfam; @@ -133,12 +133,12 @@ m_glm = function(Matrix[Double] X, Matrix[Double] Y, Integer dfam=1, link_type = link; link_as_power_of_the_mean = lpow; bernoulli_No_label = yneg; - intercept_status = icpt; + intercept_status = intercept; dispersion = disp; regularization = reg; eps = tol; - max_iteration_IRLS = moi; - max_iteration_CG = mii; + max_iteration_IRLS = maxIter; + max_iteration_CG = maxInnerIter; # variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean); # link_as_power_of_the_mean = as.double (link_as_power_of_the_mean); diff --git a/scripts/builtin/glmPredict.dml b/scripts/builtin/glmPredict.dml index c5f3a63a8cf..fd2c27210a2 100644 --- a/scripts/builtin/glmPredict.dml +++ b/scripts/builtin/glmPredict.dml @@ -65,7 +65,7 @@ # ncol(X) x k: do not add intercept # ncol(X)+1 x k: add intercept as given by the last B-row # if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3) -# ytest Response matrix Y, with the following dimensions: +# Ytest Response matrix Y, with the following dimensions: # nrow(X) x 1 : for all distributions (dfam=1 or 2 or 3) # nrow(X) x 2 : for Binomial (dfam=2) given by (#pos, #neg) counts # nrow(X) x k+1: for Multinomial (dfam=3) given by category counts @@ -89,7 +89,7 @@ # ------------------------------------------------------------------------- -m_glmPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] ytest=matrix(0,0,0), +m_glmPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] Ytest=matrix(0,0,0), Boolean intercept = FALSE, Integer dfam=1, Double 
vpow=0.0, Integer link=0, Double lpow=1.0, Double disp=1.0, Boolean verbose=TRUE) return(Matrix[Double] M) @@ -136,9 +136,9 @@ m_glmPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] ytest M = means; - if (nrow(ytest) > 0) + if (nrow(Ytest) > 0) { - Y = ytest; + Y = Ytest; ones_ctg = matrix (1, rows = ncol(Y), cols = 1); # Statistics To Compute: diff --git a/scripts/builtin/gmm.dml b/scripts/builtin/gmm.dml index d7c911d00e8..9675c7b3c29 100644 --- a/scripts/builtin/gmm.dml +++ b/scripts/builtin/gmm.dml @@ -26,16 +26,16 @@ # INPUT: # --------------------------------------------------------------------------------------- # X Dataset input to fit the GMM model -# n_components Number of components to use in the Gaussian mixture model +# nComponents Number of components to use in the Gaussian mixture model # model "VVV": unequal variance (full),each component has its own general covariance matrix # "EEE": equal variance (tied), all components share the same general covariance matrix # "VVI": spherical, unequal volume (diag), each component has its own diagonal # covariance matrix # "VII": spherical, equal volume (spherical), each component has its own single variance -# init_param Initialization algorithm to use to initialize the gaussian weights, valid inputs are: +# initParams Initialization algorithm to use to initialize the gaussian weights, valid inputs are: # "kmeans" or "random" -# iterations Number of iterations -# reg_covar Regularization parameter for covariance matrix +# maxIter Number of iterations +# reg Regularization parameter for covariance matrix # tol Tolerance value for convergence # seed The seed value to initialize the values for fitting the GMM. # --------------------------------------------------------------------------------------- @@ -53,8 +53,8 @@ # that observation i in the test data belongs to the kth class # ----------------------------------------------------------------------------------------------- -m_gmm = function(Matrix[Double] X, Integer n_components = 3, String model = "VVV", String init_params = "kmeans", - Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Integer seed = -1, Boolean verbose = FALSE ) +m_gmm = function(Matrix[Double] X, Integer nComponents = 3, String model = "VVV", String initParams = "kmeans", + Integer maxIter = 100, Double reg = 1e-6, Double tol = 0.000001, Integer seed = -1, Boolean verbose = FALSE ) return (Matrix[Double] labels, Matrix[Double] predict_prob, Integer df, Double bic, Matrix[Double] mu, Matrix[Double] prec_chol, Matrix[Double] weight) { @@ -62,26 +62,26 @@ return (Matrix[Double] labels, Matrix[Double] predict_prob, Integer df, Double b if(model != "VVV" & model != "EEE" & model != "VVI" & model != "VII") stop("model not supported, should be in VVV, EEE, VVI, VII"); - [labels, predict_prob, norm, mu, prec_chol, weight] = fit(X, n_components, - model, init_params, iter, reg_covar, tol, seed, verbose) - df = estimate_free_param(n_components, ncol(X), model) + [labels, predict_prob, norm, mu, prec_chol, weight] = fit(X, nComponents, + model, initParams, maxIter, reg, tol, seed, verbose) + df = estimate_free_param(nComponents, ncol(X), model) bic = getBIC(nrow(X), norm, df) } -fit = function(Matrix[Double] X, Integer n_components, String model, String init_params, - Integer iter, Double reg_covar, Double tol, Integer seed, Boolean verbose) +fit = function(Matrix[Double] X, Integer nComponents, String model, String initParams, + Integer maxIter, Double reg, Double tol, Integer seed, 
Boolean verbose) return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm, Matrix[Double] mean, Matrix[Double] precision_chol, Matrix[Double] weight) { et = FALSE lower_bound = 0 converged = FALSE - [weight, mean, sigma, precision_chol] = initialize_param(X, n_components,init_params, model, reg_covar, tol, seed) + [weight, mean, sigma, precision_chol] = initialize_param(X, nComponents,initParams, model, reg, tol, seed) i = 1 - while(i <= iter & !converged & !et) { + while(i <= maxIter & !converged & !et) { prev_lower_bound = lower_bound [log_prob_norm, log_resp, weighted_log_prob] = e_step(X, weight, mean, precision_chol, model) - [weight, mean, sigma, precision_chol, et] = m_step(X, log_resp, n_components, model, reg_covar) + [weight, mean, sigma, precision_chol, et] = m_step(X, log_resp, nComponents, model, reg) lower_bound = log_prob_norm change = lower_bound - prev_lower_bound converged = (abs(change) < tol) @@ -95,7 +95,7 @@ return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm, if(et) { print("warning: did not converge because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric). - \nTry to decrease the number of components, or increase reg_covar") + \nTry to decrease the number of components, or increase reg") label = rowIndexMax(weighted_log_prob) predict_prob = exp(log_resp) } @@ -107,49 +107,49 @@ return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm, } -initialize_param = function(Matrix[Double] X, Integer n_components, String init_params, - String model, Double reg_covar, Double tol , Integer seed) +initialize_param = function(Matrix[Double] X, Integer nComponents, String initParams, + String model, Double reg, Double tol , Integer seed) return (Matrix[Double] weight, Matrix[Double] mean, Matrix[Double] sigma, Matrix[Double] precision_chol) { - # create responsibility matrix, resp[n_samples, n_components] - resp = matrix(0, nrow(X), n_components) - if(init_params == "kmeans") { - [C, Y] = kmeans(X=X, k=n_components, runs=10, - eps=tol, is_verbose=FALSE, avg_sample_size_per_centroid=100, seed=seed) - resp = ((resp + t(seq(1, n_components))) == Y) + # create responsibility matrix, resp[n_samples, nComponents] + resp = matrix(0, nrow(X), nComponents) + if(initParams == "kmeans") { + [C, Y] = kmeans(X=X, k=nComponents, runs=10, + tol=tol, verbose=FALSE, avgSampleSizePerCentroid=100, seed=seed) + resp = ((resp + t(seq(1, nComponents))) == Y) } - else if(init_params == "random") { - resp = Rand(rows = nrow(X), cols=n_components, seed=seed) + else if(initParams == "random") { + resp = Rand(rows = nrow(X), cols=nComponents, seed=seed) resp = resp/rowSums(resp) } - else stop("invalid parameter value, expected kmeans or random found "+init_params) + else stop("invalid parameter value, expected kmeans or random found "+initParams) - [weight, mean, sigma] = estimate_gaussian_param(X, resp, n_components, model, reg_covar) + [weight, mean, sigma] = estimate_gaussian_param(X, resp, nComponents, model, reg) weight = weight/nrow(X) - [precision_chol, et] = compute_precision_cholesky(sigma, model, n_components) + [precision_chol, et] = compute_precision_cholesky(sigma, model, nComponents) if(et) stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric). 
- \nTry to decrease the number of components, or increase reg_covar") + \nTry to decrease the number of components, or increase reg") } estimate_gaussian_param = function(Matrix[Double] X, Matrix[Double] resp, - Integer n_components, String model, Double reg_covar) + Integer nComponents, String model, Double reg) return (Matrix[Double] weight, Matrix[Double] mean, Matrix[Double] sigma) { MACHINE_PRECISION = 2.22e-16 # estimate Gaussian parameter weight = colSums(resp) + MACHINE_PRECISION # adding machine precision - mean = (t(resp) %*% X) / t(weight) # mean dims: n_components * n_features + mean = (t(resp) %*% X) / t(weight) # mean dims: nComponents * n_features if(model == "VVV") { - # output: (sigma a list of length = n_components where each item in list is a covariance matrix of ( + # output: (sigma a list of length = nComponents where each item in list is a covariance matrix of ( # n_features * n_features) dimensions) all rbind in a matrix form sigma = matrix(0, 0, ncol(X)) for(k in 1:nrow(mean)) { diff = X - mean[k,] cov = (t(diff * resp[, k]) %*% diff) / as.scalar(weight[1,k]) - cov = cov + diag(matrix(reg_covar, ncol(cov), 1)) + cov = cov + diag(matrix(reg, ncol(cov), 1)) sigma = rbind(sigma, cov) } } @@ -160,35 +160,35 @@ return (Matrix[Double] weight, Matrix[Double] mean, Matrix[Double] sigma) avgMean = (t(mean) * weight) %*% mean cov = avgX2 - avgMean cov = cov / sum(weight) - cov = cov + diag(matrix(reg_covar, ncol(cov), 1)) + cov = cov + diag(matrix(reg, ncol(cov), 1)) sigma = cov } else if(model == "VVI") { - # output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_components * n_features) dimensions) + # output: (sigma a list of length = 1 where item in list is a covariance matrix of (nComponents * n_features) dimensions) avgX2 = (t(resp) %*% (X*X)) / t(weight) avgMean = mean ^ 2 avgMean2 = mean * (t(resp) %*% X) / t(weight) - cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar + cov = avgX2 - 2 * avgMean + avgMean2 + reg sigma = cov } else if (model == "VII") { - # output: (sigma a list of length = 1 where item in list is a variance value for each component (1* n_components) dimensions) + # output: (sigma a list of length = 1 where item in list is a variance value for each component (1* nComponents) dimensions) avgX2 = (t(resp) %*% (X*X)) / t(weight) avgMean = mean ^ 2 avgMean2 = mean * (t(resp) %*% X) / t(weight) - cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar + cov = avgX2 - 2 * avgMean + avgMean2 + reg sigma = rowMeans(cov) } } -compute_precision_cholesky = function(Matrix[Double] sigma, String model, Integer n_components) +compute_precision_cholesky = function(Matrix[Double] sigma, String model, Integer nComponents) return (Matrix[Double] precision_chol, Boolean earlyTermination ) { earlyTermination = FALSE if(model == "VVV") { index = 1; k = 1 precision_chol = matrix(0, 0, ncol(sigma)) - while(k <= n_components) { + while(k <= nComponents) { cov = sigma[index:(ncol(sigma)*k), ] isSPD = checkSPD(cov) if(isSPD) { @@ -199,7 +199,7 @@ return (Matrix[Double] precision_chol, Boolean earlyTermination ) k = k+1 } else { earlyTermination = TRUE; - k = n_components + 1 + k = nComponents + 1 } } } @@ -225,34 +225,34 @@ return (Matrix[Double] precision_chol, Boolean earlyTermination ) # Expectation step e_step = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, - Matrix[Double] precisions_cholesky, String model) + Matrix[Double] precisions, String model) return(Double norm, Matrix[Double] log_resp, Matrix[Double] weighted_log_prob) { - 
weighted_log_prob = estimate_log_gaussian_prob(X, mu, precisions_cholesky, model) + log(w) + weighted_log_prob = estimate_log_gaussian_prob(X, mu, precisions, model) + log(w) log_prob_norm = logSumExp(weighted_log_prob, "rows") log_resp = weighted_log_prob - log_prob_norm norm = mean(log_prob_norm) } # maximization Step -m_step = function(Matrix[Double] X, Matrix[Double] log_resp, Integer n_components, String model, Double reg_covar) +m_step = function(Matrix[Double] X, Matrix[Double] logResp, Integer nComponents, String model, Double reg) return (Matrix[Double] weight, Matrix[Double] mean, Matrix[Double] sigma, Matrix[Double] precision_chol, Boolean et) { - [weight, mean, sigma] = estimate_gaussian_param(X, exp(log_resp), n_components, model, reg_covar) + [weight, mean, sigma] = estimate_gaussian_param(X, exp(logResp), nComponents, model, reg) weight = weight/nrow(X) - [precision_chol, et] = compute_precision_cholesky(sigma, model, n_components) + [precision_chol, et] = compute_precision_cholesky(sigma, model, nComponents) } estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, Matrix[Double] prec_chol, String model) - return(Matrix[Double] es_log_prob ) # nrow(X) * n_components + return(Matrix[Double] es_log_prob ) # nrow(X) * nComponents { - n_components = nrow(mu) + nComponents = nrow(mu) log_det = compute_log_det_cholesky(prec_chol, model, ncol(X)) if(model == "VVV") { - log_prob = matrix(0, nrow(X), n_components) + log_prob = matrix(0, nrow(X), nComponents) i = 1 - for(k in 1:n_components) { + for(k in 1:nComponents) { prec = prec_chol[i:(k*ncol(X)),] y = X %*% prec - mu[k,] %*% prec # changing here t intro: y = X %*% prec - mu[k,] %*% prec log_prob[, k] = rowSums(y*y) @@ -260,9 +260,9 @@ estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, Matri } } else if(model == "EEE") { - log_prob = matrix(0, nrow(X), n_components) + log_prob = matrix(0, nrow(X), nComponents) prec = prec_chol - for(k in 1:n_components) { + for(k in 1:nComponents) { y = X %*% prec - mu[k,] %*% prec log_prob[, k] = rowSums(y*y) # TODO replace y*y with squared built-in } @@ -314,22 +314,22 @@ compute_log_det_cholesky = function(Matrix[Double] mat_chol, String model, Integ } # compute the number of estimated parameters -estimate_free_param = function(Integer n_components, Integer n_features, String model) +estimate_free_param = function(Integer nComponents, Integer n_features, String model) return (Integer n_parameters) { if(model == "VVV") - cov_param = n_components * n_features * (n_features + 1) / 2 + cov_param = nComponents * n_features * (n_features + 1) / 2 else if(model == "EEE") cov_param = n_features * (n_features + 1) / 2 else if (model == "VVI") - cov_param = n_components * n_features + cov_param = nComponents * n_features else if (model == "VII") - cov_param = n_components + cov_param = nComponents else stop("invalid model expecting any of [VVV,EEE,VVI,VII], found "+model) - mean_param = n_features * n_components + mean_param = n_features * nComponents - n_parameters = as.integer( cov_param + mean_param + n_components - 1 ) + n_parameters = as.integer( cov_param + mean_param + nComponents - 1 ) } getBIC = function(Integer n, Double norm, Integer df) diff --git a/scripts/builtin/gmmPredict.dml b/scripts/builtin/gmmPredict.dml index d39d4524eaa..cd2ffc7baed 100644 --- a/scripts/builtin/gmmPredict.dml +++ b/scripts/builtin/gmmPredict.dml @@ -25,11 +25,11 @@ # INPUT: # ------------------------------------------------------------------------------------------ # X 
Dataset input to predict the labels from -# weight Weight of learned model: +# W Weights of learned model: # A matrix whose [i,k]th entry is the probability # that observation i in the test data belongs to the kth class -# mu Fitted clusters mean -# precisions_cholesky Fitted precision matrix for each mixture +# Mu Fitted clusters mean +# Precisions Fitted precision cholesky matrix for each mixture # model "VVV": unequal variance (full),each component has its own general covariance matrix # "EEE": equal variance (tied), all components share the same general covariance matrix # "VVI": spherical, unequal volume (diag), each component has its own diagonal @@ -43,23 +43,23 @@ # predict_prob Probability of the predictions given the X input dataset # --------------------------------------------------------------------------------------------------- -m_gmmPredict = function(Matrix[Double] X, Matrix[Double] weight, - Matrix[Double] mu, Matrix[Double] precisions_cholesky, String model = "VVV") +m_gmmPredict = function(Matrix[Double] X, Matrix[Double] W, + Matrix[Double] Mu, Matrix[Double] Precisions, String model = "VVV") return(Matrix[Double] labels, Matrix[Double] predict_prob) { # compute the posterior probabilities for new instances - weighted_log_prob = compute_log_gaussian_prob(X, mu, precisions_cholesky, model) + log(weight) + weighted_log_prob = compute_log_gaussian_prob(X, Mu, Precisions, model) + log(W) log_prob_norm = logSumExp(weighted_log_prob, "rows") log_resp = weighted_log_prob - log_prob_norm predict_prob = exp(log_resp) labels = rowIndexMax(weighted_log_prob) } -compute_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, +compute_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] Mu, Matrix[Double] prec_chol, String model) return(Matrix[Double] es_log_prob ) # nrow(X) * n_components { - n_components = nrow(mu) + n_components = nrow(Mu) d = ncol(X) if(model == "VVV") { @@ -68,7 +68,7 @@ compute_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, i = 1 for(k in 1:n_components) { prec = prec_chol[i:(k*ncol(X)),] - y = X %*% prec - mu[k,] %*% prec + y = X %*% prec - Mu[k,] %*% prec log_prob[, k] = rowSums(y*y) # compute log_det_cholesky log_det = sum(log(diag(t(prec)))) @@ -81,7 +81,7 @@ compute_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, log_det_chol = as.matrix(sum(log(diag(prec_chol)))) prec = prec_chol for(k in 1:n_components) { - y = X %*% prec - mu[k,] %*% prec + y = X %*% prec - Mu[k,] %*% prec log_prob[, k] = rowSums(y*y) } } @@ -89,17 +89,17 @@ compute_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, log_det_chol = t(rowSums(log(prec_chol))) prec = prec_chol precisions = prec^2 - bc_matrix = matrix(1,nrow(X), nrow(mu)) - log_prob = (bc_matrix*t(rowSums(mu^2 * precisions)) - - 2 * (X %*% t(mu * precisions)) + X^2 %*% t(precisions)) + bc_matrix = matrix(1,nrow(X), nrow(Mu)) + log_prob = (bc_matrix*t(rowSums(Mu^2 * precisions)) + - 2 * (X %*% t(Mu * precisions)) + X^2 %*% t(precisions)) } else if (model == "VII") { log_det_chol = t(d * log(prec_chol)) prec = prec_chol precisions = prec^ 2 - bc_matrix = matrix(1,nrow(X), nrow(mu)) - log_prob = (bc_matrix * t(rowSums(mu^2) * precisions) - - 2 * X %*% t(mu * precisions) + rowSums(X*X) %*% t(precisions) ) + bc_matrix = matrix(1,nrow(X), nrow(Mu)) + log_prob = (bc_matrix * t(rowSums(Mu^2) * precisions) + - 2 * X %*% t(Mu * precisions) + rowSums(X*X) %*% t(precisions) ) } if(ncol(log_det_chol) == 1) log_det_chol = matrix(1, 1, ncol(log_prob)) * log_det_chol 
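For orientation, a minimal end-to-end sketch with the renamed GMM interface (Xtrain and Xtest are placeholder matrices; the output order follows the signatures shown above):

# fit a 3-component full-covariance mixture with the renamed arguments
[labels, prob, df, bic, mu, precChol, weight] = gmm(X=Xtrain, nComponents=3, model="VVV", initParams="kmeans", maxIter=100, reg=1e-6, tol=1e-6, seed=42)
# score new data with the renamed W/Mu/Precisions arguments
[predLabels, predProb] = gmmPredict(X=Xtest, W=weight, Mu=mu, Precisions=precChol, model="VVV")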
diff --git a/scripts/builtin/gnmf.dml index abd6659c801..50c7fe6b401 100644 --- a/scripts/builtin/gnmf.dml +++ b/scripts/builtin/gnmf.dml @@ -30,29 +30,29 @@ # # INPUT: # -------------------------------------------------------------------------------------- -# X Matrix of feature vectors. -# rnk Number of components into which matrix X is to be factored -# eps Tolerance -# maxi Maximum number of conjugate gradient iterations +# X Matrix of feature vectors. +# rank Number of components into which matrix X is to be factored +# tol Tolerance +# maxIter Maximum number of conjugate gradient iterations # -------------------------------------------------------------------------------------- # # OUTPUT: # -------------------------------------------------------------------------------------- -# W List of pattern matrices, one for each repetition -# H List of amplitude matrices, one for each repetition +# W List of pattern matrices, one for each repetition +# H List of amplitude matrices, one for each repetition # -------------------------------------------------------------------------------------- -m_gnmf = function(Matrix[Double] X, Integer rnk, Double eps = 1e-8, Integer maxi = 10) +m_gnmf = function(Matrix[Double] X, Integer rank, Double tol = 1e-8, Integer maxIter = 10) return (Matrix[Double] W, Matrix[Double] H) { #initialize W and H - W = rand(rows=nrow(X), cols=rnk, min=-0.05, max=0.05); - H = rand(rows=rnk, cols=ncol(X), min=-0.05, max=0.05); + W = rand(rows=nrow(X), cols=rank, min=-0.05, max=0.05); + H = rand(rows=rank, cols=ncol(X), min=-0.05, max=0.05); i = 0; - while(i < maxi) { - H = H * ((t(W) %*% X) / (((t(W) %*% W) %*% H)+eps)); - W = W * ((X %*% t(H)) / ((W %*% (H %*% t(H)))+eps)); + while(i < maxIter) { + H = H * ((t(W) %*% X) / (((t(W) %*% W) %*% H)+tol)); + W = W * ((X %*% t(H)) / ((W %*% (H %*% t(H)))+tol)); i = i + 1; } } diff --git a/scripts/builtin/gridSearch.dml index 9ef50895a9d..877c929c2d4 100644 --- a/scripts/builtin/gridSearch.dml +++ b/scripts/builtin/gridSearch.dml @@ -60,7 +60,7 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String { # Step 0) handling default arguments, which require access to passed data if( length(trainArgs) == 0 ) - trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1); + trainArgs = list(X=X, y=y, intercept=0, reg=-1, tol=-1, maxIter=-1); if( length(dataArgs) == 0 ) dataArgs = list("X", "y"); if( length(predictArgs) == 0 )
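A usage sketch for gridSearch over the renamed lm arguments (the params/paramValues lists below are illustrative assumptions about the surrounding gridSearch signature, not part of this diff):

# tune regularization and tolerance of lm under the new names reg/tol
params = list("reg", "tol")
paramValues = list(10^seq(0,-4,-1), 10^seq(-6,-10,-1))
[B, opt] = gridSearch(X=X, y=y, train="lm", predict="lmPredict", params=params, paramValues=paramValues, verbose=TRUE)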
diff --git a/scripts/builtin/hyperband.dml index 4eede73070d..c6fcf89f86c 100644 --- a/scripts/builtin/hyperband.dml +++ b/scripts/builtin/hyperband.dml @@ -29,10 +29,10 @@ # # INPUT: # ------------------------------------------------------------------------------------------ -# X_train Input Matrix of training vectors -# y_train Labels for training vectors -# X_val Input Matrix of validation vectors -# y_val Labels for validation vectors +# X Input Matrix of training vectors +# Y Labels for training vectors +# Xtest Input Matrix of validation vectors +# Ytest Labels for validation vectors # params List of parameters to optimize # paramRanges The min and max values for the uniform distributions to draw from. # One row per hyper parameter, first column specifies min, second column max value. @@ -47,8 +47,8 @@ # bestHyperParams hyper parameters of best performing candidate # ---------------------------------------------------------------------------------------------- -m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train, - Matrix[Double] X_val, Matrix[Double] y_val, List[String] params, +m_hyperband = function(Matrix[Double] X, Matrix[Double] Y, + Matrix[Double] Xtest, Matrix[Double] Ytest, List[String] params, Matrix[Double] paramRanges, Scalar[int] R = 81, Scalar[int] eta = 3, Boolean verbose = TRUE) return (Matrix[Double] bestWeights, Frame[Unknown] bestHyperParams) @@ -59,15 +59,15 @@ m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train, assert(numParams == nrow(paramRanges)); assert(ncol(paramRanges) == 2); - assert(nrow(X_train) == nrow(y_train)); - assert(nrow(X_val) == nrow(y_val)); - assert(ncol(X_train) == ncol(X_val)); - assert(ncol(y_train) == ncol(y_val)); + assert(nrow(X) == nrow(Y)); + assert(nrow(Xtest) == nrow(Ytest)); + assert(ncol(X) == ncol(Xtest)); + assert(ncol(Y) == ncol(Ytest)); s_max = floor(log(R,eta)); B = (s_max + 1) * R; bracketWinners = matrix(0, s_max+1, numParams+1); - winnerWeights = matrix(0, s_max+1, ncol(X_train)); + winnerWeights = matrix(0, s_max+1, ncol(X)); parfor( s in s_max:0 ) { debugMsgs = "--------------------------"; @@ -80,7 +80,7 @@ m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train, r = R * eta^(-s); scoreboard = matrix(0,n,1+numParams); - candidateWeights = matrix(0,n,ncol(X_train)); + candidateWeights = matrix(0,n,ncol(X)); # candidateWeights is not read until last round, as models are retrained # from zero in every trial at the moment @@ -112,7 +112,7 @@ m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train, # TODO argument list has to be passed from outside as well # args is a residue from the implementation with eval("lmCG", args) # init argument list - args = list(X=X_train, y=y_train, icpt=0, reg=1e-7, - tol=1e-7, maxi=r_i, verbose=TRUE); + args = list(X=X, y=Y, intercept=0, reg=1e-7, + tol=1e-7, maxIter=r_i, verbose=TRUE); for( curParam in 1:numParams ) { @@ -128,12 +128,12 @@ m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train, # prone depending on the order of the list. 
hyper parameters to optimize # are taken from args, as there they are reordered to be invariant to the # order used at calling hyperband - weights = eval("lmCG", list(X=X_train, y=y_train, icpt=0, - tol=as.scalar(args[1]), reg=as.scalar(args[2]), maxi=r_i, verbose=FALSE)); + weights = eval("lmCG", list(X=X, y=Y, intercept=0, + tol=as.scalar(args[1]), reg=as.scalar(args[2]), maxIter=r_i, verbose=FALSE)); candidateWeights[curCandidate] = t(weights) - preds = lmPredict(X=X_val, B=weights, ytest= matrix(0,1,1)); - scoreboard[curCandidate,1] = as.matrix(sum((y_val - preds)^2)); + preds = lmPredict(X=Xtest, B=weights, Ytest= matrix(0,1,1)); + scoreboard[curCandidate,1] = as.matrix(sum((Ytest - preds)^2)); } # reorder both matrices by same order diff --git a/scripts/builtin/img_brightness.dml index 100ccb7588b..4a8d5012545 100644 --- a/scripts/builtin/img_brightness.dml +++ b/scripts/builtin/img_brightness.dml @@ -23,9 +23,9 @@ # # INPUT: # ----------------------------------------------------------------------------------------- -# img_in Input matrix/image +# img Input matrix/image # value The amount of brightness to be changed for the image -# channel_max Maximum value of the brightness of the image +# maxValue Maximum channel value of the brightness of the image # ----------------------------------------------------------------------------------------- # # OUTPUT: # ----------------------------------------------------------------------------------------- # img_out Output matrix/image # ---------------------------------------------------------------------------------------------------------------------- -m_img_brightness = function(Matrix[Double] img_in, Double value, Integer channel_max) return (Matrix[Double] img_out) { +m_img_brightness = function(Matrix[Double] img, Double value, Integer maxValue) return (Matrix[Double] img_out) { # change the brightness of an image - img_out = max(0, min(img_in + value, channel_max)) + img_out = max(0, min(img + value, maxValue)) } diff --git a/scripts/builtin/img_brightness_linearized.dml index 8c5e72d13f9..288f5dd3a60 100644 --- a/scripts/builtin/img_brightness_linearized.dml +++ b/scripts/builtin/img_brightness_linearized.dml @@ -23,9 +23,9 @@ # # INPUT: # ----------------------------------------------------------------------------------------- -# img_in Input matrix/image (can represent multiple images every row of the matrix represents a linearized image) +# img Input matrix/image (can represent multiple images; every row of the matrix represents a linearized image) # value The amount of brightness to be changed for the image -# channel_max Maximum value of the brightness of the image +# maxValue Maximum channel value of the brightness of the image # ----------------------------------------------------------------------------------------- # # OUTPUT: # ----------------------------------------------------------------------------------------- # img_out Output matrix/images (every row of the matrix represents a linearized image) # ---------------------------------------------------------------------------------------------------------------------- -m_img_brightness_linearized = function(Matrix[Double] img_in, Double value, Integer channel_max) return (Matrix[Double] img_out) { - img_out = img_brightness(img_in,value,channel_max) +m_img_brightness_linearized = function(Matrix[Double] img, Double value, Integer maxValue) return (Matrix[Double] img_out) { + img_out = img_brightness(img,value,maxValue) }
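A short sketch of the renamed brightness calls (I is a placeholder single image, Ilin a placeholder matrix of linearized images, both assumed to have pixel values in [0, 255]):

Ibright = img_brightness(img=I, value=32, maxValue=255)
IbrightLin = img_brightness_linearized(img=Ilin, value=32, maxValue=255)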
diff --git a/scripts/builtin/img_crop.dml index e85301f8bb6..94c52e84f5a 100644 --- a/scripts/builtin/img_crop.dml +++ b/scripts/builtin/img_crop.dml @@ -23,11 +23,11 @@ # # INPUT: # ---------------------------------------------------------------------------------------- -# img_in Input matrix/image +# img Input matrix/image # w The width of the subregion required # h The height of the subregion required -# x_offset The horizontal coordinate in the image to begin the crop operation -# y_offset The vertical coordinate in the image to begin the crop operation +# offsetX The horizontal coordinate in the image to begin the crop operation +# offsetY The vertical coordinate in the image to begin the crop operation # ---------------------------------------------------------------------------------------- # # OUTPUT: # ---------------------------------------------------------------------------------------- # img_out Cropped matrix/image # -------------------------------------------------------------------------------------------------- -m_img_crop = function(Matrix[Double] img_in, Integer w, Integer h, Integer x_offset, Integer y_offset) return (Matrix[Double] img_out) { +m_img_crop = function(Matrix[Double] img, Integer w, Integer h, Integer offsetX, Integer offsetY) return (Matrix[Double] img_out) { # crop - cut out a subregion of an image. Adapted from image_utils.dml - orig_w = ncol(img_in) - orig_h = nrow(img_in) + orig_w = ncol(img) + orig_h = nrow(img) - start_h = (ceil((orig_h - h) / 2)) + y_offset + start_h = (ceil((orig_h - h) / 2)) + offsetY end_h = (start_h + h - 1) - start_w = (ceil((orig_w - w) / 2)) + x_offset + start_w = (ceil((orig_w - w) / 2)) + offsetX end_w = (start_w + w - 1) if((start_h < 0) | (end_h > orig_h) | (start_w < 0) | (end_w > orig_w)) { print("Offset out of bounds! Returning input.") - img_out = img_in + img_out = img } else { mask = matrix(0, rows=orig_h, cols=orig_w) temp_mask = matrix(1, rows=h , cols=w ) mask[start_h:end_h, start_w:end_w] = temp_mask mask = matrix(mask, rows=1, cols=orig_w * orig_h) - img_out = matrix(removeEmpty(target=(matrix(img_in+1, 1, orig_w * orig_h)), margin="cols", select=mask) - 1, h, w) + img_out = matrix(removeEmpty(target=(matrix(img+1, 1, orig_w * orig_h)), margin="cols", select=mask) - 1, h, w) } } diff --git a/scripts/builtin/img_crop_linearized.dml index b2c2c03fd18..3f577adca49 100644 --- a/scripts/builtin/img_crop_linearized.dml +++ b/scripts/builtin/img_crop_linearized.dml @@ -23,13 +23,13 @@ # # INPUT: # ---------------------------------------------------------------------------------------- -# img_in Linearized input images as 2D matrix +# img Linearized input images as 2D matrix # w The width of the subregion required # h The height of the subregion required -# x_offset The horizontal offset for the center of the crop region -# y_offset The vertical offset for the center of the crop region -# s_cols Width of a single image -# s_rows Height of a single image +# offsetX The horizontal offset for the center of the crop region +# offsetY The vertical offset for the center of the crop region +# sW Width of a single image +# sH Height of a single image # ---------------------------------------------------------------------------------------- # # OUTPUT: # ---------------------------------------------------------------------------------------- # img_out Cropped images as linearized 2D matrix # -------------------------------------------------------------------------------------------------- -m_img_crop_linearized = function(Matrix[Double] img_in, Integer w, Integer h, Integer x_offset, Integer y_offset, - Integer s_cols, Integer s_rows) return (Matrix[Double] img_out) { +m_img_crop_linearized = 
function(Matrix[Double] img, Integer w, Integer h, Integer offsetX, Integer offsetY, + Integer sW, Integer sH) return (Matrix[Double] img_out) { - orig_w = s_cols - orig_h = s_rows + orig_w = sW + orig_h = sH - nrows = nrow(img_in) # number of images + nrows = nrow(img) # number of images - start_h = (ceil((orig_h - h) / 2)) + y_offset + start_h = (ceil((orig_h - h) / 2)) + offsetY end_h = (start_h + h - 1) - start_w = (ceil((orig_w - w) / 2)) + x_offset + start_w = (ceil((orig_w - w) / 2)) + offsetX end_w = (start_w + w - 1) if((start_h < 0) | (end_h > orig_h) | (start_w < 0) | (end_w > orig_w)) { print("Offset out of bounds! Returning input.") - img_out = img_in + img_out = img } else { mask = matrix(0, rows=orig_h, cols=orig_w) @@ -61,7 +61,7 @@ m_img_crop_linearized = function(Matrix[Double] img_in, Integer w, Integer h, In linear_mask = matrix(mask, rows=1, cols=orig_w * orig_h) - img_out = matrix(removeEmpty(target=(matrix(img_in+1, nrow(img_in), ncol(img_in))), margin="cols", select=linear_mask) - 1, nrows, w * h) + img_out = matrix(removeEmpty(target=(matrix(img+1, nrow(img), ncol(img))), margin="cols", select=linear_mask) - 1, nrows, w * h) } } diff --git a/scripts/builtin/img_cutout.dml b/scripts/builtin/img_cutout.dml index cd3f432cd0f..6cf8ec520a3 100644 --- a/scripts/builtin/img_cutout.dml +++ b/scripts/builtin/img_cutout.dml @@ -23,12 +23,12 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input image as 2D matrix with top left corner at [1, 1] +# img Input image as 2D matrix with top left corner at [1, 1] # x Column index of the top left corner of the rectangle (starting at 1) # y Row index of the top left corner of the rectangle (starting at 1) -# width Width of the rectangle (must be positive) -# height Height of the rectangle (must be positive) -# fill_value The value to set for the rectangle +# w Width of the rectangle (must be positive) +# h Height of the rectangle (must be positive) +# value The value to set for the rectangle # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -36,23 +36,23 @@ # img_out Output image as 2D matrix with top left corner at [1, 1] # ------------------------------------------------------------------------------------------ -m_img_cutout = function(Matrix[Double] img_in, Integer x, Integer y, Integer width, Integer height, Double fill_value) return (Matrix[Double] img_out) { - rows = nrow(img_in) - cols = ncol(img_in) +m_img_cutout = function(Matrix[Double] img, Integer x, Integer y, Integer w, Integer h, Double value) return (Matrix[Double] img_out) { + rows = nrow(img) + cols = ncol(img) - if (width < 1 | height < 1) { - print("Invalid width or height. Returning input") - img_out = img_in + if (w < 1 | h < 1) { + print("Invalid w or h. 
Returning input") + img_out = img } else { - end_x = x + width - 1 - end_y = y + height - 1 + end_x = x + w - 1 + end_y = y + h - 1 start_x = max(1, x) start_y = max(1, y) end_x = min(cols, end_x) end_y = min(rows, end_y) - img_out = matrix(img_in, rows=rows, cols=cols) - img_out[start_y:end_y, start_x:end_x] = matrix(fill_value, rows=(end_y-start_y+1), cols=(end_x-start_x+1)) + img_out = matrix(img, rows=rows, cols=cols) + img_out[start_y:end_y, start_x:end_x] = matrix(value, rows=(end_y-start_y+1), cols=(end_x-start_x+1)) } } diff --git a/scripts/builtin/img_cutout_linearized.dml b/scripts/builtin/img_cutout_linearized.dml index cb923e31ba4..83d1098dd9a 100644 --- a/scripts/builtin/img_cutout_linearized.dml +++ b/scripts/builtin/img_cutout_linearized.dml @@ -23,14 +23,14 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input images as linearized 2D matrix with top left corner at [1, 1] +# img Input images as linearized 2D matrix with top left corner at [1, 1] # x Column index of the top left corner of the rectangle (starting at 1) # y Row index of the top left corner of the rectangle (starting at 1) -# width Width of the rectangle (must be positive) -# height Height of the rectangle (must be positive) -# fill_value The value to set for the rectangle -# s_cols Width of a single image -# s_rows Height of a single image +# w Width of the rectangle (must be positive) +# h Height of the rectangle (must be positive) +# value The value to set for the rectangle +# sW Width of a single image +# sH Height of a single image # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -38,33 +38,33 @@ # img_out Output images as linearized 2D matrix with top left corner at [1, 1] # ------------------------------------------------------------------------------------------ -m_img_cutout_linearized = function(Matrix[Double] img_in, Integer x, Integer y, Integer width, Integer height, - Double fill_value, Integer s_cols, Integer s_rows) return (Matrix[Double] img_out) { - rows = nrow(img_in) - cols = ncol(img_in) +m_img_cutout_linearized = function(Matrix[Double] img, Integer x, Integer y, Integer w, Integer h, + Double value, Integer sW, Integer sH) return (Matrix[Double] img_out) { + rows = nrow(img) + cols = ncol(img) - if (width < 1 | height < 1) { - print("Invalid width or height. Returning input") - img_out = img_in + if (w < 1 | h < 1) { + print("Invalid w or h. 
Returning input") + img_out = img } else { start_x = max(1, x) start_y = max(1, y) - end_x = start_x + width - 1 - end_x = min(s_cols, end_x) + end_x = start_x + w - 1 + end_x = min(sW, end_x) - end_y = start_y + height - 1 - end_y = min(s_rows, end_y) + end_y = start_y + h - 1 + end_y = min(sH, end_y) - img_out = img_in + img_out = img # Iterate through each row of the rectangular region for (i in start_y: end_y){ - start_idx = (i-1) * s_cols + start_x - end_idx = (i-1) * s_cols + end_x + start_idx = (i-1) * sW + start_x + end_idx = (i-1) * sW + end_x - img_out[, start_idx:end_idx] = matrix(fill_value, rows=rows, cols=(end_x-start_x+1)) + img_out[, start_idx:end_idx] = matrix(value, rows=rows, cols=(end_x-start_x+1)) } } } diff --git a/scripts/builtin/img_invert.dml b/scripts/builtin/img_invert.dml index c52f5bed3a7..81142ca0937 100644 --- a/scripts/builtin/img_invert.dml +++ b/scripts/builtin/img_invert.dml @@ -23,8 +23,8 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input image -# max_value The maximum value pixels can have +# img Input image +# maxValue The maximum value pixels can have # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -32,6 +32,6 @@ # img_out Output image # ------------------------------------------------------------------------------------------- -m_img_invert = function(Matrix[Double] img_in, Double max_value) return (Matrix[Double] img_out) { - img_out = max_value - img_in +m_img_invert = function(Matrix[Double] img, Double maxValue) return (Matrix[Double] img_out) { + img_out = maxValue - img } diff --git a/scripts/builtin/img_invert_linearized.dml b/scripts/builtin/img_invert_linearized.dml index 68b245492c0..aaa6df8e16f 100644 --- a/scripts/builtin/img_invert_linearized.dml +++ b/scripts/builtin/img_invert_linearized.dml @@ -23,8 +23,8 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input matrix/image (every row of the matrix represents a linearized image) -# max_value The maximum value pixels can have +# img Input matrix/image (every row of the matrix represents a linearized image) +# maxValue The maximum value pixels can have # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -32,6 +32,6 @@ # img_out Output images (every row of the matrix represents a linearized image) # ------------------------------------------------------------------------------------------- -m_img_invert_linearized = function(Matrix[Double] img_in, Double max_value) return (Matrix[Double] img_out) { - img_out = img_invert(img_in,max_value) +m_imgvert_linearized = function(Matrix[Double] img, Double maxValue) return (Matrix[Double] img_out) { + img_out = img_invert(img,maxValue) } diff --git a/scripts/builtin/img_mirror.dml b/scripts/builtin/img_mirror.dml index a8836f6fd25..46111e4190c 100644 --- a/scripts/builtin/img_mirror.dml +++ b/scripts/builtin/img_mirror.dml @@ -24,19 +24,19 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input matrix/image -# max_value The maximum value pixels can have +# img Input matrix/image +# horizontal Boolean, True for horizental # --------------------------------------------------------------------------------------------- # # OUTPUT: # 
------------------------------------------------------------------------------------------- -# img_out Flipped matrix/image +# img_out Flipped matrix/image # ------------------------------------------------------------------------------------------- -m_img_mirror = function(Matrix[Double] img_in, Boolean horizontal_axis) return (Matrix[Double] img_out) { +m_img_mirror = function(Matrix[Double] img, Boolean horizontal) return (Matrix[Double] img_out) { # flip an image on the x (horizontal) or y (vertical) axis - if( horizontal_axis) - img_out = rev(img_in) + if( horizontal) + img_out = rev(img) else - img_out = t(rev(t(img_in))) + img_out = t(rev(t(img))) } diff --git a/scripts/builtin/img_mirror_linearized.dml b/scripts/builtin/img_mirror_linearized.dml index 08b3fe539f9..1b48f299ff7 100644 --- a/scripts/builtin/img_mirror_linearized.dml +++ b/scripts/builtin/img_mirror_linearized.dml @@ -24,10 +24,10 @@ # It flips an image on the X (horizontal) or Y (vertical) axis. # INPUT: # ----------------------------------------------------------------------------------------- -# img_matrix Input matrix/image (every row represents a linearized matrix/image) -# horizontal_axis flip either in X or Y axis -# original_rows number of rows in the original 2-D images -# original_cols number of cols in the original 2-D images +# img Input matrix/image (every row represents a linearized matrix/image) +# horizontal Flip on the X (horizontal) axis if TRUE, otherwise on the Y (vertical) axis +# sH Height of a single image +# sW Width of a single image # ----------------------------------------------------------------------------------------- # # OUTPUT: @@ -35,36 +35,36 @@ # R Output matrix/image (every row represents a linearized matrix/image) # ----------------------------------------------------------------------------------------- -m_img_mirror_linearized = function(matrix[double] img_matrix, Boolean horizontal_axis, -Integer original_rows, Integer original_cols) return (matrix[double] R) { - n = ncol(img_matrix); - R = matrix(0, rows=nrow(img_matrix), cols=n); - rows = original_rows; - cols = original_cols; +m_img_mirror_linearized = function(matrix[double] img, Boolean horizontal, +Integer sH, Integer sW) return (matrix[double] R) { + n = ncol(img); + R = matrix(0, rows=nrow(img), cols=n); + rows = sH; + cols = sW; - if (horizontal_axis) { + if (horizontal) { parfor (i in seq(1, (rows %/% 2) * cols, cols),check=0) { start = i; end = i + cols - 1; mirrorStart = (n - end) + 1; mirrorEnd = (n - start) + 1; - R[, start:end] = img_matrix[, mirrorStart:mirrorEnd]; - R[, mirrorStart:mirrorEnd] = img_matrix[, start:end]; + R[, start:end] = img[, mirrorStart:mirrorEnd]; + R[, mirrorStart:mirrorEnd] = img[, start:end]; } if (rows %% 2 == 1) { midStart = ((rows %/% 2)) * cols + 1; midEnd = midStart + cols - 1; - R[, midStart:midEnd] = img_matrix[, midStart:midEnd]; + R[, midStart:midEnd] = img[, midStart:midEnd]; } } else { offset = 1; while (offset <= n) { end = min(n, offset + cols - 1); - reversed_sub_matrix = matrix(0, rows=nrow(img_matrix), cols=cols); + reversed_sub_matrix = matrix(0, rows=nrow(img), cols=cols); idx = 1; for (j in offset:end) { - reversed_sub_matrix[, cols - idx + 1] = img_matrix[, j]; + reversed_sub_matrix[, cols - idx + 1] = img[, j]; idx = idx + 1; } R[, offset:end] = reversed_sub_matrix; diff --git a/scripts/builtin/img_posterize.dml b/scripts/builtin/img_posterize.dml index 91578b9c766..49493faa7eb 100644 --- a/scripts/builtin/img_posterize.dml +++ b/scripts/builtin/img_posterize.dml @@ -24,7 +24,7 @@ # # INPUT: # 
------------------------------------------------------------------------------------------- -# img_in Input image +# img Input image # bits The number of bits to keep for the values. # 1 means black and white, 8 means every integer between 0 and 255. # ------------------------------------------------------------------------------------------- @@ -34,6 +34,6 @@ # img_out Output image # --------------------------------------------------------------------------------------------- -m_img_posterize = function(Matrix[Double] img_in, Integer bits) return (Matrix[Double] img_out) { - img_out = (img_in %/% 2^(8 - bits)) * (2^(8 - bits)) +m_img_posterize = function(Matrix[Double] img, Integer bits) return (Matrix[Double] img_out) { + img_out = (img %/% 2^(8 - bits)) * (2^(8 - bits)) } diff --git a/scripts/builtin/img_posterize_linearized.dml b/scripts/builtin/img_posterize_linearized.dml index a0edcf3ed4f..0cb285c48d7 100644 --- a/scripts/builtin/img_posterize_linearized.dml +++ b/scripts/builtin/img_posterize_linearized.dml @@ -24,7 +24,7 @@ # # INPUT: # ------------------------------------------------------------------------------------------- -# img_in Row linearized input images as 2D matrix +# img Row linearized input images as 2D matrix # bits The number of bits to keep for the values. # 1 means black and white, 8 means every integer between 0 and 255. # ------------------------------------------------------------------------------------------- @@ -34,6 +34,6 @@ # img_out Row linearized output images as 2D matrix # --------------------------------------------------------------------------------------------- -m_img_posterize_linearized = function(Matrix[Double] img_in, Integer bits) return (Matrix[Double] img_out) { - img_out = (img_in %/% 2^(8 - bits)) * (2^(8 - bits)) +m_img_posterize_linearized = function(Matrix[Double] img, Integer bits) return (Matrix[Double] img_out) { + img_out = (img %/% 2^(8 - bits)) * (2^(8 - bits)) } diff --git a/scripts/builtin/img_rotate.dml b/scripts/builtin/img_rotate.dml index c49826c2104..6a59b7fb81c 100644 --- a/scripts/builtin/img_rotate.dml +++ b/scripts/builtin/img_rotate.dml @@ -24,9 +24,9 @@ # # INPUT: # ----------------------------------------------------------------------------------------------- -# img_in Input image as 2D matrix with top left corner at [1, 1] +# img Input image as 2D matrix with top left corner at [1, 1] # radians The value by which to rotate in radians. 
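For intuition, a minimal DML sketch of the posterize rounding used by both variants above; the pixel value 200 and bits = 3 are made-up scalars, not part of the scripts:

  # keep 3 of 8 bits: values snap down to multiples of 2^(8-3) = 32
  px = 200
  bits = 3
  step = 2^(8 - bits)
  q = (px %/% step) * step   # 200 %/% 32 = 6, and 6 * 32 = 192
  print("posterized " + px + " -> " + q)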
-# fill_value The background color revealed by the rotation +# value The background color revealed by the rotation # ----------------------------------------------------------------------------------------------- # # OUTPUT: @@ -34,16 +34,16 @@ # img_out Output image as 2D matrix with top left corner at [1, 1] # --------------------------------------------------------------------------------------------- -m_img_rotate = function(Matrix[Double] img_in, Double radians, Double fill_value) return (Matrix[Double] img_out) { +m_img_rotate = function(Matrix[Double] img, Double radians, Double value) return (Matrix[Double] img_out) { # Translation matrix for moving the origin to the center of the image t1 = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) - t1[1, 3] = -ncol(img_in) / 2 - t1[2, 3] = -nrow(img_in) / 2 + t1[1, 3] = -ncol(img) / 2 + t1[2, 3] = -nrow(img) / 2 # Translation matrix for moving the origin back to the top left corner t2 = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) - t2[1, 3] = ncol(img_in) / 2 - t2[2, 3] = nrow(img_in) / 2 + t2[1, 3] = ncol(img) / 2 + t2[2, 3] = nrow(img) / 2 # The rotation matrix around the origin rot = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) @@ -58,5 +58,5 @@ m_img_rotate = function(Matrix[Double] img_in, Double radians, Double fill_value m = t2 %*% rot %*% t1 # Transform image - img_out = img_transform(img_in, ncol(img_in), nrow(img_in), as.scalar(m[1,1]), as.scalar(m[1,2]), as.scalar(m[1,3]), as.scalar(m[2,1]), as.scalar(m[2,2]), as.scalar(m[2,3]), fill_value) + img_out = img_transform(img, ncol(img), nrow(img), as.scalar(m[1,1]), as.scalar(m[1,2]), as.scalar(m[1,3]), as.scalar(m[2,1]), as.scalar(m[2,2]), as.scalar(m[2,3]), value) } diff --git a/scripts/builtin/img_rotate_linearized.dml b/scripts/builtin/img_rotate_linearized.dml index f5ac43625d5..973fab33a7a 100644 --- a/scripts/builtin/img_rotate_linearized.dml +++ b/scripts/builtin/img_rotate_linearized.dml @@ -24,9 +24,9 @@ # # INPUT: # ----------------------------------------------------------------------------------------------- -# img_in Linearized input images as 2D matrix with top left corner at [1, 1] -# radians The value by which to rotate in radian. -# fill_value The background color revealed by the rotation +# img Linearized input images as 2D matrix with top left corner at [1, 1] +# radians The value by which to rotate in radians. 
+# value The background color revealed by the rotation # ----------------------------------------------------------------------------------------------- # # OUTPUT: @@ -34,16 +34,16 @@ # img_out Output images in linearized form as 2D matrix with top left corner at [1, 1] # --------------------------------------------------------------------------------------------- -m_img_rotate_linearized = function(Matrix[Double] img_in, Double radians, Double fill_value, Integer s_cols, Integer s_rows) return (Matrix[Double] img_out) { +m_img_rotate_linearized = function(Matrix[Double] img, Double radians, Double value, Integer sW, Integer sH) return (Matrix[Double] img_out) { # Translation matrix for moving the origin to the center of the image t1 = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) - t1[1, 3] = -s_cols / 2 - t1[2, 3] = -s_rows / 2 + t1[1, 3] = -sW / 2 + t1[2, 3] = -sH / 2 # Translation matrix for moving the origin back to the top left corner t2 = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) - t2[1, 3] = s_cols / 2 - t2[2, 3] = s_rows / 2 + t2[1, 3] = sW / 2 + t2[2, 3] = sH / 2 # The rotation matrix around the origin rot = matrix("1 0 0 0 1 0 0 0 1", rows=3, cols=3) @@ -58,5 +58,5 @@ m_img_rotate_linearized = function(Matrix[Double] img_in, Double radians, Double m = t2 %*% rot %*% t1 # Transform image - img_out = img_transform_linearized(img_in, s_cols, s_rows, as.scalar(m[1,1]), as.scalar(m[1,2]), as.scalar(m[1,3]), as.scalar(m[2,1]), as.scalar(m[2,2]), as.scalar(m[2,3]), fill_value, s_cols, s_rows) + img_out = img_transform_linearized(img, sW, sH, as.scalar(m[1,1]), as.scalar(m[1,2]), as.scalar(m[1,3]), as.scalar(m[2,1]), as.scalar(m[2,2]), as.scalar(m[2,3]), value, sW, sH) } diff --git a/scripts/builtin/img_sample_pairing.dml b/scripts/builtin/img_sample_pairing.dml index 99147b25550..1e10dadd39f 100644 --- a/scripts/builtin/img_sample_pairing.dml +++ b/scripts/builtin/img_sample_pairing.dml @@ -23,10 +23,10 @@ # # INPUT: # ------------------------------------------------------------------------------------------- -# img_in1 First input image -# img_in2 Second input image +# img1 First input image +# img2 Second input image # weight The weight given to the second image. -# 0 means only img_in1, 1 means only img_in2 will be visible +# 0 means only img1, 1 means only img2 will be visible # ------------------------------------------------------------------------------------------- # # OUTPUT: @@ -34,11 +34,11 @@ # img_out Output image # -------------------------------------------------------------------------------------------- -m_img_sample_pairing = function(Matrix[Double] img_in1, Matrix[Double] img_in2, Double weight) return (Matrix[Double] img_out) { +m_img_sample_pairing = function(Matrix[Double] img1, Matrix[Double] img2, Double weight) return (Matrix[Double] img_out) { if (weight < 0 | 1 < weight) { print("Invalid weight. 
Set weight to 0.5") weight = 0.5 } - img_out = (1 - weight) * img_in1 + weight * img_in2 + img_out = (1 - weight) * img1 + weight * img2 } diff --git a/scripts/builtin/img_sample_pairing_linearized.dml b/scripts/builtin/img_sample_pairing_linearized.dml index f09046cc181..66d4d8e1834 100644 --- a/scripts/builtin/img_sample_pairing_linearized.dml +++ b/scripts/builtin/img_sample_pairing_linearized.dml @@ -23,10 +23,10 @@ # # INPUT: # ------------------------------------------------------------------------------------------- -# img_in1 input matrix/image (every row is a linearized image) -# img_in2 Second input image (one image represented as a single row linearized matrix) +# img1 input matrix/image (every row is a linearized image) +# img2 Second input image (one image represented as a single row linearized matrix) # weight The weight given to the second image. -# 0 means only img_in1, 1 means only img_in2 will be visible +# 0 means only img1, 1 means only img2 will be visible # ------------------------------------------------------------------------------------------- # # OUTPUT: @@ -34,15 +34,15 @@ # img_out Output image # -------------------------------------------------------------------------------------------- -m_img_sample_pairing_linearized= function(Matrix[Double] img_in1, Matrix[Double] img_in2, Double weight) return (Matrix[Double] img_out) { +m_img_sample_pairing_linearized= function(Matrix[Double] img1, Matrix[Double] img2, Double weight) return (Matrix[Double] img_out) { if (weight < 0 | 1 < weight) { print("Invalid weight. Set weight to 0.5") weight = 0.5 } - num_images= nrow(img_in1) - img_out = matrix (0 ,rows=nrow(img_in1),cols=ncol(img_in2)) + num_images= nrow(img1) + img_out = matrix (0 ,rows=nrow(img1),cols=ncol(img2)) parfor(i in 1:num_images) { - img_out[i,] = (1 - weight) * img_in1[i,]+ weight * img_in2 + img_out[i,] = (1 - weight) * img1[i,]+ weight * img2 } } diff --git a/scripts/builtin/img_shear.dml b/scripts/builtin/img_shear.dml index 2cf00592a63..678822724a5 100644 --- a/scripts/builtin/img_shear.dml +++ b/scripts/builtin/img_shear.dml @@ -24,10 +24,10 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Input image as 2D matrix with top left corner at [1, 1] -# shear_x Shearing factor for horizontal shearing -# shear_y Shearing factor for vertical shearing -# fill_value The background color revealed by the shearing +# img Input image as 2D matrix with top left corner at [1, 1] +# shearX Shearing factor for horizontal shearing +# shearY Shearing factor for vertical shearing +# value The background color revealed by the shearing # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -35,6 +35,6 @@ # img_out Output image as 2D matrix with top left corner at [1, 1] # ------------------------------------------------------------------------------------------ -m_img_shear = function(Matrix[Double] img_in, Double shear_x, Double shear_y, Double fill_value) return (Matrix[Double] img_out) { - img_out = img_transform(img_in, ncol(img_in), nrow(img_in), 1, shear_x, 0, shear_y, 1, 0, fill_value) +m_img_shear = function(Matrix[Double] img, Double shearX, Double shearY, Double value) return (Matrix[Double] img_out) { + img_out = img_transform(img, ncol(img), nrow(img), 1, shearX, 0, shearY, 1, 0, value) } diff --git a/scripts/builtin/img_shear_linearized.dml b/scripts/builtin/img_shear_linearized.dml index 79471f358b6..fa099dc75a1 100644 --- 
a/scripts/builtin/img_shear_linearized.dml +++ b/scripts/builtin/img_shear_linearized.dml @@ -24,17 +24,17 @@ # # INPUT: # --------------------------------------------------------------------------------------------- -# img_in Linearized input images as 2D matrix with top left corner at [1, 1] -# shear_x Shearing factor for horizontal shearing -# shear_y Shearing factor for vertical shearing -# fill_value The background color revealed by the shearing +# img Linearized input images as 2D matrix with top left corner at [1, 1] +# shearX Shearing factor for horizontal shearing +# shearY Shearing factor for vertical shearing +# value The background color revealed by the shearing # --------------------------------------------------------------------------------------------- # # OUTPUT: # ------------------------------------------------------------------------------------------ -# img_out Output images in linearized form as 2D matrix with top left corner at [1, 1] +# img_out Output images in linearized form as 2D matrix with top left corner at [1, 1] # ------------------------------------------------------------------------------------------ -m_img_shear_linearized = function(Matrix[Double] img_in, Double shear_x, Double shear_y, Double fill_value, Integer s_cols, Integer s_rows) return (Matrix[Double] img_out) { - img_out = img_transform_linearized(img_in, s_cols, s_rows, 1, shear_x, 0, shear_y, 1, 0, fill_value, s_cols, s_rows) +m_img_shear_linearized = function(Matrix[Double] img, Double shearX, Double shearY, Double value, Integer sW, Integer sH) return (Matrix[Double] img_out) { + img_out = img_transform_linearized(img, sW, sH, 1, shearX, 0, shearY, 1, 0, value, sW, sH) } diff --git a/scripts/builtin/img_transform.dml b/scripts/builtin/img_transform.dml index f65e2f4a5f5..5dfd7aded83 100644 --- a/scripts/builtin/img_transform.dml +++ b/scripts/builtin/img_transform.dml @@ -25,11 +25,11 @@ # # INPUT: # ------------------------------------------------------------------------------------------- -# img_in Input image as 2D matrix with top left corner at [1, 1] -# out_w Width of the output image -# out_h Height of the output image +# img Input image as 2D matrix with top left corner at [1, 1] +# w Width of the output image +# h Height of the output image # a,b,c,d,e,f The first two rows of the affine matrix in row-major order -# fill_value The background of the image +# value The background of the image # ------------------------------------------------------------------------------------------- # # OUTPUT: @@ -37,17 +37,17 @@ # img_out Output image as 2D matrix with top left corner at [1, 1] # --------------------------------------------------------------------------------------- -m_img_transform = function(Matrix[Double] img_in, Integer out_w, Integer out_h, Double a, - Double b, Double c, Double d, Double e, Double f, Double fill_value) +m_img_transform = function(Matrix[Double] img, Integer w, Integer h, Double a, + Double b, Double c, Double d, Double e, Double f, Double value) return (Matrix[Double] img_out) { divisor = a * e - b * d if(divisor == 0) { print("Inverse matrix does not exist! 
Returning input.") - img_out = img_in + img_out = img } else { - orig_w = ncol(img_in) - orig_h = nrow(img_in) + orig_w = ncol(img) + orig_h = nrow(img) # inverted transformation matrix # inversion is necessary because we compute the sampling position of pixels in the output image # and not the output coordinates of input pixels @@ -61,26 +61,26 @@ m_img_transform = function(Matrix[Double] img_in, Integer out_w, Integer out_h, T_inv[3, 3] = 1 # coordinates of output pixel-centers linearized in row-major order - coords = matrix(1, rows=3, cols=out_w*out_h) - coords[1,] = t((seq(0, out_w*out_h-1) %% out_w) + 0.5) - coords[2,] = t((seq(0, out_w*out_h-1) %/% out_w) + 0.5) + coords = matrix(1, rows=3, cols=w*h) + coords[1,] = t((seq(0, w*h-1) %% w) + 0.5) + coords[2,] = t((seq(0, w*h-1) %/% w) + 0.5) # compute sampling pixel indices coords = floor(T_inv %*% coords) + 1 - img_out = matrix(fill_value, rows=out_h, cols=out_w) + img_out = matrix(value, rows=h, cols=w) inx = t(coords[1,]) iny = t(coords[2,]) - # any out-of-range pixels, if present, correspond to an extra pixel with fill_value at the end of the input + # any out-of-range pixels, if present, correspond to an extra pixel with value at the end of the input index_vector = (orig_w *(iny-1) + inx) * ((0= 1 & src_x <= o_w & src_y >= 1 & src_y <= o_h) { - start_index = (src_y - 1) * o_w + src_x - dest_index = (y - 1) * out_w + x + offsetX = round(offsetX) + offsetY = round(offsetY) + img_out = matrix(value, rows=nrow(img), cols=w * h) + parfor(y in 1:h,check=0) { + for (x in 1:w) { + src_x = x - offsetX + src_y = y - offsetY + if (src_x >= 1 & src_x <= sW & src_y >= 1 & src_y <= sH) { + start_index = (src_y - 1) * sW + src_x + dest_index = (y - 1) * w + x - if (start_index >= 1 & start_index <= ncol(img_in) & dest_index >= 1 & dest_index <= ncol(img_out)) { - img_out[, dest_index] = img_in[, start_index] + if (start_index >= 1 & start_index <= ncol(img) & dest_index >= 1 & dest_index <= ncol(img_out)) { + img_out[, dest_index] = img[, start_index] } } } diff --git a/scripts/builtin/impurityMeasures.dml b/scripts/builtin/impurityMeasures.dml index d62bdd04f5f..d965931b655 100644 --- a/scripts/builtin/impurityMeasures.dml +++ b/scripts/builtin/impurityMeasures.dml @@ -28,7 +28,7 @@ # Y Target vector containing 0 and 1 values. # R Vector indicating whether a feature is categorical or continuous. # 1 denotes a continuous feature, 2 denotes a categorical feature. -# n_bins Number of bins for binning in case of scale features. +# n Number of bins for binning in case of scale features. # method String indicating the method to use; either "entropy" or "gini". # -------------------------------------------------------------------------- # @@ -44,7 +44,7 @@ # the better the split. 
# ------------------------------------------------------------------------ -m_impurityMeasures = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] R, Integer n_bins = 20, String method) +m_impurityMeasures = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] R, Integer n = 20, String method) return (Matrix[Double] IM) { if (method != "entropy" & method != "gini") { @@ -55,8 +55,8 @@ m_impurityMeasures = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] parfor (i in 1:ncol(X)) { if (as.scalar(R[,i]) == 1) { - binned_feature = applyBinning(X[,i], n_bins) - IM[,i] = getImpurityMeasure(binned_feature, Y, n_bins, method) + binned_feature = applyBinning(X[,i], n) + IM[,i] = getImpurityMeasure(binned_feature, Y, n, method) } else { IM[,i] = getImpurityMeasure(X[,i], Y, max(X[,i]), method) } diff --git a/scripts/builtin/imputeByFDApply.dml b/scripts/builtin/imputeByFDApply.dml index 3841cecf068..283d34eb03f 100644 --- a/scripts/builtin/imputeByFDApply.dml +++ b/scripts/builtin/imputeByFDApply.dml @@ -23,23 +23,21 @@ # # INPUT: # -------------------------------------------------------------------------------------- -# X Matrix X -# source source attribute to use for imputation and error correction -# target attribute to be fixed -# threshold threshold value in interval [0, 1] for robust FDs +# X Matrix X +# imputedVec Source attribute to use for imputation and error correction # -------------------------------------------------------------------------------------- # # OUTPUT: # --------------------------------------------------------------------------------- -# X Matrix with possible imputations +# X Imputed dataset # --------------------------------------------------------------------------------- -m_imputeByFDApply = function(Matrix[Double] X, Matrix[Double] Y_imp) +m_imputeByFDApply = function(Matrix[Double] X, Matrix[Double] imputedVec) return(Matrix[Double] imputed_Y) { X = replace(target = X, pattern=NaN, replacement=1) X = replace(target = X, pattern=0, replacement=1) - imputed_Y = table(seq(1,nrow(X)), X, 1, nrow(X), nrow(Y_imp)) %*% Y_imp; + imputed_Y = table(seq(1,nrow(X)), X, 1, nrow(X), nrow(imputedVec)) %*% imputedVec; if(sum(imputed_Y) == 0) imputed_Y = imputed_Y + NaN diff --git a/scripts/builtin/imputeByKNN.dml b/scripts/builtin/imputeByKNN.dml index 13136ff2c9a..2d141ea721f 100644 --- a/scripts/builtin/imputeByKNN.dml +++ b/scripts/builtin/imputeByKNN.dml @@ -41,7 +41,7 @@ # with M< diff --git a/scripts/builtin/km.dml b/scripts/builtin/km.dml --- a/scripts/builtin/km.dml +++ b/scripts/builtin/km.dml - if (num_groups > 1 & test_type != "none") { + if (num_groups > 1 & test != "none") { str = ""; TEST = matrix (0, rows = num_groups, cols = 5); TEST_GROUPS_OE = matrix (0, rows = 1, cols = 4); @@ -243,7 +243,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D EXP = matrix (0, rows = num_groups, cols = num_strata); V_sum_total = matrix (0, rows = num_groups-1, cols = (num_groups-1) * num_strata); n_event_all_global = matrix(1, rows=num_groups, cols=num_strata); - } else if (num_groups == 1 & test_type != "none") { + } else if (num_groups == 1 & test != "none") { stop ("Data contains only one group or no groups, at least two groups are required for test!"); } @@ -299,7 +299,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D n_risk_stratum = range - n_event_all_stratum_agg; # no. 
at risk per stratum - if (num_groups > 1 & test_type != "none") { # needed for log-rank or wilcoxon test + if (num_groups > 1 & test != "none") { # needed for log-rank or wilcoxon test n_risk_n_event_stratum = matrix (0, rows = n_time_all1, cols = num_groups * 2); } } @@ -354,7 +354,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D } n_risk_stratum = range - n_event_all_stratum_agg; # no. at risk per stratum - if (num_groups > 1 & test_type != "none") { # needed for log-rank or wilcoxon test + if (num_groups > 1 & test != "none") { # needed for log-rank or wilcoxon test n_risk_n_event_stratum = matrix (0, rows = n_time_all1, cols = num_groups * 2); } @@ -389,7 +389,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D } n_risk = sum (group_ind) - n_event_all_agg; # no. at risk per stratum per group - if (num_groups > 1 & test_type != "none") { + if (num_groups > 1 & test != "none") { n_risk_n_event_stratum[,(g - 1) * 2 + 1] = n_risk; n_risk_n_event_stratum[,(g - 1) * 2 + 2] = n_event; } @@ -408,20 +408,20 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D surv = cumprod ((n_risk - n_event) / n_risk); tmp = n_event / (n_risk * (n_risk - n_event)); se_surv = sqrt (cumsum (tmp)) * surv; - if (err_type == "peto") { + if (err == "peto") { se_surv = (surv * sqrt(1 - surv) / sqrt(n_risk)); } - if (conf_type == "plain") { + if (conf == "plain") { # True survivor function is in [surv +- z_alpha_2 * se_surv], # values less than 0 are replaced by 0, values larger than 1 are replaced by 1! CI_l = max (surv - (z_alpha_2 * se_surv), 0); CI_r = min (surv + (z_alpha_2 * se_surv), 1); - } else if (conf_type == "log") { + } else if (conf == "log") { # True survivor function is in [surv * exp(+- z_alpha_2 * se_surv / surv)] CI_l = max (surv * exp (- z_alpha_2 * se_surv / surv), 0); CI_r = min (surv * exp ( z_alpha_2 * se_surv / surv), 1); - } else { # conf_type == "log-log" + } else { # conf == "log-log" # True survivor function is in [surv ^ exp(+- z_alpha_2 * se(log(-log(surv))))] CI_l = max (surv ^ exp (- z_alpha_2 * se_surv / log(surv)), 0); CI_r = min (surv ^ exp ( z_alpha_2 * se_surv / log(surv)), 1); @@ -495,7 +495,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D } M[M_offset,3] = t_5; - if (test_type != "none") { + if (test != "none") { n_event_all_global[g,s] = n_event_sum_all; } } @@ -507,16 +507,16 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D } ######## COMPARISON BETWEEN DIFFERENT GROUPS USING LOG-RANK OR WILCOXON TEST - if (num_groups > 1 & test_type != "none") { + if (num_groups > 1 & test != "none") { V = matrix (0, rows = num_groups-1, cols = num_groups-1); parfor (g in 0:(num_groups-1), check = 0) { n_risk = n_risk_n_event_stratum[,g * 2 + 1]; n_event = n_risk_n_event_stratum[,g * 2 + 2]; - if (test_type == "log-rank") { + if (test == "log-rank") { O = n_event; E = n_risk * n_event_stratum / n_risk_stratum; - } else { ### test_type == "wilcoxon" + } else { ### test == "wilcoxon" O = n_risk_stratum * n_event / range; E = n_risk * n_event_stratum / range; } @@ -537,12 +537,12 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D if (i1 == i2) { I_i1i2 = 1; } - if (test_type == "log-rank") { + if (test == "log-rank") { V1 = n_risk * n_event_stratum * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1)); V1 = replace (target = V1, pattern = NaN, replacement = 0); V2 = 
I_i1i2 - (n_risk_i2j / n_risk_stratum); V[(i1 + 1),(i2 + 1)] = sum (V1 * V2); - } else { ### test_type == "wilcoxon" + } else { ### test == "wilcoxon" V1 = (n_risk_stratum ^ 2) * (n_risk * n_event_stratum) * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1)); V1 = replace (target = V1, pattern = NaN, replacement = 0); V2 = I_i1i2 - (n_risk_i2j / n_risk_stratum); @@ -555,7 +555,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D } } - if (num_groups > 1 & test_type != "none") { + if (num_groups > 1 & test != "none") { V_sum = matrix (0, rows = num_groups-1, cols = num_groups-1); for (s in 1:num_strata) { V_start_ind = (s - 1) * (num_groups - 1) + 1; @@ -567,7 +567,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D test_st = as.scalar (t(U_sum[1:(num_groups-1),1]) %*% inv(V_sum) %*% U_sum[1:(num_groups-1),1]); p_val = 1 - cdf (target = test_st, dist = "chisq", df = num_groups-1 ); - if (test_type != "none") { + if (test != "none") { U_OE_sum = rowSums(U_OE); V_OE =rowSums((U*U) /sum(V_sum)); TEST_GROUPS_OE[1,1] = num_groups; @@ -579,7 +579,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D TEST[,3] = rowSums(EXP); TEST[,4] = rowSums(U_OE_sum); TEST[,5] = rowSums(V_OE); - str = append (str, test_type + " test for " + num_groups + " groups: Chi-squared = " + test_st + " on " + (num_groups - 1) + " df, p = " + p_val + " "); + str = append (str, test + " test for " + num_groups + " groups: Chi-squared = " + test_st + " on " + (num_groups - 1) + " df, p = " + p_val + " "); } } @@ -608,12 +608,12 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D # pull out non-empty rows from TEST if (n_group_cols > 0 & n_stratum_cols > 0) { M = cbind (G_cols, S_cols, M); - if (test_type != "none") { + if (test != "none") { TEST = cbind (G_cols_original, TEST); } } else if (n_group_cols > 0) { M = cbind (G_cols, M); - if (test_type != "none") { + if (test != "none") { TEST = cbind (G_cols_original, TEST); } } else if (n_stratum_cols > 0) { @@ -626,7 +626,7 @@ m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[D KM = removeEmpty (target = KM, margin = "rows"); KM = KM[1:(nrow (KM) - 1),]; - if (test_type != "none") { + if (test != "none") { if (num_groups > 1) { T = TEST; T_GROUPS_OE = TEST_GROUPS_OE; diff --git a/scripts/builtin/kmeans.dml b/scripts/builtin/kmeans.dml index 8b76040f641..122218c3352 100644 --- a/scripts/builtin/kmeans.dml +++ b/scripts/builtin/kmeans.dml @@ -26,10 +26,10 @@ # X The input Matrix to do KMeans on. # k Number of centroids # runs Number of runs (with different initial centroids) -# max_iter Maximum number of iterations per run -# eps Tolerance (epsilon) for WCSS change ratio -# is_verbose do not print per-iteration stats -# avg_sample_size_per_centroid Average number of records per centroid in data samples +# maxIter Maximum number of iterations per run +# tol Tolerance (epsilon) for WCSS change ratio +# verbose Print per-iteration stats if TRUE +# avgSampleSizePerCentroid Average number of records per centroid in data samples # seed The seed used for initial sampling. If set to -1 # random seeds are selected. 
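An illustrative DML call of the renamed kmeans signature (the data and parameter values are made-up; C holds the centroids, Y the cluster assignments):

  X = rand(rows=100, cols=2, seed=42)
  [C, Y] = kmeans(X=X, k=3, runs=5, maxIter=100, tol=1e-6,
    verbose=FALSE, avgSampleSizePerCentroid=50, seed=7)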
# --------------------------------------------------------------------------------------- @@ -40,12 +40,12 @@ # C The output matrix with the centroids # --------------------------------------------------------------- -m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer max_iter = 1000, - Double eps = 0.000001, Boolean is_verbose = FALSE, Integer avg_sample_size_per_centroid = 50, +m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer maxIter = 1000, + Double tol = 0.000001, Boolean verbose = FALSE, Integer avgSampleSizePerCentroid = 50, Integer seed = -1) return (Matrix[Double] C, Matrix[Double] Y) { - if( is_verbose ) + if( verbose ) print ("BEGIN K-MEANS SCRIPT"); num_records = nrow (X); @@ -53,24 +53,24 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer num_centroids = k; num_runs = runs; - if(is_verbose) + if(verbose) print("dim X=" + nrow(X) + "x" + ncol(X)) sumXsq = sum (X ^ 2); # STEP 1: INITIALIZE CENTROIDS FOR ALL RUNS FROM DATA SAMPLES: - if( is_verbose ) + if( verbose ) print ("Taking data samples for initialization..."); [sample_maps, samples_vs_runs_map, sample_block_size] = get_sample_maps( - num_records, num_runs, num_centroids * avg_sample_size_per_centroid, seed); + num_records, num_runs, num_centroids * avgSampleSizePerCentroid, seed); is_row_in_samples = rowSums (sample_maps); X_samples = sample_maps %*% X; X_samples_sq_norms = rowSums (X_samples ^ 2); - if( is_verbose ) + if( verbose ) print ("Initializing the centroids for all runs..."); All_Centroids = matrix (0, num_runs * num_centroids, num_features); @@ -116,7 +116,7 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer final_wcss = matrix (0, rows = num_runs, cols = 1); num_iterations = matrix (0, rows = num_runs, cols = 1); - if( is_verbose ) + if( verbose ) print ("Performing k-means iterations for all runs..."); parfor (run_index in 1 : num_runs, check = 0) @@ -136,7 +136,7 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer # Compute the current centroid-based within-cluster sum of squares (WCSS) wcss_old = wcss; wcss = sumXsq + sum (minD); - if( is_verbose ) { + if( verbose ) { if (iter_count == 0) print ("Run " + run_index + ", At Start-Up: Centroid WCSS = " + wcss); else @@ -156,9 +156,9 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer # Check if convergence or maximum iteration has been reached iter_count = iter_count + 1 - if(wcss_old - wcss < eps * wcss) + if(wcss_old - wcss < tol * wcss) term_code = 1; # Convergence reached - else if(iter_count >= max_iter) + else if(iter_count >= maxIter) term_code = 2; # Max iteration reached else if(sum (P_denom <= 0) > 0) term_code = 3; # "Runaway" centroid (0.0 denom) @@ -166,7 +166,7 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer C_old = C; C = C_new; } - if(is_verbose) + if(verbose) print ("Run " + run_index + ", Iteration " + iter_count + ": Terminated with code = " + term_code + ", Centroid WCSS = " + wcss); @@ -183,7 +183,7 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer termination_bitmap [, 1 : ncol(termination_bitmap_raw)] = termination_bitmap_raw; termination_stats = colSums (termination_bitmap); - if(is_verbose){ + if(verbose){ print ("Number of successful runs = " + as.integer (as.scalar (termination_stats [1, 1]))); print ("Number of incomplete runs = " + as.integer (as.scalar (termination_stats [1, 
2]))); @@ -202,7 +202,7 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer aggr_best_index_vector = cumsum (best_index_vector); best_index = as.integer (sum (aggr_best_index_vector == 0) + 1); - if(is_verbose) + if(verbose) print ("Successful runs: Best run is " + best_index + " with Centroid WCSS = " + best_wcss + "; Avg WCSS = " + avg_wcss + "; Worst WCSS = " + worst_wcss); @@ -212,11 +212,11 @@ m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer aggr_P = t(cumsum (t(P))); Y = rowSums (aggr_P == 0) + 1 - if(is_verbose) + if(verbose) print("dim C=" + nrow(C) + "x" + ncol(C) + ", dim Y=" + nrow(Y) + "x" + ncol(Y)) } else{ - print ("K-means: No output is produced. Try increasing the number of iterations and/or lower eps."); + print ("K-means: No output is produced. Try increasing the number of iterations and/or lowering the tolerance."); C = matrix(0, num_centroids, num_records) Y = matrix(-1, 1, num_records) } diff --git a/scripts/builtin/knn.dml b/scripts/builtin/knn.dml index 19d4cfffb0e..e6790ac125a 100644 --- a/scripts/builtin/knn.dml +++ b/scripts/builtin/knn.dml @@ -26,23 +26,23 @@ # Train The input matrix as features # Test The input matrix for nearest neighbor search # CL The input matrix as target -# CL_T The target type of matrix CL whether +# ctypes The target type of matrix CL whether # columns in CL are continuous ( =1 ) or # categorical ( =2 ) or not specified ( =0 ) -# trans_continuous Option flag for continuous feature transformed to [-1,1]: +# transCont Option flag for transforming continuous features to [-1,1]: # FALSE = do not transform continuous variable; # TRUE = transform continuous variable; -# k_value k value for KNN, ignore if select_k enable -# select_k Use k selection algorithm to estimate k (TRUE means yes) -# k_min Min k value( available if select_k = 1 ) -# k_max Max k value( available if select_k = 1 ) -# select_feature Use feature selection algorithm to select feature (TRUE means yes) -# feature_max Max feature selection -# interval Interval value for K selecting ( available if select_k = 1 ) -# feature_importance Use feature importance algorithm to estimate each feature +# k k value for KNN, ignored if selectK is enabled +# selectK Use k selection algorithm to estimate k (TRUE means yes) +# minK Min k value (available if selectK = 1) +# maxK Max k value (available if selectK = 1) +# selectFeature Use feature selection algorithm to select feature (TRUE means yes) +# maxFeatures Max number of features to select +# interval Interval value for k selection (available if selectK = 1) +# featureImportance Use feature importance algorithm to estimate each feature # (TRUE means yes) -# predict_con_tg Continuous target predict function: mean(=0) or median(=1) -# START_SELECTED feature selection initial value +# predictCont Continuous target prediction function: mean (=0) or median (=1) +# initSelectFeature Initial feature selection matrix # --------------------------------------------------------------------------- # # OUTPUT: @@ -56,18 +56,18 @@ m_knn = function( Matrix[Double] Train, Matrix[Double] Test, Matrix[Double] CL, - Integer CL_T = 0, - Integer trans_continuous = 0, - Integer k_value = 5, - Integer select_k = 0, - Integer k_min = 1, - Integer k_max = 100, - Integer select_feature = 0, - Integer feature_max = 10, + Integer ctypes = 0, + Integer transCont = 0, + Integer k = 5, + Integer selectK = 0, + Integer minK = 1, + Integer maxK = 100, + Integer selectFeature = 0, + Integer maxFeatures = 10, Integer interval = 1000, - 
Integer feature_importance = 0, - Integer predict_con_tg = 0, - Matrix[Double] START_SELECTED = matrix(0, 0, 0) + Integer featureImportance = 0, + Integer predictCont = 0, + Matrix[Double] initSelectFeature = matrix(0, 0, 0) )return( Matrix[Double] NNR_matrix, Matrix[Double] CL_matrix, @@ -77,7 +77,7 @@ m_knn = function( m_feature_importance = matrix(0, 0, 0); #data prepare - if( trans_continuous == 1 ){ + if( transCont == 1 ){ Train = prepareKNNData( Train); Test = prepareKNNData( Test); } @@ -86,87 +86,87 @@ m_knn = function( n_features = ncol( Train ); s_selected_k = 5; m_selected_feature = matrix(1,rows=1,cols=n_records); - if( select_k == 1 | select_feature==1 ){ + if( selectK == 1 | selectFeature==1 ){ #parameter check #parameter re-define - if( select_k==1 ){ - if( k_max >= n_records ){ - k_max = n_records - 1; - print( "k_max should no greater than number of record, change k_max equal " + - "( number of record - 1 ) : " + k_max ); + if( selectK==1 ){ + if( maxK >= n_records ){ + maxK = n_records - 1; + print( "maxK should be no greater than the number of records, setting maxK to " + + "(number of records - 1): " + maxK ); } - if( k_max >= interval ){ - interval = k_max + 1; - # k_max should equal interval -1, because we drop self when search nn. - print( "interval should be no less than k_max, change interval equal : " + + if( maxK >= interval ){ + interval = maxK + 1; + # maxK should equal interval - 1, because we drop self when searching nn. + print( "interval should be no less than maxK, setting interval to: " + interval ); } - if( k_max <= 1 ) - stop( "uncorrect k_max value" ); - if( k_min >= k_max ) - stop( "k_min >= k_max" ); + if( maxK <= 1 ) + stop( "incorrect maxK value" ); + if( minK >= maxK ) + stop( "minK >= maxK" ); } - if( select_feature == 1 ){ - if( k_value >= n_records ){ - k_value = n_records - 1; - print( "k_value should be no greater than number of record, change k_value equal " + - "( number of record - 1 ) : " + k_value ); + if( selectFeature == 1 ){ + if( k >= n_records ){ + k = n_records - 1; + print( "k should be no greater than the number of records, setting k to " + + "(number of records - 1): " + k ); } #Select feature only - if( nrow(START_SELECTED) == 0 & ncol(START_SELECTED) == 0 ) + if( nrow(initSelectFeature) == 0 & ncol(initSelectFeature) == 0 ) m_start_selected_feature = matrix( 0,1,n_features ); else - m_start_selected_feature = START_SELECTED; + m_start_selected_feature = initSelectFeature; } - if( select_k == 1 & select_feature == 1){ + if( selectK == 1 & selectFeature == 1){ #Combined k and feature selection print("Start combined k and feature selection ..."); [m_selected_feature,s_selected_k] = - getSelectedFeatureAndK( Train,CL,CL_T,m_start_selected_feature, - feature_max,k_min,k_max,interval ); + getSelectedFeatureAndK( Train,CL,ctypes,m_start_selected_feature, + maxFeatures,minK,maxK,interval ); } - else if( select_k == 1 ){ + else if( selectK == 1 ){ #Select k only print("Start k selection ..."); - s_selected_k = getSelectedKBase( Train,CL,CL_T,k_min,k_max,interval ); + s_selected_k = getSelectedKBase( Train,CL,ctypes,minK,maxK,interval ); } - else if( select_feature == 1 ){ + else if( selectFeature == 1 ){ #Select feature only print("Start feature selection ... 
"); [m_selected_feature,d_err] = - getSelectedFeature( Train,CL,CL_T,m_start_selected_feature, - feature_max,k_value,interval ); + getSelectedFeature( Train,CL,ctypes,m_start_selected_feature, + maxFeatures,k,interval ); } } - if( feature_importance == 1){ - if( k_value >= n_records ){ - k_value = n_records - 1; - print( "k_value should be no greater than number of record, make k_value equal " + - "( number of record - 1 ) : " + k_value ); + if( featureImportance == 1){ + if( k >= n_records ){ + k = n_records - 1; + print( "k should be no greater than number of record, make k equal " + + "( number of record - 1 ) : " + k ); } [m_feature_importance,m_norm_feature_importance] = - getFeatureImportance(Train,CL,CL_T,k_value); + getFeatureImportance(Train,CL,ctypes,k); } - NNR_matrix = naiveKNNsearch(P=Train,Q=Test,K=k_value); + NNR_matrix = naiveKNNsearch(P=Train,Q=Test,K=k); CL_matrix = matrix( 0,nrow( Test ),1 ); for(i in 1 : nrow(NNR_matrix)) { - NNR_tmp_matrix = matrix( 0,k_value,1 ); - for( j in 1:k_value ) + NNR_tmp_matrix = matrix( 0,k,1 ); + for( j in 1:k ) NNR_tmp_matrix[j,1] = CL[as.scalar( NNR_matrix[i,j] ),1]; - if(CL_T == 2) { + if(ctypes == 2) { t_cl_value = as.scalar( rowIndexMax( t(NNR_tmp_matrix) ) ); } else { - if ( predict_con_tg == 0) + if ( predictCont == 0) t_cl_value = mean( NNR_tmp_matrix ); - else if(predict_con_tg == 1) + else if(predictCont == 1) t_cl_value = median( NNR_tmp_matrix ); } diff --git a/scripts/builtin/knnbf.dml b/scripts/builtin/knnbf.dml index 76d88cdf75e..f4be3ab47ff 100644 --- a/scripts/builtin/knnbf.dml +++ b/scripts/builtin/knnbf.dml @@ -25,7 +25,7 @@ # -------------------------- # X --- # T --- -# k_value --- +# k --- # -------------------------- # # OUTPUT: @@ -33,18 +33,18 @@ # NNR --- # ------------------------ -m_knnbf = function(Matrix[Double] X, Matrix[Double] T, Integer k_value = 5) +m_knnbf = function(Matrix[Double] X, Matrix[Double] T, Integer k = 5) return(Matrix[Double] NNR) { num_records = nrow(X); num_queries = nrow(T); D = matrix(0, rows = num_records, cols = num_queries); - NNR = matrix(0, rows = num_queries, cols = k_value); + NNR = matrix(0, rows = num_queries, cols = k); parfor(i in 1 : num_queries) { D[ , i] = calculateDistance(X, T[i, ]); - NNR[i, ] = sortAndGetK(D[ , i], k_value); + NNR[i, ] = sortAndGetK(D[ , i], k); } } diff --git a/scripts/builtin/l2svm.dml b/scripts/builtin/l2svm.dml index 78f3914add6..23d45664d42 100644 --- a/scripts/builtin/l2svm.dml +++ b/scripts/builtin/l2svm.dml @@ -24,38 +24,38 @@ # # INPUT: # ------------------------------------------------------------------------------ -# X Feature matrix X (shape: m x n) -# Y Label vector y of class labels (shape: m x 1), assumed binary -# in -1/+1 or 1/2 encoding. -# intercept Indicator if a bias column should be added to X and the model -# epsilon Tolerance for early termination if the reduction of objective -# function is less than epsilon times the initial objective -# reg Regularization parameter (lambda) for L2 regularization -# maxIterations Maximum number of conjugate gradient (outer) iterations -# maxii Maximum number of line search (inner) iterations -# verbose Indicator if training details should be printed -# columnId An optional class ID used in verbose print output, -# eg. used when L2SVM is used in MSVM. +# X Feature matrix X (shape: m x n) +# Y Label vector y of class labels (shape: m x 1), assumed binary +# in -1/+1 or 1/2 encoding. 
+# intercept Indicator if a bias column should be added to X and the model +# tol Tolerance (epsilon) for early termination if the reduction of objective +# function is less than tolerance times the initial objective +# reg Regularization parameter (lambda) for L2 regularization +# maxIter Maximum number of conjugate gradient (outer) iterations +# maxInnerIter Maximum number of line search (inner) iterations +# verbose Indicator if training details should be printed +# columnId An optional class ID used in verbose print output, +# e.g. used when L2SVM is used in MSVM. # ------------------------------------------------------------------------------ # # OUTPUT: # ------------------------------------------------------------------------------ -# model Trained model/weights (shape: n x 1, w/ intercept: n+1) +# model Trained model/weights (shape: n x 1, w/ intercept: n+1) # ------------------------------------------------------------------------------ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, - Double epsilon = 0.001, Double reg = 1, Integer maxIterations = 100, - Integer maxii = 20, Boolean verbose = FALSE, Integer columnId = -1) + Double tol = 0.001, Double reg = 1, Integer maxIter = 100, + Integer maxInnerIter = 20, Boolean verbose = FALSE, Integer columnId = -1) return(Matrix[Double] model) { #check input parameter assertions if(nrow(X) < 2) stop("L2SVM: Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows") - if(epsilon < 0) + if(tol < 0) stop("L2SVM: Stopping due to invalid argument: Tolerance (tol) must be non-negative") if(reg < 0) stop("L2SVM: Stopping due to invalid argument: Regularization constant (reg) must be non-negative") - if(maxIterations < 1) + if(maxIter < 1) stop("L2SVM: Stopping due to invalid argument: Maximum iterations should be a positive integer") if(ncol(Y) < 1) stop("L2SVM: Stopping due to invalid multiple label columns, maybe use MSVM instead?") @@ -93,7 +93,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE iter = 0 continue = TRUE - while(continue & iter < maxIterations) { + while(continue & iter < maxIter) { # minimizing primal obj along direction s step_sz = 0 Xd = X %*% s @@ -101,7 +101,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE dd = reg * sum(s * s) continue1 = TRUE iiter = 0 - while(continue1 & iiter < maxii){ + while(continue1 & iiter < maxInnerIter){ tmp_Xw = Xw + step_sz*Xd out = 1 - Y * (tmp_Xw) sv = (out > 0) @@ -109,7 +109,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE g = wd + step_sz*dd - sum(out * Y * Xd) h = dd + sum(Xd * sv * Xd) step_sz = step_sz - g/h - continue1 = (g*g/h >= epsilon) + continue1 = (g*g/h >= tol) iiter = iiter + 1 } @@ -129,7 +129,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE } tmp = sum(s * g_old) - continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0); + continue = (step_sz*tmp >= tol*obj & sum(s^2) != 0); #non-linear CG step be = sum(g_new * g_new)/sum(g_old * g_old) diff --git a/scripts/builtin/lasso.dml b/scripts/builtin/lasso.dml index 16f3c02dcf3..674fe258d09 100644 --- a/scripts/builtin/lasso.dml +++ b/scripts/builtin/lasso.dml @@ -29,7 +29,7 @@ # tol target convergence tolerance # M history length # tau regularization component -# maxi maximum number of iterations until convergence +# maxIter maximum number of iterations until convergence # verbose if the builtin should be 
verbose # ----------------------------------------------------------------------------------- # @@ -39,7 +39,7 @@ # -------------------------------------------------------------------------------- m_lasso = function(Matrix[Double] X, Matrix[Double] y, Double tol = 1e-15, - Integer M = 5, Double tau = 1, Integer maxi = 100, Boolean verbose = TRUE) + Integer M = 5, Double tau = 1, Integer maxIter = 100, Boolean verbose = TRUE) return(Matrix[Double] w) { n = nrow(X) @@ -68,7 +68,7 @@ m_lasso = function(Matrix[Double] X, Matrix[Double] y, Double tol = 1e-15, inactive_set = matrix(1, rows=m, cols=1) iter = 0 continue = TRUE - while(iter < maxi & continue) { + while(iter < maxIter & continue) { dw = matrix(0, rows=m, cols=1) dg = matrix(0, rows=m, cols=1) relChangeObj = -1.0 diff --git a/scripts/builtin/lenetPredict.dml b/scripts/builtin/lenetPredict.dml index 12243c925b6..f0680deb8e5 100644 --- a/scripts/builtin/lenetPredict.dml +++ b/scripts/builtin/lenetPredict.dml @@ -24,11 +24,11 @@ # INPUT: # --------------------------------------------------------------------------------------------- # model Trained LeNet model -# X Input data matrix, of shape (N, C*Hin*Win) -# C Number of input channels -# Hin Input height -# Win Input width -# batch_size Batch size +# X Input data matrix, of shape (N, c*h*w) +# c Number of input channels +# h Input height +# w Input width +# batchSize Batch size # --------------------------------------------------------------------------------------------- # # OUTPUT: @@ -38,8 +38,8 @@ source("nn/layers/lenetForwardPass.dml") as lenet_fw -s_lenetPredict = function(list[unknown] model, Matrix[Double] X, Integer C, - Integer Hin, Integer Win, Integer batch_size = 128) +s_lenetPredict = function(list[unknown] model, Matrix[Double] X, Integer c, + Integer h, Integer w, Integer batchSize = 128) return (Matrix[Double] probs) { N = nrow(X) @@ -61,14 +61,14 @@ s_lenetPredict = function(list[unknown] model, Matrix[Double] X, Integer C, # Compute predictions over mini-batches probs = matrix(0, rows=N, cols=K) - iters = ceil(N / batch_size) + iters = ceil(N / batchSize) for(i in 1:iters) { # Get next batch - beg = ((i-1) * batch_size) %% N + 1 - end = min(N, beg + batch_size - 1) + beg = ((i-1) * batchSize) %% N + 1 + end = min(N, beg + batchSize - 1) X_batch = X[beg:end,] - pred = lenet_fw::lenetForward(X_batch, C, Hin, Win, model, TRUE) + pred = lenet_fw::lenetForward(X_batch, c, h, w, model, TRUE) # Store predictions probs[beg:end,] = as.matrix(pred["probs"]) diff --git a/scripts/builtin/lenetTrain.dml b/scripts/builtin/lenetTrain.dml index 2ddd9e764a3..e7a70299e68 100644 --- a/scripts/builtin/lenetTrain.dml +++ b/scripts/builtin/lenetTrain.dml @@ -25,14 +25,14 @@ # # INPUT: # ---------------------------------------------------------- -# X Input data matrix, of shape (N, C*Hin*Win) +# X Input data matrix, of shape (N, c*h*w) # Y Target matrix, of shape (N, K) -# X_val Validation data matrix, of shape (N, C*Hin*Win) -# Y_val Validation target matrix, of shape (N, K) -# C Number of input channels (dimensionality of input depth) -# Hin Input width -# Win Input height -# batch_size Batch size +# Xtest Validation data matrix, of shape (N, c*h*w) +# Ytest Validation target matrix, of shape (N, K) +# c Number of input channels (dimensionality of input depth) +# h Input height +# w Input width +# batchSize Batch size # epochs Number of epochs # lr Learning rate # mu Momentum value @@ -58,8 +58,8 @@ source("nn/layers/softmax.dml") as softmax source("nn/optim/sgd_nesterov.dml") as 
sgd_nesterov source("nn/layers/lenetForwardPass.dml") as lenet_fw -m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val, - Matrix[Double] Y_val, Integer C, Integer Hin, Integer Win, Integer batch_size=64, +m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, + Matrix[Double] Ytest, Integer c, Integer h, Integer w, Integer batchSize=64, Integer epochs=20, Double lr=0.01, Double mu=0.9, Double decay=0.95, Double reg=5e-04, Boolean verbose=FALSE, Integer seed=-1) return (List[unknown] model) @@ -80,9 +80,9 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val N3 = 512 # num nodes in affine3 # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes) - [W1, b1] = conv2d::init(F1, C, Hf, Wf, seed) # inputs: (N, C*Hin*Win) - [W2, b2] = conv2d::init(F2, F1, Hf, Wf, seed) # inputs: (N, F1*(Hin/2)*(Win/2)) - [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3, seed) # inputs: (N, F2*(Hin/2/2)*(Win/2/2)) + [W1, b1] = conv2d::init(F1, c, Hf, Wf, seed) # inputs: (N, c*h*w) + [W2, b2] = conv2d::init(F2, F1, Hf, Wf, seed) # inputs: (N, F1*(h/2)*(w/2)) + [W3, b3] = affine::init(F2*(h/2/2)*(w/2/2), N3, seed) # inputs: (N, F2*(h/2/2)*(w/2/2)) [W4, b4] = affine::init(N3, K, seed) # inputs: (N, N3) W4 = W4 / sqrt(2) # different initialization, since being fed into softmax, instead of relu @@ -94,35 +94,35 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4) # Optimize - iters = ceil(N / batch_size) + iters = ceil(N / batchSize) for (e in 1:epochs) { loss = 0 val_loss = 0 val_acc = 0 for(i in 1:iters) { # Get next batch - beg = ((i-1) * batch_size) %% N + 1 - end = min(N, beg + batch_size - 1) + beg = ((i-1) * batchSize) %% N + 1 + end = min(N, beg + batchSize - 1) X_batch = X[beg:end,] y_batch = Y[beg:end,] model = list(W1=W1, b1=b1, W2=W2, b2=b2, W3=W3, b3=b3, W4=W4, b4=b4) - cache = lenet_fw::lenetForward(X=X_batch, C=C, Hin=Hin, Win=Win, model=model) + cache = lenet_fw::lenetForward(X=X_batch, C=c, Hin=h, Win=w, model=model) # loss if(verbose){ loss = loss + cross_entropy_loss::forward(as.matrix(cache["probs"]), y_batch) - val_cache = lenet_fw::lenetForward(X=X_val, C=C, Hin=Hin, Win=Win, model=model) - val_loss = val_loss + cross_entropy_loss::forward(as.matrix(val_cache["probs"]), Y_val) - val_acc = val_acc + mean(rowIndexMax(as.matrix(val_cache["probs"])) == rowIndexMax(Y_val)) + val_cache = lenet_fw::lenetForward(X=Xtest, C=c, Hin=h, Win=w, model=model) + val_loss = val_loss + cross_entropy_loss::forward(as.matrix(val_cache["probs"]), Ytest) + val_acc = val_acc + mean(rowIndexMax(as.matrix(val_cache["probs"])) == rowIndexMax(Ytest)) } dprobs = cross_entropy_loss::backward(as.matrix(cache["probs"]), y_batch) # Compute data backward pass [dW1, db1, dW2, db2, dW3, db3, dW4, db4] = feed_backward( - X_batch, C, Hin, Win, reg, model, dprobs, cache) + X_batch, c, h, w, reg, model, dprobs, cache) # Optimize with SGD w/ Nesterov momentum [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1) diff --git a/scripts/builtin/lm.dml b/scripts/builtin/lm.dml index b7fc55e0ff9..ae88f191048 100644 --- a/scripts/builtin/lm.dml +++ b/scripts/builtin/lm.dml @@ -25,27 +25,27 @@ # # INPUT: # -------------------------------------------------------------------- -# X Matrix of feature vectors. -# y 1-column matrix of response values. 
-# icpt     Intercept presence, shifting and rescaling the columns of X
-# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
-#          for highly dependant/sparse/numerous features
-# tol      Tolerance (epsilon); conjugate gradient procedure terminates early if L2
-#          norm of the beta-residual is less than tolerance * its initial norm
-# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose  If TRUE print messages are activated
+# X          Matrix of feature vectors.
+# y          1-column matrix of response values.
+# intercept  Intercept presence, shifting and rescaling the columns of X
+# reg        Regularization constant (lambda) for L2-regularization. Set to nonzero
+#            for highly dependent/sparse/numerous features
+# tol        Tolerance (epsilon); conjugate gradient procedure terminates early if L2
+#            norm of the beta-residual is less than tolerance * its initial norm
+# maxIter    Maximum number of conjugate gradient iterations. 0 = no maximum
+# verbose    If TRUE print messages are activated
# --------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------
-# B  The model fit beta that can be used as input in lmPredict
+# B          The model fit beta that can be used as input in lmPredict
# ---------------------------------------------------------------

-m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
-    Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
+m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer intercept = 0,
+    Double reg = 1e-7, Double tol = 1e-7, Integer maxIter = 0, Boolean verbose = TRUE)
  return (Matrix[Double] B) {
  if( ncol(X) <= 1024 )
-    B = lmDS(X, y, icpt, reg, verbose)
+    B = lmDS(X, y, intercept, reg, verbose)
  else
-    B = lmCG(X, y, icpt, reg, tol, maxi, verbose)
+    B = lmCG(X, y, intercept, reg, tol, maxIter, verbose)
}
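Downstream call sites only change where these arguments are passed by name; a minimal sketch (X and y are placeholders, values are the defaults):

  # before: B = lm(X=X, y=y, icpt=1, reg=1e-7, tol=1e-7, maxi=0)
  # after:
  B = lm(X=X, y=y, intercept=1, reg=1e-7, tol=1e-7, maxIter=0, verbose=FALSE)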
diff --git a/scripts/builtin/lmCG.dml b/scripts/builtin/lmCG.dml
index 5fa9b0fefcb..ab1c8292912 100644
--- a/scripts/builtin/lmCG.dml
+++ b/scripts/builtin/lmCG.dml
@@ -23,29 +23,29 @@
#
# INPUT:
# ---------------------------------------------------------------
-# X        Matrix of feature vectors.
-# y        1-column matrix of response values.
-# icpt     Intercept presence, shifting and rescaling the columns of X
-# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
-#          for highly dependant/sparse/numerous features
-# tol      Tolerance (epsilon) conjugate gradient procedure terminates early if L2
-#          norm of the beta-residual is less than tolerance * its initial norm
-# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose  If TRUE print messages are activated
+# X          Matrix of feature vectors.
+# y          1-column matrix of response values.
+# intercept  Intercept presence, shifting and rescaling the columns of X
+# reg        Regularization constant (lambda) for L2-regularization. Set to nonzero
+#            for highly dependent/sparse/numerous features
+# tol        Tolerance (epsilon) conjugate gradient procedure terminates early if L2
+#            norm of the beta-residual is less than tolerance * its initial norm
+# maxIter    Maximum number of conjugate gradient iterations. 0 = no maximum
+# verbose    If TRUE print messages are activated
# ---------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------
-# B  The model fit beta that can be used as input in lmPredict
+# B          The model fit beta that can be used as input in lmPredict
# ---------------------------------------------------------------

-m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
-    Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
+m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer intercept = 0,
+    Double reg = 1e-7, Double tol = 1e-7, Integer maxIter = 0, Boolean verbose = TRUE)
  return (Matrix[Double] B)
{
-  intercept_status = icpt
+  intercept_status = intercept
  regularization = reg
  tolerance = tol
-  max_iteration = maxi
+  max_iteration = maxIter
  n = nrow(X)
  m = ncol(X)
diff --git a/scripts/builtin/lmDS.dml b/scripts/builtin/lmDS.dml
index 9c60b33917f..ee414bbc8d6 100644
--- a/scripts/builtin/lmDS.dml
+++ b/scripts/builtin/lmDS.dml
@@ -23,25 +23,24 @@
#
# INPUT:
# --------------------------------------------------------------------------------------
-# X        Matrix of feature vectors.
-# y        1-column matrix of response values.
-# icpt     Intercept presence, shifting and rescaling the columns of X
-# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
-#          for highly dependant/sparse/numerous features
-# tol      Tolerance (epsilon) conjugate gradient procedure terminates early if L2
-#          norm of the beta-residual is less than tolerance * its initial norm
-# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose  If TRUE print messages are activated
+# X          Matrix of feature vectors.
+# y          1-column matrix of response values.
+# intercept  Intercept presence, shifting and rescaling the columns of X
+# reg        Regularization constant (lambda) for L2-regularization. Set to nonzero
+#            for highly dependent/sparse/numerous features
+# tol        Tolerance (epsilon) conjugate gradient procedure terminates early if L2
+#            norm of the beta-residual is less than tolerance * its initial norm
+# verbose    If TRUE print messages are activated
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------
-# B  The model fit beta that can be used as input in lmPredict
+# B          The model fit beta that can be used as input in lmPredict
# ---------------------------------------------------------------

-m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
+m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer intercept = 0,
    Double reg = 1e-7, Boolean verbose = TRUE)
  return (Matrix[Double] B) {
-  intercept_status = icpt
+  intercept_status = intercept
  regularization = reg

  n = nrow(X)
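Since lm dispatches to lmDS for narrow inputs (ncol(X) <= 1024) and to lmCG otherwise, direct callers of the two solvers migrate analogously; a sketch under the signatures above:

  B = lmDS(X=X, y=y, intercept=0, reg=1e-7)                       # direct solve, no iteration parameters
  B = lmCG(X=X, y=y, intercept=0, reg=1e-7, tol=1e-7, maxIter=0)  # conjugate gradient; maxIter=0 = no maximum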
diff --git a/scripts/builtin/lmPredict.dml b/scripts/builtin/lmPredict.dml
index 6c017e2f0bb..8eeefc98ca3 100644
--- a/scripts/builtin/lmPredict.dml
+++ b/scripts/builtin/lmPredict.dml
@@ -23,26 +23,26 @@
#
# INPUT:
# --------------------------------------------------------------------------------------
-# X        Matrix of feature vectors
-# B        1-column matrix of weights.
-# ytest    test labels, used only for verbose output. can be set to matrix(0,1,1)
-#          if verbose output is not wanted
-# icpt     Intercept presence, shifting and rescaling the columns of X
-# verbose  If TRUE print messages are activated
+# X          Matrix of feature vectors
+# B          1-column matrix of weights.
+# Ytest      Test labels, used only for verbose output. Can be set to matrix(0,1,1)
+#            if verbose output is not wanted
+# intercept  Intercept presence, shifting and rescaling the columns of X
+# verbose    If TRUE print messages are activated
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# -----------------------------------------------------------------------------------
-# yhat  1-column matrix of classes
+# yhat       1-column matrix of classes
# -----------------------------------------------------------------------------------

m_lmPredict = function(Matrix[Double] X, Matrix[Double] B,
-    Matrix[Double] ytest = matrix(0,1,1), Integer icpt = 0, Boolean verbose = FALSE)
+    Matrix[Double] Ytest = matrix(0,1,1), Integer intercept = 0, Boolean verbose = FALSE)
  return (Matrix[Double] yhat)
{
-  intercept = ifelse(icpt>0 | ncol(X)+1==nrow(B), as.scalar(B[nrow(B),]), 0);
-  yhat = X %*% B[1:ncol(X),] + intercept;
+  bias = ifelse(intercept>0 | ncol(X)+1==nrow(B), as.scalar(B[nrow(B),]), 0);
+  yhat = X %*% B[1:ncol(X),] + bias;
  if( verbose )
-    lmPredictStats(yhat, ytest, TRUE);
+    lmPredictStats(yhat, Ytest, TRUE);
}
diff --git a/scripts/builtin/lmPredictStats.dml b/scripts/builtin/lmPredictStats.dml
index f538a595915..29a5b40291b 100644
--- a/scripts/builtin/lmPredictStats.dml
+++ b/scripts/builtin/lmPredictStats.dml
@@ -24,8 +24,8 @@
#
# INPUT:
# ------------------------------------------------------------
-# yhat   A column vector of predicted response values y
-# ytest  A column vector of actual response values y
+# Y      A column vector of predicted response values y
+# Ytest  A column vector of actual response values y
# lm     An indicator if used for linear regression model
# ------------------------------------------------------------
#
@@ -34,24 +34,24 @@
# R  A column vector holding avg_res, ss_avg_res, and R2
# ------------------------------------------------------------

-m_lmPredictStats = function(Matrix[Double] yhat, Matrix[Double] ytest, Boolean lm)
+m_lmPredictStats = function(Matrix[Double] Y, Matrix[Double] Ytest, Boolean lm)
  return (Matrix[Double] R)
{
  print ("\n\nComputing the statistics...");
-  n = nrow(ytest)
+  n = nrow(Ytest)

-  sum_y_test = sum(ytest)
+  sum_y_test = sum(Ytest)
  mean_y_test = sum_y_test / n
-  sum_sq_y_test = sum(ytest^2)
+  sum_sq_y_test = sum(Ytest^2)

-  y_residual = ytest - yhat;
+  y_residual = Ytest - Y;
  avg_res = sum(y_residual) / n;
  ss_res = sum(y_residual^2);
  ss_avg_res = ss_res - n * avg_res^2;
  if( lm )
    R2 = 1 - ss_res / (sum_sq_y_test - n * (sum_y_test/n)^2);
  else
-    R2 = sum((yhat - mean_y_test)^2) / sum((ytest - mean_y_test)^2)
+    R2 = sum((Y - mean_y_test)^2) / sum((Ytest - mean_y_test)^2)

  avg_tot = sum_y_test / n;
  ss_tot = sum_sq_y_test;
@@ -59,8 +59,8 @@ m_lmPredictStats = function(Matrix[Double] yhat, Matrix[Double] ytest, Boolean l
  var_tot = ss_avg_tot / (n - 1);
  R2_nobias = 1 - ss_avg_res / ss_avg_tot;

-  print("sum(ytest) = " + sum_y_test)
-  print("sum(yhat) = " + sum(yhat))
+  print("sum(Ytest) = " + sum_y_test)
+  print("sum(Y) = " + sum(Y))
  print("SS_AVG_RES_Y: " + ss_avg_res)

  # Average of the response value Y
  print("AVG_TOT_Y, " + avg_tot)
diff --git a/scripts/builtin/matrixProfile.dml b/scripts/builtin/matrixProfile.dml
index dd48dba92dd..6af8b01e9af 100644
--- a/scripts/builtin/matrixProfile.dml
+++ b/scripts/builtin/matrixProfile.dml
@@ -34,11 +34,11 @@
# INPUT:
# ----------------------------------------------------------------------------------
# ts              Time series to profile
-# window_size     Sliding window size
-# sample_percent  Degree of approximation
+# 
windowSize Sliding window size +# sampleFrac Degree of approximation # between zero and one (1 # computes the exact solution) -# is_verbose Print debug information +# verbose Print debug information # ---------------------------------------------------------------------------------- # # OUTPUT: @@ -47,40 +47,40 @@ # profile_index Indices of least distances # ----------------------------------------------------------------------------------- -m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE) +m_matrixProfile = function(Matrix[Double] ts, Integer windowSize=4, Double sampleFrac=1.0, Boolean verbose=FALSE) return(Matrix[Double] profile, Matrix[Double] profile_index) { - if (is_verbose) + if (verbose) print ("##############################\n# MATRIXPROFILE SCRIPT ENTRY #\n##############################"); # TODO: preSCRIMP # requires a similarity search algorithm e.g.: MASS (Mueen's Algorithm for Similarity Search) n = length(ts); - [mu,sig] = moving_avg(ts, n, window_size); - if (is_verbose) { + [mu,sig] = moving_avg(ts, n, windowSize); + if (verbose) { print_ts(ts); print_ts(mu); print_ts(sig); } # initialize - profile_len = n-window_size+1; + profile_len = n-windowSize+1; profile = matrix(Inf, cols=1, rows=profile_len); profile_index = matrix(1, cols=1, rows=profile_len); # random permutation - exclusion_zone = as.integer(ceil(window_size/4)) + 1; + exclusion_zone = as.integer(ceil(windowSize/4)) + 1; sample_size = profile_len-exclusion_zone; - if (sample_percent < 1.0 & sample_percent >= 0.0) { - sample_size = ceil(sample_size*sample_percent); + if (sampleFrac < 1.0 & sampleFrac >= 0.0) { + sample_size = ceil(sample_size*sampleFrac); } s = sample(sample_size, sample_size, FALSE); s = s + exclusion_zone; - if (is_verbose) { + if (verbose) { print("n: " + n); - print("window_size: " + window_size); + print("windowSize: " + windowSize); print("profile_len: " + profile_len); print("exclusion_zone: " + exclusion_zone); print("sample_size: " + sample_size); @@ -90,12 +90,12 @@ m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double samp k = as.scalar(s[k_idx]); k_idx += 1; q = 0; - for (i in 1:n-window_size+2-k) { + for (i in 1:n-windowSize+2-k) { if (i==1) - q = as.scalar(t(ts[1:window_size]) %*% ts[k:k+window_size-1]); + q = as.scalar(t(ts[1:windowSize]) %*% ts[k:k+windowSize-1]); else - q = as.scalar(q - ts[i-1]%*%ts[i+k-2] + ts[i+window_size-1]%*%ts[i+k+window_size-2]); - d = sqrt(2*window_size*(1-(q - window_size*as.scalar(mu[i]*mu[i+k-1])) / (window_size*as.scalar(sig[i]*sig[i+k-1])))); + q = as.scalar(q - ts[i-1]%*%ts[i+k-2] + ts[i+windowSize-1]%*%ts[i+k+windowSize-2]); + d = sqrt(2*windowSize*(1-(q - windowSize*as.scalar(mu[i]*mu[i+k-1])) / (windowSize*as.scalar(sig[i]*sig[i+k-1])))); if (d < as.scalar(profile[i])) { profile[i] = d; @@ -112,10 +112,10 @@ m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double samp print_ts(profile_index); } -moving_avg = function(Matrix[Double] array, Integer n, Integer window_size) +moving_avg = function(Matrix[Double] array, Integer n, Integer windowSize) return(Matrix[Double] mu, Matrix[Double] sig) { - profile_len = n - window_size + 1; + profile_len = n - windowSize + 1; cum_sum = matrix(0, cols=1, rows=n); sq_cum_sum = matrix(0, cols=1, rows=n); sums = matrix(0, cols=1, rows=profile_len); @@ -127,16 +127,16 @@ moving_avg = function(Matrix[Double] array, Integer n, Integer window_size) cum_sum = cumsum(array); sq_cum_sum = 
cumsum(array*array);

-  sums[1] = cum_sum[window_size];
-  sq_sums[1] = sq_cum_sum[window_size];
-  for (i in 1:n-window_size) {
-    sums[i+1] = cum_sum[window_size + i] - cum_sum[i];
-    sq_sums[i+1] = sq_cum_sum[window_size + i] - sq_cum_sum[i];
+  sums[1] = cum_sum[windowSize];
+  sq_sums[1] = sq_cum_sum[windowSize];
+  for (i in 1:n-windowSize) {
+    sums[i+1] = cum_sum[windowSize + i] - cum_sum[i];
+    sq_sums[i+1] = sq_cum_sum[windowSize + i] - sq_cum_sum[i];
  }

  for (i in 1:profile_len) {
-    mu[i] = sums[i] / window_size;
-    sig_sq[i] = sq_sums[i] / window_size - mu[i] * mu[i];
+    mu[i] = sums[i] / windowSize;
+    sig_sq[i] = sq_sums[i] / windowSize - mu[i] * mu[i];
    sig[i] = max(sqrt(sig_sq[i]), 0);
  }
}
diff --git a/scripts/builtin/mice.dml b/scripts/builtin/mice.dml
index 8d7b1af69e3..18a591c2aaf 100644
--- a/scripts/builtin/mice.dml
+++ b/scripts/builtin/mice.dml
@@ -28,8 +28,8 @@
# INPUT:
# ------------------------------------------------------------------------------------
# X          Data Matrix (Recoded Matrix for categorical features)
-# cMask      A 0/1 row vector for identifying numeric (0) and categorical features (1)
-# iter       Number of iteration for multiple imputations
+# ctypes     A 0/1 row vector for identifying numeric (0) and categorical features (1)
+# maxIter    Number of iterations for multiple imputations
# threshold  confidence value [0, 1] for robust imputation, values will only be imputed
#            if the predicted value has probability greater than threshold,
#            only applicable for categorical data
@@ -41,7 +41,7 @@
# output  imputed dataset
# ---------------------------------------------------------------------------------

-m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3,
+m_mice= function(Matrix[Double] X, Matrix[Double] ctypes, Integer maxIter = 3,
  Double threshold = 0.8, Boolean verbose = FALSE)
  return(Matrix[Double] output, Matrix[Double] meta, Double threshold, Frame[String] dM, List[Unknown] betaList)
{
@@ -49,35 +49,35 @@ m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3,
   stop("MICE can not be applied on single vectors.
expected number of columns > 1 found: "+ncol(X)) - if(ncol(cMask) != ncol(X)) + if(ncol(ctypes) != ncol(X)) stop("MICE Dimension mismatch: the columns in X != columns in mask") lastIndex = ncol(X); - sumMax = sum(cMask); + sumMax = sum(ctypes); # if all features are numeric add a categorical features # if all features are categorical add a numeric features - if(sumMax == 0 | sumMax == ncol(cMask)) { + if(sumMax == 0 | sumMax == ncol(ctypes)) { X = cbind(X, matrix(1, nrow(X), 1)) - cMask = cbind(cMask, matrix(ifelse(sumMax==0, 1, 0), 1, 1)) + ctypes = cbind(ctypes, matrix(ifelse(sumMax==0, 1, 0), 1, 1)) } # impute by mean Mask1 = is.na(X) - meta = rbind(cMask, (colSums(Mask1) > 0)) + meta = rbind(ctypes, (colSums(Mask1) > 0)) X = replace(target=X, pattern=NaN, replacement=0); - imputationVec = getInitialImputation(X, cMask) + imputationVec = getInitialImputation(X, ctypes) X1 = X + (Mask1 * imputationVec) d = ncol(X1) n = nrow(X1) # compute index of categorical features - index = vectorToCsv(cMask) + index = vectorToCsv(ctypes) # specifications for one-hot encoding of categorical features jspecDC = "{ids:true, dummycode:["+index+"]}"; [dX, dM] = transformencode(target=as.frame(X1), spec=jspecDC); - dist = colDist(X1, cMask) # number of distinct items in categorical features - for(k in 1:iter) # start iterative imputation + dist = colDist(X1, ctypes) # number of distinct items in categorical features + for(k in 1:maxIter) # start iterative imputation { betaList = list() betaList = append(betaList, imputationVec) @@ -91,7 +91,7 @@ m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3, { j = (i + as.scalar(dist[1,in_c])) - 1 # index value for iterating OHE columns beta = as.matrix(0) - if(sum(Mask1[, in_c]) > 0 & as.scalar(cMask[, in_c]) == 0) # impute numeric features + if(sum(Mask1[, in_c]) > 0 & as.scalar(ctypes[, in_c]) == 0) # impute numeric features { # construct column selector selX = matrix(1,1,ncol(dX)) @@ -110,16 +110,16 @@ m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3, test_Y = slice2a[,in_c] # learn a regression line - beta = lm(X=train_X, y=train_Y, verbose=FALSE, icpt=1, reg = 1e-7, tol = 1e-7); + beta = lm(X=train_X, y=train_Y, verbose=FALSE, intercept=1, reg = 1e-7, tol = 1e-7); # predicting missing values - pred = lmPredict(X=test_X, B=beta, ytest= matrix(0,1,1), icpt=1, verbose = FALSE) + pred = lmPredict(X=test_X, B=beta, Ytest= matrix(0,1,1), intercept=1, verbose = FALSE) # imputing missing column values (assumes Mask_Filled being 0/1-matrix) R = removeEmpty(target=Mask_Filled[, in_c] * seq(1,n), margin="rows"); # TODO modify removeEmpty to return zero row and n columns if(!(nrow(R) == 1 & as.scalar(R[1,1] == 0))) Mask_Filled[,in_c] = table(R, 1, pred, n, 1); } - else if (sum(Mask1[, in_c]) > 0 & as.scalar(cMask[, in_c]) != 0) # impute categorical features + else if (sum(Mask1[, in_c]) > 0 & as.scalar(ctypes[, in_c]) != 0) # impute categorical features { # construct column selector selX = matrix(1,1,ncol(dX)) @@ -141,8 +141,8 @@ m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3, prob = matrix(1, nrow(test_Y), 1) } else { - beta = multiLogReg(X=train_X, Y=train_Y, icpt = 1, tol = 0.0001, reg = 0.00001, - maxi = 20, maxii=20, verbose=FALSE) + beta = multiLogReg(X=train_X, Y=train_Y, intercept = 1, tol = 0.0001, reg = 0.00001, + maxIter = 20, maxInnerIter=20, verbose=FALSE) # predicting missing values [prob, pred, acc] = multiLogRegPredict(X=test_X, B=beta, Y = test_Y) prob = rowMaxs(prob) diff --git 
a/scripts/builtin/miceApply.dml b/scripts/builtin/miceApply.dml index 448310ef3ca..5ed43543212 100644 --- a/scripts/builtin/miceApply.dml +++ b/scripts/builtin/miceApply.dml @@ -28,7 +28,7 @@ # INPUT: # -------------------------------------------------------------------------------------- # X Data Matrix (Recoded Matrix for categorical features) -# mtea A meta matrix with each rows storing values 1) mask of original matrix, +# meta A meta matrix with each rows storing values 1) mask of original matrix, # 2) information of columns with missing values on original data 0 for no missing value in column and 1 otherwise # 3) dist values in each columns in original data 1 for continuous columns and colMax for categorical # threshold confidence value [0, 1] for robust imputation, values will only be imputed @@ -105,7 +105,7 @@ m_miceApply = function(Matrix[Double] X, Matrix[Double] meta, Double threshold, test_Y = slice2a[,in_c] beta = as.matrix(betaList[in_c]) # learn a regression line - pred = lmPredict(X=test_X, B=beta, ytest= matrix(0,1,1), icpt=1, verbose = FALSE) + pred = lmPredict(X=test_X, B=beta, Ytest= matrix(0,1,1), intercept=1, verbose = FALSE) # imputing missing column values (assumes Mask_Filled being 0/1-matrix) R = removeEmpty(target=Mask_Filled[1:n, in_c] * seq(1,n), margin="rows"); # TODO modify removeEmpty to return zero row and n columns diff --git a/scripts/builtin/msvm.dml b/scripts/builtin/msvm.dml index 076b9eb5971..399f74d652e 100644 --- a/scripts/builtin/msvm.dml +++ b/scripts/builtin/msvm.dml @@ -25,24 +25,24 @@ # # INPUT: #------------------------------------------------------------------------------- -# X Feature matrix X (shape: m x n) -# Y Label vector y of class labels (shape: m x 1), -# where max(Y) is assumed to be the number of classes -# intercept Indicator if a bias column should be added to X and the model -# epsilon Tolerance for early termination if the reduction of objective -# function is less than epsilon times the initial objective -# reg Regularization parameter (lambda) for L2 regularization -# maxIterations Maximum number of conjugate gradient (outer l2svm) iterations -# verbose Indicator if training details should be printed +# X Feature matrix X (shape: m x n) +# Y Label vector y of class labels (shape: m x 1), +# where max(Y) is assumed to be the number of classes +# intercept Indicator if a bias column should be added to X and the model +# tol Tolerance for early termination if the reduction of objective +# function is less than tolerance times the initial objective +# reg Regularization parameter (lambda) for L2 regularization +# maxIter Maximum number of conjugate gradient (outer l2svm) iterations +# verbose Indicator if training details should be printed # ------------------------------------------------------------------------------ # # OUTPUT: #------------------------------------------------------------------------------- -# model Trained model/weights (shape: n x max(Y), w/ intercept: n+1) +# model Trained model/weights (shape: n x max(Y), w/ intercept: n+1) #------------------------------------------------------------------------------- m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, - Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100, + Double tol = 0.001, Double reg = 1.0, Integer maxIter = 100, Boolean verbose = FALSE) return(Matrix[Double] model) { @@ -75,7 +75,7 @@ m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, nnzY = sum(Y == class); if( nnzY > 0 ) 
{ w[,class] = l2svm(X=X, Y=Y_local, intercept=FALSE, - epsilon=epsilon, reg=reg, maxIterations=maxIterations, + tol=tol, reg=reg, maxIter=maxIter, verbose=verbose, columnId=class) } else { diff --git a/scripts/builtin/multiLogReg.dml b/scripts/builtin/multiLogReg.dml index 528931ad8e2..7399eb37173 100644 --- a/scripts/builtin/multiLogReg.dml +++ b/scripts/builtin/multiLogReg.dml @@ -26,25 +26,25 @@ # # INPUT: # --------------------------------------------------------------------------------------- -# X Location to read the matrix of feature vectors -# Y Location to read the matrix with category labels -# icpt Intercept presence, shifting and rescaling X columns: 0 = no intercept, -# no shifting, no rescaling; 1 = add intercept, but neither shift nor -# rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 -# tol tolerance ("epsilon") -# reg regularization parameter (lambda = 1/C); intercept is not regularized -# maxi max. number of outer (Newton) iterations -# maxii max. number of inner (conjugate gradient) iterations, 0 = no max -# verbose flag specifying if logging information should be printed +# X Location to read the matrix of feature vectors +# Y Location to read the matrix with category labels +# intercept Intercept presence, shifting and rescaling X columns: 0 = no intercept, +# no shifting, no rescaling; 1 = add intercept, but neither shift nor +# rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 +# tol tolerance ("epsilon") +# reg regularization parameter (lambda = 1/C); intercept is not regularized +# maxIter max. number of outer (Newton) iterations +# maxInnerIter max. number of inner (conjugate gradient) iterations, 0 = no max +# verbose flag specifying if logging information should be printed # --------------------------------------------------------------------------------------- # # OUTPUT: # -------------------------------------------------------------------------------------- -# betas regression betas as output for prediction +# betas regression betas as output for prediction # -------------------------------------------------------------------------------------- -m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, - Double tol=1e-6, Double reg=1.0, Int maxi=100, Int maxii=20, Boolean verbose=TRUE) +m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int intercept = 2, + Double tol=1e-6, Double reg=1.0, Int maxIter=100, Int maxInnerIter=20, Boolean verbose=TRUE) return(Matrix[Double] betas) { eta0 = 0.0001; @@ -67,7 +67,7 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, } # Introduce the intercept, shift and rescale the columns of X if needed - if (icpt == 1 | icpt == 2) { # add the intercept column + if (intercept == 1 | intercept == 2) { # add the intercept column if(N == nrow(X)) N = nrow(X) X = cbind (X, matrix (1, N, 1)); @@ -75,10 +75,10 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, } scale_lambda = matrix (1, D, 1); - if (icpt == 1 | icpt == 2) + if (intercept == 1 | intercept == 2) scale_lambda [D, 1] = 0; - if (icpt == 2) # scale-&-shift X columns to mean 0, variance 1 + if (intercept == 2) # scale-&-shift X columns to mean 0, variance 1 { # Important assumption: X [, D] = matrix (1, N, 1) avg_X_cols = t(colSums(X)) / N; var_X_cols = (t(colSums (X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1); @@ -126,20 +126,20 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, obj = N * log (K + 1); ### obj 
= - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2)); Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]); - if (icpt == 2) + if (intercept == 2) Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ]; Grad = Grad + lambda * B; norm_Grad = sqrt (sum (Grad ^ 2)); norm_Grad_initial = norm_Grad; - if (maxii == 0) - maxii = D * K; + if (maxInnerIter == 0) + maxInnerIter = D * K; iter = 1; # boolean for convergence check - converge = (norm_Grad < tol) | (iter > maxi); + converge = (norm_Grad < tol) | (iter > maxIter); if(verbose) print ("-- Initially: Objective = " + obj + ", Gradient Norm = " + norm_Grad + ", Trust Delta = " + delta); @@ -158,7 +158,7 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, P_1K = P [, 1:K] while (! innerconverge) { - if (icpt == 2) { + if (intercept == 2) { ssX_V = diag (scale_X) %*% V; ssX_V [D, ] = ssX_V [D, ] + t(shift_X) %*% V; } @@ -168,7 +168,7 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, Q = P_1K * (X %*% ssX_V); HV = t(X) %*% (Q - P_1K * (rowSums (Q) %*% matrix (1, 1, K))); - if (icpt == 2) + if (intercept == 2) HV = diag (scale_X) %*% HV + shift_X %*% HV [D, ]; HV = HV + lambda * V; @@ -198,14 +198,14 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, innerconverge = TRUE; } inneriter = inneriter + 1; - innerconverge = innerconverge | (inneriter > maxii); + innerconverge = innerconverge | (inneriter > maxInnerIter); } # END TRUST REGION SUB-PROBLEM # compute rho, update B, obtain delta gs = sum (S * Grad); qk = - 0.5 * (gs - sum (S * R)); B_new = B + S; - if (icpt == 2) { + if (intercept == 2) { ssX_B_new = diag (scale_X) %*% B_new; ssX_B_new [D, ] = ssX_B_new [D, ] + t(shift_X) %*% B_new; } @@ -255,7 +255,7 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, B = B_new; P = P_new; Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]); - if (icpt == 2) + if (intercept == 2) Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ]; Grad = Grad + lambda * B; @@ -267,13 +267,13 @@ m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, + snorm + ", Gradient Norm = " + norm_Grad); } iter = iter + 1; - converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxi) | + converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxIter) | ((is_trust_boundary_reached == 0) & (abs (actred) < (abs (obj) + abs (obj_new)) * 1e-14))); if (verbose & converge) print ("Termination / Convergence condition satisfied."); } - if (icpt == 2) { + if (intercept == 2) { betas = diag (scale_X) %*% B; betas[D,] = betas[D,] + t(shift_X) %*% B; } diff --git a/scripts/builtin/outlierByArima.dml b/scripts/builtin/outlierByArima.dml index 9292ad5d625..b60c284287e 100644 --- a/scripts/builtin/outlierByArima.dml +++ b/scripts/builtin/outlierByArima.dml @@ -35,7 +35,7 @@ # D seasonal differencing order # Q seasonal MA order # s period in terms of number of time-steps -# include_mean If the mean should be included +# includeMean If the mean should be included # solver solver, is either "cg" or "jacobi" # ------------------------------------------------------------------------------------------- # @@ -45,7 +45,7 @@ # ------------------------------------------------------------------------------------------------- m_outlierByArima = function(Matrix[Double] X, Double k = 3, Integer repairMethod = 1, Integer p=0, Integer d=0, - Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1, Boolean include_mean=FALSE, String solver="jacobi") + Integer 
q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1, Boolean includeMean=FALSE, String solver="jacobi")
  return(Matrix[Double] X_corrected)
{
  outlierFilter = as.matrix(0)
@@ -58,7 +58,7 @@ m_outlierByArima = function(Matrix[Double] X, Double k = 3, Integer repairMethod

  # TODO replace by ARIMA once fully supported, LM only emulated the AR part
  model = lm(X=features, y=X_adapted)
-  y_hat = lmPredict(X=features, B=model, ytest=matrix(0,1,1))
+  y_hat = lmPredict(X=features, B=model, Ytest=matrix(0,1,1))

  upperBound = sd(X) + k * y_hat
  lowerBound = sd(X) - k * y_hat
diff --git a/scripts/builtin/outlierByIQR.dml b/scripts/builtin/outlierByIQR.dml
index a564e0b7463..f0d42a88ffc 100644
--- a/scripts/builtin/outlierByIQR.dml
+++ b/scripts/builtin/outlierByIQR.dml
@@ -25,11 +25,10 @@
# -------------------------------------------------------------------------------------------
# X             Matrix X
# k             a constant used to discern outliers k*IQR
-# isIterative   iterative repair or single repair
# repairMethod  values: 0 = delete rows having outliers,
#               1 = replace outliers with zeros
#               2 = replace outliers as missing values
-# max_iterations  values: 0 = arbitrary number of iteraition until all outliers are removed,
+# maxIter       values: 0 = arbitrary number of iterations until all outliers are removed,
#               n = any constant defined by user
# verbose       flag specifying if logging information should be printed
# -------------------------------------------------------------------------------------------
#
@@ -40,7 +39,7 @@
# ---------------------------------------------------------------------------------

m_outlierByIQR = function(Matrix[Double] X, Double k =1.5, Integer repairMethod = 1,
-  Integer max_iterations, Boolean verbose = TRUE)
+  Integer maxIter, Boolean verbose = TRUE)
  return(Matrix[Double] Y, Matrix[Double] Q1, Matrix[Double] Q3, Matrix[Double] IQR, Double k, Integer repairMethod)
{
@@ -48,7 +47,7 @@ m_outlierByIQR = function(Matrix[Double] X, Double k =1.5, Integer repairMethod
  sumNext = as.double(1)
  counter = 0

-  while( max_iterations == 0 | counter < max_iterations )
+  while( maxIter == 0 | counter < maxIter )
  {
    [Q1, Q3, IQR] = compute_quartiles(X)
    upperBound = (Q3 + (k * IQR));
@@ -63,7 +62,7 @@
      sumNext = sum(temp)
    }
    else
-      max_iterations = -1
+      maxIter = -1

    counter = counter + 1;
  }
diff --git a/scripts/builtin/outlierBySd.dml b/scripts/builtin/outlierBySd.dml
index c53ed1cfef1..7a40dc58eff 100644
--- a/scripts/builtin/outlierBySd.dml
+++ b/scripts/builtin/outlierBySd.dml
@@ -27,7 +27,7 @@
# k               threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
# repairMethod    values: 0 = delete rows having outliers, 1 = replace outliers as zeros
#                 2 = replace outliers as missing values
-# max_iterations  values: 0 = arbitrary number of iteration until all outliers are removed,
+# maxIter         values: 0 = arbitrary number of iterations until all outliers are removed,
#                 n = any constant defined by user
# ----------------------------------------------------------------------------------------
#
@@ -37,7 +37,7 @@
# -------------------------------------------------------------------------------

m_outlierBySd = function(Matrix[Double] X, Double k = 3, Integer repairMethod = 1,
-  Integer max_iterations, Boolean verbose = TRUE)
+  Integer maxIter, Boolean verbose = TRUE)
  return(Matrix[Double] out, Matrix[Double] colMean, Matrix[Double] colSD, Double k, Integer repairMethod)
{
  # variable initialization
@@ -49,7 +49,7 @@ m_outlierBySd =
function(Matrix[Double] X, Double k = 3, Integer repairMethod = if( k < 1 | k > 10) stop("outlierBySd: invalid argument - k should be in range 1-10 found "+k) - while( max_iterations == 0 | counter < max_iterations ) + while( maxIter == 0 | counter < maxIter ) { [colMean, colSD] = getColMean_Sd(X) @@ -67,7 +67,7 @@ m_outlierBySd = function(Matrix[Double] X, Double k = 3, Integer repairMethod = sumNext = sum(temp) } else - max_iterations = - 1; + maxIter = - 1; counter = counter + 1; } diff --git a/scripts/builtin/outlierBySdApply.dml b/scripts/builtin/outlierBySdApply.dml index e098cf6489d..42bd85ad3bd 100644 --- a/scripts/builtin/outlierBySdApply.dml +++ b/scripts/builtin/outlierBySdApply.dml @@ -26,18 +26,15 @@ # X Matrix X # colMean Matrix X # k a constant used to discern outliers k*IQR -# isIterative iterative repair or single repair # repairMethod values: 0 = delete rows having outliers, # 1 = replace outliers with zeros # 2 = replace outliers as missing values -# max_iterations values: 0 = arbitrary number of iteraition until all outliers are removed, -# n = any constant defined by user # verbose flag specifying if logging information should be printed # ------------------------------------------------------------------------------------------- # # OUTPUT: # --------------------------------------------------------------------------------- -# Y Matrix X with no outliers +# Y Matrix X with no outliers # --------------------------------------------------------------------------------- m_outlierBySdApply = function(Matrix[Double] X, Matrix[Double] colMean, Matrix[Double] colSD, Double k, Integer repairMethod) diff --git a/scripts/builtin/pageRank.dml b/scripts/builtin/pageRank.dml index 11e3b74c161..e98516c0e53 100644 --- a/scripts/builtin/pageRank.dml +++ b/scripts/builtin/pageRank.dml @@ -29,7 +29,7 @@ # e additional customization, default vector of ones # u personalization vector (number of nodes), default vector of ones # alpha teleport probability -# max_iter maximum number of iterations +# maxIter maximum number of iterations # seed seed for default rand initialization of page rank vector # ------------------------------------------------------------------------------ # @@ -40,7 +40,7 @@ m_pageRank = function (Matrix[Double] G, Matrix[Double] p = as.matrix(1), Matrix[Double] e = as.matrix(1), Matrix[Double] u = as.matrix(1), - Double alpha = 0.85, Int max_iter = 20, Int seed = -1) + Double alpha = 0.85, Int maxIter = 20, Int seed = -1) return (Matrix[double] pprime) { # default vectorized if not passed @@ -53,7 +53,7 @@ m_pageRank = function (Matrix[Double] G, Matrix[Double] p = as.matrix(1), # page rank computation via power iterations i = 0; - while( i < max_iter ) { + while( i < maxIter ) { p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p); i += 1; } diff --git a/scripts/builtin/pnmf.dml b/scripts/builtin/pnmf.dml index 721ab7232bf..431a6d70c60 100644 --- a/scripts/builtin/pnmf.dml +++ b/scripts/builtin/pnmf.dml @@ -29,33 +29,33 @@ # # INPUT: # ---------------------------------------------------------------------------------------- -# X Matrix of feature vectors. -# rnk Number of components into which matrix X is to be factored. -# eps Tolerance -# maxi Maximum number of conjugate gradient iterations. -# verbose If TRUE, 'iter' and 'obj' are printed. +# X Matrix of feature vectors. +# rank Number of components into which matrix X is to be factored. +# tol Tolerance +# maxIter Maximum number of conjugate gradient iterations. +# verbose If TRUE, 'iter' and 'obj' are printed. 
# ---------------------------------------------------------------------------------------- # # OUTPUT: # ------------------------------------------------------------------------------------ -# W List of pattern matrices, one for each repetition. -# H List of amplitude matrices, one for each repetition. +# W List of pattern matrices, one for each repetition. +# H List of amplitude matrices, one for each repetition. # ------------------------------------------------------------------------------------ -m_pnmf = function(Matrix[Double] X, Integer rnk, Double eps = 1e-8, Integer maxi = 10, Boolean verbose=TRUE) +m_pnmf = function(Matrix[Double] X, Integer rank, Double tol = 1e-8, Integer maxIter = 10, Boolean verbose=TRUE) return (Matrix[Double] W, Matrix[Double] H) { #initialize W and H - W = rand(rows=nrow(X), cols=rnk, min=0, max=0.025); - H = rand(rows=rnk, cols=ncol(X), min=0, max=0.025); + W = rand(rows=nrow(X), cols=rank, min=0, max=0.025); + H = rand(rows=rank, cols=ncol(X), min=0, max=0.025); i = 0; - while(i < maxi) { - H = (H*(t(W)%*%(X/(W%*%H+eps)))) / t(colSums(W)); - W = (W*((X/(W%*%H+eps))%*%t(H))) / t(rowSums(H)); + while(i < maxIter) { + H = (H*(t(W)%*%(X/(W%*%H+tol)))) / t(colSums(W)); + W = (W*((X/(W%*%H+tol))%*%t(H))) / t(rowSums(H)); i = i + 1; if( verbose ) { - obj = sum(W%*%H) - sum(X*log(W%*%H+eps)); + obj = sum(W%*%H) - sum(X*log(W%*%H+tol)); print("iter=" + i + " obj=" + obj); } } diff --git a/scripts/builtin/ppca.dml b/scripts/builtin/ppca.dml index b209ba84419..1d25ebe008c 100644 --- a/scripts/builtin/ppca.dml +++ b/scripts/builtin/ppca.dml @@ -27,9 +27,9 @@ # ---------------------------------------------------------------------------------------------- # X n x m input feature matrix # k indicates dimension of the new vector space constructed from eigen vectors -# maxi maximum number of iterations until convergence -# tolobj objective function tolerance value to stop ppca algorithm -# tolrecerr reconstruction error tolerance value to stop the algorithm +# maxIter maximum number of iterations until convergence +# tol objective function tolerance value to stop ppca algorithm +# tolRecon reconstruction error tolerance value to stop the algorithm # verbose verbose debug output # ---------------------------------------------------------------------------------------------- # @@ -39,8 +39,8 @@ # Mout Output dominant eigen vectors (can be used for projections) # ------------------------------------------------------------------------------------------------- -m_ppca = function(Matrix[Double] X, Integer K=2, Integer maxi = 10, - Double tolobj = 0.00001, Double tolrecerr = 0.02, Boolean verbose = TRUE) +m_ppca = function(Matrix[Double] X, Integer K=2, Integer maxIter = 10, + Double tol = 0.00001, Double tolRecon = 0.02, Boolean verbose = TRUE) return(Matrix[Double] Xout, Matrix[Double] Mout) { n = nrow(X); @@ -57,13 +57,13 @@ m_ppca = function(Matrix[Double] X, Integer K=2, Integer maxi = 10, PC = C; # initilizing reconstruction error - RE = tolrecerr+1; + RE = tolRecon+1; REBest = RE; Z = matrix(0,rows=1,cols=1); #Objective function value - ObjRelChng = tolobj+1; + ObjRelChng = tol+1; # mean centered input matrix - dim -> [n,m] Xm = X - colMeans(X); @@ -73,7 +73,7 @@ m_ppca = function(Matrix[Double] X, Integer K=2, Integer maxi = 10, I = diag(ITMP); i = 0; - while (i < maxi & ObjRelChng > tolobj & RE > tolrecerr){ + while (i < maxIter & ObjRelChng > tol & RE > tolRecon){ #Estimation step - Covariance matrix #M -> k x k M = t(C) %*% C + I*ss; diff --git 
a/scripts/builtin/quantizeByCluster.dml b/scripts/builtin/quantizeByCluster.dml index 824ac350534..42de8d9ac6b 100644 --- a/scripts/builtin/quantizeByCluster.dml +++ b/scripts/builtin/quantizeByCluster.dml @@ -32,13 +32,13 @@ # M Number of subspaces # k Number of vectors in the subcodebooks # runs Number of runs (with different initial centroids) -# max_iter Maximum number of iterations per run -# eps Tolerance (epsilon) for WCSS change ratio -# avg_sample_size_per_centroid Average number of records per centroid in data samples +# maxIter Maximum number of iterations per run +# tol Tolerance (epsilon) for WCSS change ratio +# avgSampleSizePerCentroid Average number of records per centroid in data samples # separate Cluster subspaces separately. If value is set to true, # kmeans is run M times, once for each subspace. Otherwise # kmeans is run only once. -# space_decomp Decompose the vector space by multiplying the input +# spaceDecomp Decompose the vector space by multiplying the input # matrix X with an orthogonal matrix R. Assumes the data # follows a parametric Gaussian distribution. # Time complexity in O(nrow(X)^2 * min(nrow(X), ncol(X))). @@ -55,11 +55,12 @@ # onto a vector of codes. The entries in the codes matrix are the indices of # the vectors in the codebook. The codes matrix has the dimensions [nrow(X) x M]. # R The orthogonal matrix R which is applied to the input matrix X before performing -# the product quantization. Only relevant when space_decomp = TRUE. +# the product quantization. Only relevant when spaceDecomp = TRUE. # ------------------------------------------------------------------------------------------ -m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, Integer runs = 10, - Integer max_iter = 1000, Double eps = 1e-6, Integer avg_sample_size_per_centroid = 50, Boolean separate=TRUE, Boolean space_decomp=FALSE, Integer seed = -1) +m_quantizeByCluster = function(Matrix[Double] X, Integer M = 4, Integer k = 10, Integer runs = 10, + Integer maxIter = 1000, Double tol = 1e-6, Integer avgSampleSizePerCentroid = 50, Boolean separate=TRUE, + Boolean spaceDecomp=FALSE, Integer seed = -1) return(Matrix[Double] codebook, Matrix[Double] codes, Matrix[Double] R) { #Pad the data with zeros if the number of columns of the input matrix X is not divisible by M @@ -69,8 +70,9 @@ m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, I } subvector_size = ncol(X) / M #Transform the vector space by an orthogonal matrix R. - #R is computed by reordering the principal directions of the input matrix X such that the variance of each subspace is balanced. - if(space_decomp) { + #R is computed by reordering the principal directions of the input matrix X such that the variance of each subspace + #is balanced. + if(spaceDecomp) { #Perform PCA using SVD X2 = X - colMeans(X) [U, S, V] = svd(X2) @@ -97,7 +99,7 @@ m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, I #The ith entry of a code vector has a value in [1, k*M]. if(!separate) { A = matrix(X, rows= nrow(X) * M, cols=subvector_size) - [codebook, B] = kmeans(A, k * M, runs, max_iter, eps, FALSE, avg_sample_size_per_centroid, seed) + [codebook, B] = kmeans(A, k * M, runs, maxIter, tol, FALSE, avgSampleSizePerCentroid, seed) codes = matrix(B, rows = nrow(B) / M, cols = ncol(B) * M) } #Kmeans is run for every subspace separately. Subvectors are mapped to a subset of k vectors of the codebook. 
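For reference, a migrated call to this builtin could look like the following sketch (sizes and seed are illustrative placeholders):

  [codebook, codes, R] = quantizeByCluster(X=X, M=4, k=10, runs=10, maxIter=1000,
    tol=1e-6, avgSampleSizePerCentroid=50, separate=TRUE, spaceDecomp=FALSE, seed=42)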
@@ -106,7 +108,7 @@ m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, I codebook = matrix(1, rows=k*M, cols=subvector_size) codes = matrix(1, rows=nrow(X), cols=M) for(i in 1:M, check=0) { - [tmp_cbook, tmp_c] = kmeans(X[,(i-1)*subvector_size+1:i*subvector_size], k, runs, max_iter, eps, FALSE, avg_sample_size_per_centroid, seed) + [tmp_cbook, tmp_c] = kmeans(X[,(i-1)*subvector_size+1:i*subvector_size], k, runs, maxIter, tol, FALSE, avgSampleSizePerCentroid, seed) #If no output is produced, use a single centroid if(as.scalar(tmp_c[1,1]) < 1) { tmp_cbook = matrix(0, rows=k, cols=subvector_size) diff --git a/scripts/builtin/randomForest.dml b/scripts/builtin/randomForest.dml index 8daeb5bc7f0..20376375b6b 100644 --- a/scripts/builtin/randomForest.dml +++ b/scripts/builtin/randomForest.dml @@ -41,7 +41,7 @@ # [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree) # |(L1)| | (L2) | | (L3) | # -# With feature sampling (feature_frac < 1), each tree is +# With feature sampling (featureFrac < 1), each tree is # prefixed by a one-hot vector of sampled features # (e.g., [1,1,1,0] if we sampled a,b,c of the four features) # @@ -52,16 +52,16 @@ # y Label matrix in recoded/binned representation # ctypes Row-Vector of column types [1 scale/ordinal, 2 categorical] # of shape 1-by-(ncol(X)+1), where the last entry is the y type -# num_trees Number of trees to be learned in the random forest model -# sample_frac Sample fraction of examples for each tree in the forest -# feature_frac Sample fraction of features for each tree in the forest -# max_depth Maximum depth of the learned tree (stopping criterion) -# min_leaf Minimum number of samples in leaf nodes (stopping criterion) -# min_split Minimum number of samples in leaf for attempting a split -# max_features Parameter controlling the number of features used as split -# candidates at tree nodes: m = ceil(num_features^max_features) -# max_values Parameter controlling the number of values per feature used -# as split candidates: nb = ceil(num_values^max_values) +# numTrees Number of trees to be learned in the random forest model +# sampleFrac Sample fraction of examples for each tree in the forest +# featureFrac Sample fraction of features for each tree in the forest +# maxDepth Maximum depth of the learned tree (stopping criterion) +# minLeaf Minimum number of samples in leaf nodes (stopping criterion) +# minSplit Minimum number of samples in leaf for attempting a split +# maxFeatures Parameter controlling the number of features used as split +# candidates at tree nodes: m = ceil(numFeatures^maxFeatures) +# maxValues Parameter controlling the number of values per feature used +# as split candidates: nb = ceil(num_values^maxValues) # impurity Impurity measure: entropy, gini (default), rss (regression) # seed Fixed seed for randomization of samples and split candidates # verbose Flag indicating verbose debug output @@ -73,9 +73,9 @@ # ------------------------------------------------------------------------------ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] ctypes, - Int num_trees = 16, Double sample_frac = 0.1, Double feature_frac = 1.0, - Int max_depth = 10, Int min_leaf = 20, Int min_split = 50, - Double max_features = 0.5, Double max_values = 1.0, + Int numTrees = 16, Double sampleFrac = 0.1, Double featureFrac = 1.0, + Int maxDepth = 10, Int minLeaf = 20, Int minSplit = 50, + Double maxFeatures = 0.5, Double maxValues = 1.0, String impurity = "gini", Int seed = -1, Boolean verbose = FALSE) 
return(Matrix[Double] M) { @@ -83,8 +83,8 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty # validation and initialization of reproducible seeds if(verbose) { - print("randomForest: initialize with num_trees=" + num_trees + ", sample_frac=" + sample_frac - + ", feature_frac=" + feature_frac + ", impurity=" + impurity + ", seed=" + seed + "."); + print("randomForest: initialize with numTrees=" + numTrees + ", sampleFrac=" + sampleFrac + + ", featureFrac=" + featureFrac + ", impurity=" + impurity + ", seed=" + seed + "."); } if(ncol(ctypes) != ncol(X)+1) stop("randomForest: inconsistent num features (incl. label) and col types: "+ncol(X)+" vs "+ncol(ctypes)+"."); @@ -96,20 +96,20 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty stop("randomForest: y contains only one class label."); lseed = as.integer(ifelse(seed!=-1, seed, as.scalar(rand(rows=1,cols=1,min=0, max=1e9)))); - randSeeds = rand(rows = 3 * num_trees, cols = 1, seed=lseed, min=0, max=1e9); + randSeeds = rand(rows = 3 * numTrees, cols = 1, seed=lseed, min=0, max=1e9); # training of num_tree decision trees - M = matrix(0, rows=num_trees, cols=2*(2^max_depth-1)); - F = matrix(1, rows=num_trees, cols=ncol(X)); - parfor(i in 1:num_trees) { + M = matrix(0, rows=numTrees, cols=2*(2^maxDepth-1)); + F = matrix(1, rows=numTrees, cols=ncol(X)); + parfor(i in 1:numTrees) { if( verbose ) - print("randomForest: start training tree "+i+"/"+num_trees+"."); + print("randomForest: start training tree "+i+"/"+numTrees+"."); # step 1: sample data Xi = X; yi = y; - if( sample_frac < 1.0 ) { + if( sampleFrac < 1.0 ) { si1 = as.integer(as.scalar(randSeeds[3*(i-1)+1,1])); - I1 = rand(rows=nrow(X), cols=1, seed=si1) <= sample_frac; + I1 = rand(rows=nrow(X), cols=1, seed=si1) <= sampleFrac; if( sum(I1) <= 1 ) # min 2 tuples I1[1:2,] = matrix(1,2,1); Xi = removeEmpty(target=X, margin="rows", select=I1); @@ -117,9 +117,9 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty } # step 2: sample features - if( feature_frac < 1.0 ) { + if( featureFrac < 1.0 ) { si2 = as.integer(as.scalar(randSeeds[3*(i-1)+2,1])); - I2 = rand(rows=ncol(X), cols=1, seed=si2) <= feature_frac; + I2 = rand(rows=ncol(X), cols=1, seed=si2) <= featureFrac; Xi = removeEmpty(target=Xi, margin="cols", select=I2); F[i,] = t(I2); } @@ -130,8 +130,8 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty # step 3: train decision tree t2 = time(); si3 = as.integer(as.scalar(randSeeds[3*(i-1)+3,1])); - Mtemp = decisionTree(X=Xi, y=yi, ctypes=ctypes, max_depth=max_depth, min_split=min_split, - min_leaf=min_leaf, max_features=max_features, max_values=max_values, + Mtemp = decisionTree(X=Xi, y=yi, ctypes=ctypes, maxDepth=maxDepth, minSplit=minSplit, + minLeaf=minLeaf, maxFeatures=maxFeatures, maxValues=maxValues, impurity=impurity, seed=si3, verbose=verbose); M[i,1:length(Mtemp)] = matrix(Mtemp, rows=1, cols=length(Mtemp)); if( verbose ) @@ -140,6 +140,6 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty M = cbind(F, M); if(verbose) { - print("randomForest: trained ensemble with num_trees="+num_trees+" in "+(time()-t1)/1e9+" seconds."); + print("randomForest: trained ensemble with numTrees="+numTrees+" in "+(time()-t1)/1e9+" seconds."); } } diff --git a/scripts/builtin/selectByVarThresh.dml b/scripts/builtin/selectByVarThresh.dml index 2431db0228f..607fc35f254 100644 --- a/scripts/builtin/selectByVarThresh.dml +++ 
b/scripts/builtin/selectByVarThresh.dml
@@ -24,7 +24,7 @@
# INPUT:
# ------------------------------------------------------------------------------------------
# X          Matrix of feature vectors.
-# thresh     The threshold for to drop
+# threshold  The variance threshold used to drop features
# ------------------------------------------------------------------------------------------
#
# OUTPUT:
@@ -32,10 +32,10 @@
# Xp  Matrix of feature vectors with <= thresh variance.
# -------------------------------------------------------------------------------------

-m_selectByVarThresh = function(Matrix[Double] X, Double thresh = 0)
+m_selectByVarThresh = function(Matrix[Double] X, Double threshold = 0)
  return (Matrix[Double] Xp, Matrix[Double] I)
{
  # drop feature with <= thresh variance, by default drop constants
-  I = (colVars(X) > thresh);
+  I = (colVars(X) > threshold);
  Xp = removeEmpty(target=X, margin="cols", select=I);
}
diff --git a/scripts/builtin/shapExplainer.dml b/scripts/builtin/shapExplainer.dml
index 626dc7da4c9..dc37af24412 100644
--- a/scripts/builtin/shapExplainer.dml
+++ b/scripts/builtin/shapExplainer.dml
@@ -30,17 +30,17 @@
#
# INPUT:
# ---------------------------------------------------------------------------------------
-# model_function  The function of the model to be evaluated as a String. This function has to take a matrix of samples
+# model           The function of the model to be evaluated as a String. This function has to take a matrix of samples
#                 and return a vector of predictions.
#                 It might be usefull to wrap the model into a function the takes and returns the desired shapes and
#                 use this wrapper here.
-# model_args      Arguments in order for the model, if desired. This will be prepended by the created instances-matrix.
-# x_instances     Multiple instances as rows for which to compute the shapley values.
-# X_bg            The background dataset from which to pull the random samples to perform Monte Carlo integration.
-# n_permutations  The number of permutaions. Defaults to 10. Theoretical 1 should already be enough for models with up
+# modelArgs       Arguments in order for the model, if desired. This will be prepended by the created instances-matrix.
+# X               Multiple instances as rows for which to compute the shapley values.
+# Xbg             The background dataset from which to pull the random samples to perform Monte Carlo integration.
+# nPermutations   The number of permutations. Defaults to 10. Theoretically, 1 should already be enough for models with up
#                 to second order interaction effects.
-# n_samples       Number of samples from X_bg used for marginalization.
-# remove_non_var  EXPERIMENTAL: If set, for every instance the varaince of each feature is checked against this feature in the
+# nSamples        Number of samples from Xbg used for marginalization.
+# removeNonVar    EXPERIMENTAL: If set, for every instance the variance of each feature is checked against this feature in the
#                 background data. If it does not change, we do not run any model cals for it.
# seed            A seed, in case the sampling has to be deterministic.
# verbose         A boolean to enable logging of each step of the function.
@@ -51,75 +51,75 @@
# S         Matrix holding the shapley values along the cols, one row per instance.
# expected  Double holding the average prediction of all instances.
diff --git a/scripts/builtin/shapExplainer.dml b/scripts/builtin/shapExplainer.dml
index 626dc7da4c9..dc37af24412 100644
--- a/scripts/builtin/shapExplainer.dml
+++ b/scripts/builtin/shapExplainer.dml
@@ -30,17 +30,17 @@
 #
 # INPUT:
 # ---------------------------------------------------------------------------------------
-# model_function  The function of the model to be evaluated as a String. This function has to take a matrix of samples
+# model           The function of the model to be evaluated as a String. This function has to take a matrix of samples
 #                 and return a vector of predictions.
 #                 It might be usefull to wrap the model into a function the takes and returns the desired shapes and
 #                 use this wrapper here.
-# model_args      Arguments in order for the model, if desired. This will be prepended by the created instances-matrix.
-# x_instances     Multiple instances as rows for which to compute the shapley values.
-# X_bg            The background dataset from which to pull the random samples to perform Monte Carlo integration.
-# n_permutations  The number of permutaions. Defaults to 10. Theoretical 1 should already be enough for models with up
+# modelArgs       Arguments in order for the model, if desired. This will be prepended by the created instances-matrix.
+# X               Multiple instances as rows for which to compute the shapley values.
+# Xbg             The background dataset from which to pull the random samples to perform Monte Carlo integration.
+# nPermutations   The number of permutations. Defaults to 10. Theoretically, 1 should already be enough for models with up
 #                 to second order interaction effects.
-# n_samples       Number of samples from X_bg used for marginalization.
-# remove_non_var  EXPERIMENTAL: If set, for every instance the varaince of each feature is checked against this feature in the
+# nSamples        Number of samples from Xbg used for marginalization.
+# removeNonVar    EXPERIMENTAL: If set, for every instance the variance of each feature is checked against this feature in the
 #                 background data. If it does not change, we do not run any model cals for it.
 # seed            A seed, in case the sampling has to be deterministic.
 # verbose         A boolean to enable logging of each step of the function.
@@ -51,75 +51,75 @@
 # S         Matrix holding the shapley values along the cols, one row per instance.
 # expected  Double holding the average prediction of all instances.
 # -----------------------------------------------------------------------------
-s_shapExplainer = function(String model_function, list[unknown] model_args, Matrix[Double] x_instances,
-  Matrix[Double] X_bg, Integer n_permutations = 10, Integer n_samples = 100, Integer remove_non_var=0,
+s_shapExplainer = function(String model, list[unknown] modelArgs, Matrix[Double] X,
+  Matrix[Double] Xbg, Integer nPermutations = 10, Integer nSamples = 100, Integer removeNonVar=0,
   Matrix[Double] partitions=as.matrix(-1), Integer seed = -1, Integer verbose = 0)
 return (Matrix[Double] row_phis, Double expected)
 {
-  u_printShapMessage("Parallel Permutation Explainer for "+nrow(x_instances)+" rows.", verbose)
-  u_printShapMessage("Number of Features: "+ncol(x_instances), verbose )
-  total_preds=ncol(x_instances)*2*n_permutations*n_samples*nrow(x_instances)
-  u_printShapMessage("Number of predictions: "+toString(total_preds)+" in "+nrow(x_instances)+
+  u_printShapMessage("Parallel Permutation Explainer for "+nrow(X)+" rows.", verbose)
+  u_printShapMessage("Number of Features: "+ncol(X), verbose )
+  total_preds=ncol(X)*2*nPermutations*nSamples*nrow(X)
+  u_printShapMessage("Number of predictions: "+toString(total_preds)+" in "+nrow(X)+
    " parallel cals.", verbose )
   #start with all features
-  features=u_range(1, ncol(x_instances))
+  features=u_range(1, ncol(X))
   #handle partitions
   if(sum(partitions) != -1){
-    if(remove_non_var != 0){
+    if(removeNonVar != 0){
       stop("shapley_permutations_by_row:ERROR: Can't use n_non_varying_inds and partitions at the same time.")
     }
     features=removePartitionsFromFeatures(features, partitions)
-    reduced_total_preds=ncol(features)*2*n_permutations*n_samples*nrow(x_instances)
+    reduced_total_preds=ncol(features)*2*nPermutations*nSamples*nrow(X)
     u_printShapMessage("Using Partitions reduces number of features to "+ncol(features)+".", verbose )
     u_printShapMessage("Total number of predictions reduced by "+(total_preds-reduced_total_preds)/total_preds+" to "+reduced_total_preds+".", verbose )
   }
   #lengths and offsets
-  total_features = ncol(x_instances)
+  total_features = ncol(X)
   perm_length = ncol(features)
-  full_mask_offset = perm_length * 2 * n_samples
+  full_mask_offset = perm_length * 2 * nSamples
   n_partition_features = total_features - perm_length
-  #sample from X_bg
-  u_printShapMessage("Sampling from X_bg", verbose )
-  # could use new samples for each permutation by sampling n_samples*n_permutations
-  X_bg_samples = u_sample_with_potential_replace(X_bg=X_bg, samples=n_samples, seed=seed )
-  row_phis = matrix(0, rows=nrow(x_instances), cols=total_features)
-  expected_m = matrix(0, rows=nrow(x_instances), cols=1)
+  #sample from Xbg
+  u_printShapMessage("Sampling from Xbg", verbose )
+  # could use new samples for each permutation by sampling nSamples*nPermutations
+  X_bg_samples = u_sample_with_potential_replace(Xbg=Xbg, nSamples=nSamples, seed=seed )
+  row_phis = matrix(0, rows=nrow(X), cols=total_features)
+  expected_m = matrix(0, rows=nrow(X), cols=1)
   #prepare masks for all permutations, since it stays the same for every row
   u_printShapMessage("Preparing reusable intermediate masks.", verbose )
-  permutations = matrix(0, rows=n_permutations, cols=perm_length)
-  masks_for_permutations = matrix(0, rows=perm_length*2*n_permutations*n_samples, cols=total_features)
+  permutations = matrix(0, rows=nPermutations, cols=perm_length)
+  masks_for_permutations = matrix(0, rows=perm_length*2*nPermutations*nSamples, cols=total_features)
-  parfor (i in 1:n_permutations, check=0){
+  parfor (i in 1:nPermutations, check=0){
     #shuffle features to get permutation
     permutations[i] = t(u_shuffle(t(features)))
     perm_mask = prepare_mask_for_permutation(permutation=permutations[i], partitions=partitions)
     offset_masks = (i-1) * full_mask_offset + 1
-    masks_for_permutations[offset_masks:offset_masks+full_mask_offset-1]=prepare_full_mask(perm_mask, n_samples)
+    masks_for_permutations[offset_masks:offset_masks+full_mask_offset-1]=prepare_full_mask(perm_mask, nSamples)
   }
   #replicate background and mask it, since it also can stay the same for every row
-  # could use new samples for each permutation by sampling n_samples*n_permutations and telling this function about it
+  # could use new samples for each permutation by sampling nSamples*nPermutations and telling this function about it
   masked_bg_for_permutations = prepare_masked_X_bg(masks_for_permutations, X_bg_samples, 0)
   u_printShapMessage("Computing phis in parallel.", verbose )
   #enable spark execution for parfor if desired
   #TODO allow spark mode via parameter?
-  #parfor (i in 1:nrow(x_instances), opt=CONSTRAINED, mode=REMOTE_SPARK){
+  #parfor (i in 1:nrow(X), opt=CONSTRAINED, mode=REMOTE_SPARK){
-  parfor (i in 1:nrow(x_instances)){
-    if(remove_non_var == 1){
+  parfor (i in 1:nrow(X)){
+    if(removeNonVar == 1){
       # try to remove inds that do not vary from the background
-      non_var_inds = get_non_varying_inds(x_instances[i], X_bg_samples)
+      non_var_inds = get_non_varying_inds(X[i], X_bg_samples)
       # only remove if more than 2 features remain, less then two breaks removal procedure
-      if (ncol(x_instances) > length(non_var_inds)+2){
+      if (ncol(X) > length(non_var_inds)+2){
         #remove samples and masks for non varying features
-        [i_masks_for_permutations, i_masked_bg_for_permutations] = remove_inds(masks_for_permutations, masked_bg_for_permutations, permutations, non_var_inds, n_samples)
+        [i_masks_for_permutations, i_masked_bg_for_permutations] = remove_inds(masks_for_permutations, masked_bg_for_permutations, permutations, non_var_inds, nSamples)
       }else{
         # we would remove all but two features, whichs breaks the removal algorithm
         non_var_inds = as.matrix(-1)
@@ -133,16 +133,16 @@ s_shapExplainer = function(String model, list[unknown] modelArgs, Matr
     }
     #apply masks and bg data for all permutations at once
-    X_test = apply_full_mask(x_instances[i], i_masks_for_permutations, i_masked_bg_for_permutations)
+    X_test = apply_full_mask(X[i], i_masks_for_permutations, i_masked_bg_for_permutations)
     #generate args for call to model
-    X_arg = append(list(X=X_test), model_args)
+    X_arg = append(list(X=X_test), modelArgs)
     #call model
-    P = eval(model_function, X_arg)
+    P = eval(model, X_arg)
-    #compute means, deviding n_rows by n_samples
-    P = compute_means_from_predictions(P=P, n_samples=n_samples)
+    #compute means, dividing n_rows by nSamples
+    P = compute_means_from_predictions(P=P, n_samples=nSamples)
     #compute phis
     [phis, e] = compute_phis_from_prediction_means(P=P, permutations=permutations, non_var_inds=non_var_inds, n_partition_features=n_partition_features)
@@ -162,21 +162,21 @@ s_shapExplainer = function(String model, list[unknown] modelArgs, Matr
 # INPUT:
 # -----------------------------------------------------------------------------
 # x     One single instance.
-# X_bg  Background dataset.
+# Xbg   Background dataset.
 # -----------------------------------------------------------------------------
 # OUTPUT:
 # -----------------------------------------------------------------------------
 # non_varying_inds  A row-vector with all the indices that do not vary from the background dataset.
 # -----------------------------------------------------------------------------
-get_non_varying_inds = function(Matrix[Double] x, Matrix[Double] X_bg)
+get_non_varying_inds = function(Matrix[Double] x, Matrix[Double] Xbg)
 return (Matrix[Double] non_varying_inds){
   #from numpy.isclose but adapted to fit MSE of shap, which is within the same scale
   rtol = 1e-04
   atol = 1e-05
   # compute distance metrics
-  diff = colMaxs(abs(X_bg -x))
-  rdist = atol + rtol * colMaxs(abs(X_bg))
+  diff = colMaxs(abs(Xbg -x))
+  rdist = atol + rtol * colMaxs(abs(Xbg))
   non_varying_inds = (diff <= rdist)
   # translate to indices
@@ -531,55 +531,55 @@ return (Matrix[Double] short_features){
 ########################
-# Samples from the background data X_bg.
+# Samples from the background data Xbg.
 # The function first uses all background samples without replacement, but if more samples are requested than
-# available in X_bg, it shuffles X_bg and pulls more samples from it, making it sampling with replacement.
+# available in Xbg, it shuffles Xbg and pulls more samples from it, making it sampling with replacement.
 # TODO: Might be replacable by other builtin for sampling in the future
 #
 # INPUT:
 # ---------------------------------------------------------------------------------------
-# X_bg            Matrix of background data
-# samples         Number of total samples
-# always_shuffle  Boolean to enable reshuffleing of X_bg, defaults to false.
-# seed            A seed for the shuffleing etc.
+# Xbg       Matrix of background data
+# nSamples  Number of total samples
+# shuffle   Boolean to enable reshuffling of Xbg, defaults to false.
+# seed      A seed for the shuffling etc.
 # ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # -----------------------------------------------------------------------------
-# X_sample  New Matrix containing #samples, from X_bg, potentially with replacement.
+# X_sample  New matrix containing nSamples rows from Xbg, potentially with replacement.
 # -----------------------------------------------------------------------------
-u_sample_with_potential_replace = function(Matrix[Double] X_bg, Integer samples, Boolean always_shuffle = 0, Integer seed)
+u_sample_with_potential_replace = function(Matrix[Double] Xbg, Integer nSamples, Boolean shuffle = 0, Integer seed)
 return (Matrix[Double] X_sample){
-  number_of_bg_samples = nrow(X_bg)
+  number_of_bg_samples = nrow(Xbg)
   # expect to not use all from background and subsample from it
   num_of_full_X_bg = 0
-  num_of_remainder_samples = samples
+  num_of_remainder_samples = nSamples
   # shuffle background if desired
-  if(always_shuffle) {
-    X_bg = u_shuffle(X_bg)
+  if(shuffle) {
+    Xbg = u_shuffle(Xbg)
   }
   # list to store references to generated matrices so we can rbind them in one call
   samples_list = list()
   # in case we need more than in the background data, use it multiple times with replacement
-  if(samples >= number_of_bg_samples) {
-    u_printShapMessage("WARN: More samples ("+toString(samples)+") are requested than available in the background dataset ("+toString(number_of_bg_samples)+"). Using replacement", 1)
+  if(nSamples >= number_of_bg_samples) {
+    u_printShapMessage("WARN: More samples ("+toString(nSamples)+") are requested than available in the background dataset ("+toString(number_of_bg_samples)+"). Using replacement", 1)
     # get number of full sets of background by integer division
-    num_of_full_X_bg = samples %/% number_of_bg_samples
+    num_of_full_X_bg = nSamples %/% number_of_bg_samples
     # get remaining samples using modulo
-    num_of_remainder_samples = samples %% number_of_bg_samples
+    num_of_remainder_samples = nSamples %% number_of_bg_samples
     #use background data once
-    samples_list = append(samples_list, X_bg)
+    samples_list = append(samples_list, Xbg)
     if(num_of_full_X_bg > 1){
       # add shuffled versions of background data
       for (i in 1:num_of_full_X_bg-1){
-        samples_list = append(samples_list, u_shuffle(X_bg))
+        samples_list = append(samples_list, u_shuffle(Xbg))
       }
     }
   }
@@ -593,7 +593,7 @@ return (Matrix[Double] X_sample){
     R_cont = table(random_samples_indices, random_samples_indices, number_of_bg_samples, number_of_bg_samples)
     #pick samples by multiplication with contingency table of indices and removing empty rows
-    samples_list = append(samples_list, removeEmpty(target=t(t(X_bg) %*% R_cont), margin="rows"))
+    samples_list = append(samples_list, removeEmpty(target=t(t(Xbg) %*% R_cont), margin="rows"))
   }
@@ -658,17 +658,17 @@ return (Matrix[Double] range){
 # INPUT:
 # -----------------------------------------------------------------------------
 # M        Matrix where rows will be replicated.
-# n_times  Number of replications.
+# n        Number of replications.
 # -----------------------------------------------------------------------------
 #
 # OUTPUT:
 # -----------------------------------------------------------------------------
 # M  Matrix of replicated rows.
 # -----------------------------------------------------------------------------
-u_repeatRows = function(Matrix[Double] M, Integer n_times)
+u_repeatRows = function(Matrix[Double] M, Integer n)
 return(Matrix[Double] M){
   #get indices for new rows (e.g. 1,1,1,2,2,2 for 2 rows, each replicated 3 times)
-  indices = ceil(seq(1,nrow(M)*n_times,1) / n_times)
+  indices = ceil(seq(1,nrow(M)*n,1) / n)
   #to one hot, so we get a replication matrix R
   R = toOneHot(indices, nrow(M))
@@ -691,36 +691,36 @@ return(Matrix[Double] M){
 # INPUT:
 # -----------------------------------------------------------------------------
 # M        Matrix where rows will be replicated.
-# n_times  Number of replications.
+# n        Number of replications.
 # -----------------------------------------------------------------------------
 #
 # OUTPUT:
 # -----------------------------------------------------------------------------
 # M  Matrix of replicated rows.
 # -----------------------------------------------------------------------------
-u_repeatMatrix = function(Matrix[Double] M, Integer n_times)
+u_repeatMatrix = function(Matrix[Double] M, Integer n)
 return(Matrix[Double] M){
   n_rows=nrow(M)
   n_cols=ncol(M)
   #reshape to row vector
   M = matrix(M, rows=1, cols=length(M))
   #broadcast
-  M = matrix(1, rows=n_times, cols=1) * M
+  M = matrix(1, rows=n, cols=1) * M
   #reshape to get matrix
-  M = matrix(M, rows=n_rows*n_times, cols=n_cols)
+  M = matrix(M, rows=n_rows*n, cols=n_cols)
 }
 # Like repeatMatrix(), but alows to define parts of matrix as blocks to replicate n-rows as a block.
-u_repeatMatrixBlocks = function(Matrix[Double] M, Integer rows_per_block, Integer n_times)
+u_repeatMatrixBlocks = function(Matrix[Double] M, Integer rowsPerBlock, Integer n)
 return(Matrix[Double] M){
   n_rows=nrow(M)
   n_cols=ncol(M)
   #reshape to row vector
-  M = matrix(M, rows=n_rows/rows_per_block, cols=n_cols*rows_per_block)
+  M = matrix(M, rows=n_rows/rowsPerBlock, cols=n_cols*rowsPerBlock)
   #repeat block rows
-  M = u_repeatRows(M, n_times)
+  M = u_repeatRows(M, n)
   #reshape to get matrix
-  M = matrix(M, rows=n_rows*n_times, cols=n_cols)
+  M = matrix(M, rows=n_rows*n, cols=n_cols)
 }
 #utility function to print with shap-explainer-tag
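A minimal sketch of the renamed shapExplainer interface under stated assumptions: the wrapper function below is hypothetical, the data is synthetic, and the model is resolved by name via eval as in the script above.

# hypothetical wrapper: takes a matrix of samples, returns one prediction per row
linModel = function(Matrix[Double] X, Matrix[Double] B) return (Matrix[Double] P) {
  P = X %*% B;
}
Xexp = rand(rows=5, cols=4, seed=3);   # instances to explain
Xbg  = rand(rows=50, cols=4, seed=4);  # background data for the Monte Carlo integration
B    = rand(rows=4, cols=1, seed=5);
[phis, expected] = shapExplainer(model="linModel", modelArgs=list(B=B),
  X=Xexp, Xbg=Xbg, nPermutations=5, nSamples=20, seed=42, verbose=1);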
diff --git a/scripts/builtin/sherlock.dml b/scripts/builtin/sherlock.dml
index 01430b6843c..af3237d8cc7 100644
--- a/scripts/builtin/sherlock.dml
+++ b/scripts/builtin/sherlock.dml
@@ -30,8 +30,8 @@
 #
 # INPUT:
 # ----------------------------------------------------------------------------------------
-# X_train  matrix of feature vectors
-# y_train  matrix Y of class labels of semantic data type
+# X  matrix of feature vectors
+# Y  matrix Y of class labels of semantic data type
 # ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -50,7 +50,7 @@
 source("scripts/nn/examples/sherlockNet.dml") as sherlockNet
-m_sherlock = function(Matrix[Double] X_train, Matrix[Double] y_train)
+m_sherlock = function(Matrix[Double] X, Matrix[Double] Y)
 return (Matrix[Double] cW1, Matrix[Double] cb1,
   Matrix[Double] cW2, Matrix[Double] cb2,
   Matrix[Double] cW3, Matrix[Double] cb3,
@@ -66,22 +66,22 @@ m_sherlock = function(Matrix[Double] X_train, Matrix[Double] y_train)
   Matrix[Double] fW1, Matrix[Double] fb1,
   Matrix[Double] fW2, Matrix[Double] fb2,
   Matrix[Double] fW3, Matrix[Double] fb3) {
-  train_cols = ncol(X_train)
-  train_rows = nrow(X_train)
-  [cW1, cb1, cW2, cb2, cW3, cb3] = sherlockNet::train(X_train[1:train_rows, 224:1183], y_train, 300)
-  [wW1, wb1, wW2, wb2, wW3, wb3] = sherlockNet::train(cbind(X_train[1:train_rows, 13:212], X_train[1:train_rows, 1188]), y_train, 200)
-  [pW1, pb1, pW2, pb2, pW3, pb3] = sherlockNet::train(X_train[1:train_rows, 1189:1588], y_train, 400)
-  [sW1, sb1, sW2, sb2, sW3, sb3] = sherlockNet::train(cbind(X_train[1:train_rows, 1:12], X_train[1:train_rows, 213:223], X_train[1:train_rows, 1184:1187]), y_train, 80)
+  train_cols = ncol(X)
+  train_rows = nrow(X)
+  [cW1, cb1, cW2, cb2, cW3, cb3] = sherlockNet::train(X[1:train_rows, 224:1183], Y, 300)
+  [wW1, wb1, wW2, wb2, wW3, wb3] = sherlockNet::train(cbind(X[1:train_rows, 13:212], X[1:train_rows, 1188]), Y, 200)
+  [pW1, pb1, pW2, pb2, pW3, pb3] = sherlockNet::train(X[1:train_rows, 1189:1588], Y, 400)
+  [sW1, sb1, sW2, sb2, sW3, sb3] = sherlockNet::train(cbind(X[1:train_rows, 1:12], X[1:train_rows, 213:223], X[1:train_rows, 1184:1187]), Y, 80)
   # train features seperate
-  cprobs = sherlockNet::predict(X_train[1:train_rows, 224:1183], cW1, cb1, cW2, cb2, cW3, cb3)
-  wprobs = sherlockNet::predict(cbind(X_train[1:train_rows, 13:212], X_train[1:train_rows, 1188]), wW1,wb1, wW2, wb2, wW3, wb3)
-  pprobs = sherlockNet::predict(X_train[1:train_rows, 1189:1588], pW1, pb1, pW2, pb2, pW3, pb3)
-  sprobs = sherlockNet::predict(cbind(X_train[1:train_rows, 1:12], X_train[1:train_rows, 213:223], X_train[1:train_rows, 1184:1187]), sW1, sb1, sW2, sb2, sW3, sb3)
+  cprobs = sherlockNet::predict(X[1:train_rows, 224:1183], cW1, cb1, cW2, cb2, cW3, cb3)
+  wprobs = sherlockNet::predict(cbind(X[1:train_rows, 13:212], X[1:train_rows, 1188]), wW1,wb1, wW2, wb2, wW3, wb3)
+  pprobs = sherlockNet::predict(X[1:train_rows, 1189:1588], pW1, pb1, pW2, pb2, pW3, pb3)
+  sprobs = sherlockNet::predict(cbind(X[1:train_rows, 1:12], X[1:train_rows, 213:223], X[1:train_rows, 1184:1187]), sW1, sb1, sW2, sb2, sW3, sb3)
   #use trained features to train final net
   first_predictions = cbind(cprobs, wprobs, pprobs, sprobs)
-  [fW1, fb1, fW2, fb2, fW3, fb3] = sherlockNet::train(first_predictions, y_train, 500)
+  [fW1, fb1, fW2, fb2, fW3, fb3] = sherlockNet::train(first_predictions, Y, 500)
 }
 # ---------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/shortestPath.dml b/scripts/builtin/shortestPath.dml
index 3eecf45426c..5cf6db91cd4 100644
--- a/scripts/builtin/shortestPath.dml
+++ b/scripts/builtin/shortestPath.dml
@@ -32,7 +32,7 @@
 #             The values of G can be 0/1 (just specifying whether the nodes
 #             are connected or not) or integer values (representing the weight
 #             of the edges or the distances between nodes, 0 if not connected).
-# maxi        Integer max number of iterations accepted (0 for FALSE, i.e.
+# maxIter     Integer max number of iterations accepted (0 for FALSE, i.e.
 #             max number of iterations not defined)
 # sourceNode  node index to calculate the shortest paths to all other nodes.
 # verbose     flag for verbose debug output
@@ -47,7 +47,7 @@
 #             not connected.
 # --------------------------------------------------------------------------------------
-m_shortestPath = function(Matrix[Double] G, Integer maxi = 0, Integer sourceNode, Boolean verbose = FALSE)
+m_shortestPath = function(Matrix[Double] G, Integer maxIter = 0, Integer sourceNode, Boolean verbose = FALSE)
 return (Matrix[Double] C)
 {
   if(verbose)
@@ -66,7 +66,7 @@ m_shortestPath = function(Matrix[Double] G, Integer maxi = 0, Integer sourceNode
   iter = 1
   diff = Inf;
-  while( diff > 0 & (maxi==0 | iter<=maxi) ) {
+  while( diff > 0 & (maxIter==0 | iter<=maxIter) ) {
     # avoid densification of 'colMins(G + minDist)' via colMin-colMax transform
     # (we exploit here that ^x with x!=0 is treated as sparse-safe and otherwise
     # Inf or NaN and replaced with 0, which keeps large sparse graphs sparse)
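A small sketch of the renamed maxIter parameter on a 3-node 0/1 graph (illustrative; maxIter=0 leaves the iteration count unbounded, as documented above):

# undirected chain 1-2-3
G = matrix("0 1 0 1 0 1 0 1 0", rows=3, cols=3);
C = shortestPath(G=G, maxIter=0, sourceNode=1, verbose=FALSE);
print(toString(C));  # distances from node 1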
diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml
index 83c7c874b24..1971d56dcdd 100644
--- a/scripts/builtin/steplm.dml
+++ b/scripts/builtin/steplm.dml
@@ -26,46 +26,46 @@
 #
 # .. code-block::
 #
-# return: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
+# return: Matrix of regression parameters (the betas), whose size depends on the intercept input value:
+#              OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
+# intercept=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+# intercept=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# intercept=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                             Col.2: betas for shifted/rescaled X and intercept
 #
 # In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
 # name-value pair per each line, as follows:
 #
 # INPUT:
 # ------------------------------------------------------------------------------------------
-# X        Matrix X of feature vectors
-# Y        Single-column Matrix Y of response values
-# icpt     Intercept presence, shifting and rescaling the columns of X:
-#          0 = no intercept, no shifting, no rescaling;
-#          1 = add intercept, but neither shift nor rescale X;
-#          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg      Regularization parameter, 0 for no penalty
-# tol      Tolerance threshold to train until achieved
-# maxi     Maximum iterations 0 means until tolerance is reached
-# verbose  Indicator for verbose debug output
+# X          Matrix X of feature vectors
+# Y          Single-column Matrix Y of response values
+# intercept  Intercept presence, shifting and rescaling the columns of X:
+#            0 = no intercept, no shifting, no rescaling;
+#            1 = add intercept, but neither shift nor rescale X;
+#            2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg        Regularization parameter, 0 for no penalty
+# tol        Tolerance threshold to train until achieved
+# maxIter    Maximum iterations; 0 means train until the tolerance is reached
+# verbose    Indicator for verbose debug output
 # ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # -----------------------------------------------------------------------------------------------
-# B  Matrix of regression parameters (the betas) and its size depend on icpt input value.
-# S  Matrix of selected features ordered as computed by the algorithm.
+# B  Matrix of regression parameters (the betas), whose size depends on the intercept input value.
+# S  Matrix of selected features ordered as computed by the algorithm.
 # -----------------------------------------------------------------------------------------------
-m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
-  Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
+m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer intercept = 0,
+  Double reg = 1e-7, Double tol = 1e-7, Integer maxIter = 0, Boolean verbose = TRUE)
 return(Matrix[Double] B, Matrix[Double] S)
 {
-  if( icpt!=0 & icpt!=1 & icpt!=2 )
-    stop("Invalid steplm invocation with icpt="+icpt+" (valid values: 0,1,2).");
+  if( intercept!=0 & intercept!=1 & intercept!=2 )
+    stop("Invalid steplm invocation with intercept="+intercept+" (valid values: 0,1,2).");
   # NOTE: currently only the forward selection strategy in supported:
   # start from one feature and iteratively add features until AIC improves
-  thr = 0.001;
+  threshold = 0.001;
   if(verbose)
     print("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
@@ -80,7 +80,7 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
   # X_global stores the best model found at each step
   X_global = matrix(0, n, 1);
-  if (icpt == 1 | icpt == 2) {
+  if (intercept == 1 | intercept == 2) {
     beta = mean(y);
     AIC_best_orig = 2 + n * log(sum((beta - y) ^ 2) / n);
   } else {
@@ -89,27 +89,27 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
   }
   if(verbose) print("Best AIC without any features: " + AIC_best_orig);
-  boa_ncol = ncol(X_orig) + as.integer(icpt!=0);
+  boa_ncol = ncol(X_orig) + as.integer(intercept!=0);
   beta_out_all = matrix(0, boa_ncol, m_orig);
   # First pass to examine single features
   AICs = matrix(0, 1, m_orig);
   parfor (i in 1:m_orig) {
-    [AIC_1, beta_out_i] = linear_regression(X_orig[, i], y, icpt, reg, tol, maxi, verbose);
+    [AIC_1, beta_out_i] = linear_regression(X_orig[, i], y, intercept, reg, tol, maxIter, verbose);
     AICs[1, i] = AIC_1;
     beta_out_all[1:nrow(beta_out_i), i] = beta_out_i;
   }
   AIC_best = min(min(AICs), AIC_best_orig);
-  AIC_check = checkAIC(AIC_best, AIC_best_orig, thr);
+  AIC_check = checkAIC(AIC_best, AIC_best_orig, threshold);
   column_best = ifelse(AIC_check, as.scalar(rowIndexMin(AICs)), 0);
   # beta best so far
   beta_best = beta_out_all[, column_best];
   if (column_best == 0) {
     if(verbose)
-      print("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
+      print("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (threshold * 100) + "% decrease in AIC!");
     B = matrix(0, m_orig, 1);
-    if (icpt != 0)
+    if (intercept != 0)
       B = rbind(B, as.matrix(beta));
     S = matrix(0, 1, 1);
   }
@@ -130,7 +130,7 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
       if (as.scalar(columns_fixed[1, i]) == 0) {
         # Construct the feature matrix
         Xi = cbind(X_global, X_orig[, i]);
-        [AIC_2, beta_out_i2] = linear_regression(Xi, y, icpt, reg, tol, maxi, verbose);
+        [AIC_2, beta_out_i2] = linear_regression(Xi, y, intercept, reg, tol, maxIter, verbose);
         AICs_2[1, i] = AIC_2;
         beta_out_all_2[1:nrow(beta_out_i2), i] = beta_out_i2;
       }
@@ -142,7 +142,7 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
     # Determine the best AIC
     AIC_best_orig = AIC_best;
     AIC_best = min(min(AICs_2), AIC_best_orig);
-    AIC_check = checkAIC(AIC_best, AIC_best_orig, thr);
+    AIC_check = checkAIC(AIC_best, AIC_best_orig, threshold);
     column_best = ifelse(AIC_check, as.scalar(rowIndexMin(AICs_2)), column_best);
     # have the best beta store in the matrix
@@ -169,24 +169,24 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
     # run linear regression with selected set of features
     if( verbose )
       print("Running linear regression with selected features...");
-    [AIC, beta_out] = linear_regression(X_global, y, icpt, reg, tol, maxi, verbose);
+    [AIC, beta_out] = linear_regression(X_global, y, intercept, reg, tol, maxIter, verbose);
     S = columns_fixed_ordered;
-    if (icpt != 0)
+    if (intercept != 0)
       S = cbind(S, matrix(boa_ncol, 1, 1))
     B = reorder_matrix(boa_ncol, beta_out, S);
   }
 }
 # Computes linear regression using lm and outputs AIC.
-linear_regression = function(Matrix[Double] X, Matrix[Double] y, Integer icpt,
-  Double reg, Double tol, Integer maxi, Boolean verbose)
+linear_regression = function(Matrix[Double] X, Matrix[Double] y, Integer intercept,
+  Double reg, Double tol, Integer maxIter, Boolean verbose)
 return(Double AIC, Matrix[Double] beta)
 {
   # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
-  beta = lm(X = X, y = y, icpt = icpt, reg=reg, tol=tol, maxi=maxi, verbose=FALSE);
+  beta = lm(X = X, y = y, intercept = intercept, reg=reg, tol=tol, maxIter=maxIter, verbose=FALSE);
   # PREPARE X for SCORING
-  if( icpt != 0 )
+  if( intercept != 0 )
     X = cbind(X, matrix(1,nrow(X),1))
   # COMPUTE AIC
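A usage sketch of the renamed steplm signature (synthetic regression data; intercept=1 adds an intercept without rescaling X, and maxIter=0 iterates until the tolerance is reached):

X = rand(rows=100, cols=6, seed=11);
y = X %*% rand(rows=6, cols=1, seed=12);
[B, S] = steplm(X=X, y=y, intercept=1, reg=1e-7, tol=1e-7, maxIter=0, verbose=FALSE);
print("selected features: " + toString(S));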
diff --git a/scripts/builtin/tSNE.dml b/scripts/builtin/tSNE.dml
index e4af10fbd81..6248bc64f0c 100644
--- a/scripts/builtin/tSNE.dml
+++ b/scripts/builtin/tSNE.dml
@@ -36,37 +36,37 @@
 # -------------------------------------------------------------------------------------------
 # X             Data Matrix of shape
 #               (number of data points, input dimensionality)
-# reduced_dims  Output dimensionality
+# reducedDims   Output dimensionality
 # perplexity    Perplexity Parameter
 # lr            Learning rate
 # momentum      Momentum Parameter
-# max_iter      Number of iterations
+# maxIter       Number of iterations
 # tol           Tolerance for early stopping in gradient descent
 # seed          The seed used for initial values.
 #               If set to -1 random seeds are selected.
-# is_verbose    Print debug information
-# print_iter    Intervals of printing out the L1 norm values. Parameter not relevant if
-#               is_verbose = FALSE.
+# verbose       Print debug information
+# printIter     Intervals of printing out the L1 norm values. Parameter not relevant if
+#               verbose = FALSE.
 # -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # -------------------------------------------------------------------------------------------
-# Y  Data Matrix of shape (number of data points, reduced_dims)
+# Y  Data Matrix of shape (number of data points, reducedDims)
 # -------------------------------------------------------------------------------------------
-m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity = 30,
-  Double lr = 300., Double momentum = 0.9, Integer max_iter = 1000, Double tol = 1e-5,
-  Integer seed = -1, Boolean is_verbose = FALSE, Integer print_iter = 10)
+m_tSNE = function(Matrix[Double] X, Integer reducedDims = 2, Integer perplexity = 30,
+  Double lr = 300., Double momentum = 0.9, Integer maxIter = 1000, Double tol = 1e-5,
+  Integer seed = -1, Boolean verbose = FALSE, Integer printIter = 10)
 return(Matrix[Double] Y)
 {
-  d = reduced_dims
+  d = reducedDims
   n = nrow(X)
-  P = x2p(X, perplexity, is_verbose)
+  P = x2p(X, perplexity, verbose)
   P = P*4
   Y = rand(rows=n, cols=d, pdf="normal", seed=seed)
   dY = matrix(0, rows=n, cols=d)
-  C = matrix(0, rows=max_iter/100, cols=1)
+  C = matrix(0, rows=maxIter/100, cols=1)
   ZERODIAG = (diag(matrix(-1, rows=n, cols=1)) + 1)
   D = matrix(0, rows=n, cols=n)
@@ -74,7 +74,7 @@ m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity
   Q = matrix(0, rows=n, cols=n)
   W = matrix(0, rows=n, cols=n)
-  if(is_verbose)
+  if(verbose)
     print("starting loop....")
   itr = 1
@@ -93,7 +93,7 @@ m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity
   norm_initial = norm
   norm_target = norm_initial * tol
-  if(is_verbose){
+  if(verbose){
     print("L1 Norm initial : " + norm_initial)
     print("L1 Norm target  : " + norm_target)
   }
@@ -110,7 +110,7 @@ m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity
   itr = itr + 1
   # End of first iteration
-  while (itr <= max_iter & norm > norm_target) {
+  while (itr <= maxIter & norm > norm_target) {
     D = dist(Y)
     Z = 1/(D + 1)
     Z = Z * ZERODIAG
@@ -121,7 +121,7 @@ m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity
     dY = momentum*dY - lr*g
     norm = sum(dY^2)
-    if(is_verbose & itr %% print_iter == 0){
+    if(verbose & itr %% printIter == 0){
       print("Iteration: " + itr)
      print("L1 Norm: " + norm)
    }
@@ -139,15 +139,15 @@ m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity
   }
 }
-x2p = function(matrix[double] X, double perplexity, Boolean is_verbose = FALSE)
+x2p = function(matrix[double] X, double perplexity, Boolean verbose = FALSE)
 return(matrix[double] P)
 {
-  if(is_verbose)
+  if(verbose)
     print("x2p....")
   tol = 1.0e-5
   INF = 1.0e20
   n = nrow(X)
-  if(is_verbose)
+  if(verbose)
     print(n)
   D = dist(X)
@@ -183,6 +183,6 @@ return(matrix[double] P)
   P = P + t(P)
   P = P / sum(P)
-  if(is_verbose)
+  if(verbose)
     print("x2p finishing....")
 }
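A minimal sketch of the renamed tSNE parameters (synthetic input; 500 iterations is illustrative, remaining parameters keep their defaults):

X = rand(rows=150, cols=20, seed=21);
Y2d = tSNE(X=X, reducedDims=2, perplexity=30, lr=300.0, momentum=0.9,
  maxIter=500, tol=1e-5, seed=42, verbose=FALSE);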
diff --git a/scripts/builtin/topk_cleaning.dml b/scripts/builtin/topk_cleaning.dml
index 7ec42783193..f1996af0dc9 100644
--- a/scripts/builtin/topk_cleaning.dml
+++ b/scripts/builtin/topk_cleaning.dml
@@ -21,16 +21,52 @@
 # This function cleans top-K item (where K is given as input)for a given list of users.
 # metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
-
+#
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# dataTrain         Frame[Unknown]
+# dataTest          Frame[Unknown]
+# metaData          Frame[Unknown]
+# primitives        Frame[Unknown]
+# parameters        Frame[Unknown]
+# refSol            Frame[Unknown]
+# evaluationFunc    String
+# evalFunHp         Matrix[Double]
+# topK              Integer
+# resourceVal       Integer
+# maxIter           Integer
+# lq                Double
+# uq                Double
+# sample            Double
+# expectedIncrease  Double
+# seed              Integer
+# cv                Boolean
+# cvk               Integer
+# isLastLabel       Boolean
+# rowCount          Integer
+# correctTypos      Boolean
+# enablePruning     Boolean
+# ----------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ----------------------------------------------------------------------------------------
+# topKPipelines    ---
+# topKHyperParams  ---
+# topKScores       ---
+# dirtyScore       ---
+# evalFunHp        ---
+# applyFunc        ---
+# ----------------------------------------------------------------------------------------
 source("scripts/pipelines/scripts/utils.dml") as utils;
 source("scripts/pipelines/scripts/enumerateLogical.dml") as lg;
 source("scripts/builtin/bandit.dml") as bandit;
-f_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = as.frame("NULL"), Frame[Unknown] metaData = as.frame("NULL"), Frame[Unknown] primitives,
-  Frame[Unknown] parameters, Frame[String] refSol = as.frame("NaN"), String evaluationFunc, Matrix[Double] evalFunHp, Integer topK = 5, Integer resource_val = 20,
-  Integer max_iter = 10, Double lq = 0.1, Double uq=0.7, Double sample = 1.0, Double expectedIncrease=1.0, Integer seed = -1, Boolean cv=TRUE, Integer cvk = 2,
-  Boolean isLastLabel = TRUE, Integer rowCount = 3700,
-  Boolean correctTypos=FALSE, Boolean enablePruning = FALSE)
+f_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = as.frame("NULL"),
+  Frame[Unknown] metaData = as.frame("NULL"), Frame[Unknown] primitives, Frame[Unknown] parameters,
+  Frame[String] refSol = as.frame("NaN"), String evaluationFunc, Matrix[Double] evalFunHp, Integer topK = 5,
+  Integer resourceVal = 20, Integer maxIter = 10, Double lq = 0.1, Double uq=0.7, Double sample = 1.0,
+  Double expectedIncrease=1.0, Integer seed = -1, Boolean cv=TRUE, Integer cvk = 2, Boolean isLastLabel = TRUE,
+  Integer rowCount = 3700, Boolean correctTypos=FALSE, Boolean enablePruning = FALSE)
   return (Frame[Unknown] topKPipelines, Matrix[Double] topKHyperParams, Matrix[Double] topKScores, Double dirtyScore,
     Matrix[Double] evalFunHp, Frame[Unknown] applyFunc)
 {
@@ -113,16 +149,16 @@ f_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
     print("-- Cleaning - Enum Logical Pipelines: ");
     [bestLogical, bestHp, con, refChanges, acc] = lg::enumerateLogical(X=eXtrain, y=eYtrain, Xtest=eXtest, ytest=eYtest,
-      initial_population=logical, refSol=refSol, seed = seed, max_iter=max_iter, metaList = metaList,
+      initial_population=logical, refSol=refSol, seed = seed, max_iter=maxIter, metaList = metaList,
       evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, primitives=primitives, param=parameters,
       dirtyScore = (dirtyScore + expectedIncrease), cv=cv, cvk=cvk, verbose=TRUE, ctx=ctx)
     t6 = time(); print("---- finalized in: "+(t6-t5)/1e9+"s");
     topKPipelines = as.frame("NULL"); topKHyperParams = matrix(0,0,0); topKScores = matrix(0,0,0); applyFunc = as.frame("NULL")
     # write(acc, output+"/acc.csv", format="csv")
     # stop("end of enumlp")
-    [topKPipelines, topKHyperParams, topKScores, applyFunc] = bandit(X_train=eXtrain, Y_train=eYtrain, X_test=eXtest, Y_test=eYtest, metaList=metaList,
+    [topKPipelines, topKHyperParams, topKScores, applyFunc] = bandit(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, metaList=metaList,
       evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, lp=bestLogical, lpHp=bestHp, primitives=primitives, param=parameters, baseLineScore=dirtyScore,
-      k=topK, R=resource_val, cv=cv, cvk=cvk, ref=refChanges, seed=seed, enablePruning = enablePruning, verbose=TRUE);
+      k=topK, R=resourceVal, cv=cv, cvk=cvk, ref=refChanges, seed=seed, enablePruning = enablePruning, verbose=TRUE);
     t7 = time(); print("-- Cleaning - Enum Physical Pipelines: "+(t7-t6)/1e9+"s");
   }
diff --git a/scripts/builtin/univar.dml b/scripts/builtin/univar.dml
index 06ef013a94b..801ab889e6a 100644
--- a/scripts/builtin/univar.dml
+++ b/scripts/builtin/univar.dml
@@ -23,9 +23,9 @@
 #
 # INPUT:
 # ----------------------------------------------------------------------------------
-# X       Input matrix of the shape (N, D)
-# TYPES   Matrix of the shape (1, D) with features types:
-#         1 for scale, 2 for nominal, 3 for ordinal
+# X       Input matrix of the shape (N, D)
+# CTYPES  Row-Vector of the shape (1, D) of column types:
+#         1 for scale, 2 for nominal, 3 for ordinal
 # ----------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -33,10 +33,10 @@
 # univarStats  univariate statistics for all attributes
 # ----------------------------------------------------------------------------------------
-m_univar = function(Matrix[Double] X, Matrix[Double] types)
+m_univar = function(Matrix[Double] X, Matrix[Double] ctypes)
 return(Matrix[Double] univarStats)
 {
-  max_kind = max(types);
+  max_kind = max(ctypes);
   N = nrow(X);
   D = ncol(X);
@@ -45,12 +45,12 @@ return(Matrix[Double] univarStats)
   univarStats = matrix(0, rows=numBaseStats, cols=D);
   # Compute max domain size among all categorical attributes
-  maxDomain = as.integer(max((types > 1) * colMaxs(X)));
+  maxDomain = as.integer(max((ctypes > 1) * colMaxs(X)));
   parfor(i in 1:D, check=0) {
     F = X[,i];
-    type = as.scalar(types[1,i]);
+    type = as.scalar(ctypes[1,i]);
     minF = min(F);
     maxF = max(F);
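A usage sketch of univar with the renamed ctypes row vector (1 = scale, 2 = nominal, 3 = ordinal; data synthetic):

# two scale columns plus one nominal column with integer categories in {1,2,3}
X = cbind(rand(rows=100, cols=2, seed=31),
  round(rand(rows=100, cols=1, min=1, max=3, seed=32)));
types = matrix("1 1 2", rows=1, cols=3);
stats = univar(X=X, ctypes=types);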
diff --git a/scripts/builtin/winsorize.dml b/scripts/builtin/winsorize.dml
index 311f1e5bf37..50fd77bf4b7 100644
--- a/scripts/builtin/winsorize.dml
+++ b/scripts/builtin/winsorize.dml
@@ -35,14 +35,14 @@
 # -----------------------------------------------------------------------------------
 m_winsorize = function(Matrix[Double] X, Double ql = 0.05, Double qu = 0.95, Boolean verbose)
-return (Matrix[Double] Y, Matrix[Double] qLower, Matrix[Double] qUpper) {
-  qLower = matrix(0, rows=1, cols=ncol(X))
-  qUpper = matrix(0, rows=1, cols=ncol(X))
+return (Matrix[Double] Y, Matrix[Double] QL, Matrix[Double] QU) {
+  QL = matrix(0, rows=1, cols=ncol(X))
+  QU = matrix(0, rows=1, cols=ncol(X))
   Xtemp = replace(target=X, pattern=NaN, replacement=0)
   parfor(i in 1:ncol(X), check=0) {
-    qLower[1,i] = quantile(Xtemp[,i], ql)
-    qUpper[1,i] = quantile(Xtemp[,i], qu)
+    QL[1,i] = quantile(Xtemp[,i], ql)
+    QU[1,i] = quantile(Xtemp[,i], qu)
   }
-  Y = winsorizeApply(X, qLower, qUpper)
+  Y = winsorizeApply(X, QL, QU)
 }
diff --git a/scripts/builtin/winsorizeApply.dml b/scripts/builtin/winsorizeApply.dml
index 527d103f51a..9699f9a2d12 100644
--- a/scripts/builtin/winsorizeApply.dml
+++ b/scripts/builtin/winsorizeApply.dml
@@ -25,8 +25,8 @@
 # INPUT:
 # --------------------------------------------------
 # X       Input feature matrix
-# qLower  row vector of upper bounds per column
-# qUpper  row vector of lower bounds per column
+# QL      row vector of lower bounds per column
+# QU      row vector of upper bounds per column
 # --------------------------------------------------
 #
 # OUTPUT:
@@ -35,9 +35,9 @@
 # ------------------------------------------------
-m_winsorizeApply = function(Matrix[Double] X, Matrix[Double] qLower, Matrix[Double] qUpper)
+m_winsorizeApply = function(Matrix[Double] X, Matrix[Double] QL, Matrix[Double] QU)
 return (Matrix[Double] Y) {
   # replace values outside [ql,qu] w/ ql and qu respectively
-  Y = min(max(X, qLower), qUpper);
+  Y = min(max(X, QL), QU);
 }
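A sketch of the fit/apply split with the renamed QL/QU outputs: the per-column quantile bounds are computed once on training data and reused on new data, as the call inside winsorize above suggests (synthetic data):

Xtrain = rand(rows=1000, cols=4, seed=41);
Xnew   = rand(rows=200,  cols=4, seed=42);
[Yw, QL, QU] = winsorize(X=Xtrain, ql=0.05, qu=0.95, verbose=FALSE);
Ynew = winsorizeApply(X=Xnew, QL=QL, QU=QU);  # clamps each column to [QL, QU]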
diff --git a/scripts/builtin/xgboost.dml b/scripts/builtin/xgboost.dml
index ae6bcfcce51..b48e2cf6ac6 100644
--- a/scripts/builtin/xgboost.dml
+++ b/scripts/builtin/xgboost.dml
@@ -48,11 +48,11 @@
 #           - R[,2]: 2 (categorical feature)
 #           Feature 1 is a scalar feature and features 2 is a categorical feature
 #           If R is not provided by default all variables are assumed to be scale (1)
-# sml_type       Supervised machine learning type: 1 = Regression(default), 2 = Classification
-# num_trees      Number of trees to be created in the xgboost model
-# learning_rate  Alias: eta. After each boosting step the learning rate controls the weights of the new predictions
-# max_depth      Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
-# lambda         L2 regularization term on weights. Increasing this value will make model more conservative and reduce amount of leaves of a tree
+# smlType   Supervised machine learning type: 1 = Regression(default), 2 = Classification
+# numTrees  Number of trees to be created in the xgboost model
+# lr        Alias: eta. After each boosting step the learning rate controls the weights of the new predictions
+# maxDepth  Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
+# reg       L2 regularization term on weights (lambda). Increasing this value will make the model more conservative and reduce the number of leaves of a tree
 # -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -61,8 +61,8 @@
 # -----------------------------------------------------------------------------------
 m_xgboost = function(Matrix[Double] X, Matrix[Double] y,
-  Matrix[Double] R = matrix(1,rows=1,cols=nrow(X)), Integer sml_type = 1, Integer num_trees = 7,
-  Double learning_rate = 0.3, Integer max_depth = 6, Double lambda = 0.0)
+  Matrix[Double] R = matrix(1,rows=1,cols=nrow(X)), Integer smlType = 1, Integer numTrees = 7,
+  Double lr = 0.3, Integer maxDepth = 6, Double reg = 0.0)
   return (Matrix[Double] M)
 {
   # test if input correct
@@ -79,19 +79,19 @@ m_xgboost = function(Matrix[Double] X, Matrix[Double] y,
   current_prediction = matrix(median(y), rows=nrow(y), cols=1)
   tree_id = 1
-  while(tree_id <= num_trees) {
-    if(sml_type == 1) # Regression
+  while(tree_id <= numTrees) {
+    if(smlType == 1) # Regression
     {
-      curr_M = buildOneTreeRegression(X, y, R, max_depth, current_prediction, tree_id, lambda)
+      curr_M = buildOneTreeRegression(X, y, R, maxDepth, current_prediction, tree_id, reg)
     }
     else # classification
     {
-      assert(sml_type == 2)
-      curr_M = buildOneTreeClassification(X, y, R, max_depth, current_prediction, tree_id, lambda)
+      assert(smlType == 2)
+      curr_M = buildOneTreeClassification(X, y, R, maxDepth, current_prediction, tree_id, reg)
     }
     # in current prediction all previous trees are considered, so we only add the current tree to calculate new predictions
-    current_prediction = calculateNewPredictions(X, sml_type,current_prediction, learning_rate, curr_M)
+    current_prediction = calculateNewPredictions(X, smlType,current_prediction, lr, curr_M)
     tree_id = tree_id + 1
     M = cbind(M, curr_M) # concat the new tree to the existing one (forest-ing)
@@ -101,18 +101,19 @@ m_xgboost = function(Matrix[Double] X, Matrix[Double] y,
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: X: nxn matrix, original input matrix
+# INPUT: smlType: Supervised machine learning type: 1 = Regression(default), 2 = Classification
 # INPUT: current_prediction: nx1 vector of the current prediction for my target features y (1st run is init prediction)
-# INPUT: learning_rate: set by user
+# INPUT: lr: learning rate set by user
 # INPUT: curr_M: The current M matrix with the current tree
 # OUTPUT: new_prediction: x1 vector of new new_prediction for my target features y
-calculateNewPredictions = function(Matrix[Double] X, Integer sml_type, Matrix[Double] current_prediction,
-  Double learning_rate, Matrix[Double] curr_M)
+calculateNewPredictions = function(Matrix[Double] X, Integer smlType, Matrix[Double] current_prediction,
+  Double lr, Matrix[Double] curr_M)
 return (Matrix[Double] new_prediction)
 {
   new_prediction = matrix(0, rows=nrow(current_prediction), cols=1)
   start_node_current_tree = curr_M[,1]
-  if(sml_type == 1) # Regression
+  if(smlType == 1) # Regression
   {
     for(entry in 1:nrow(X)) # go though each entry in X and calculate the new prediction
     {
@@ -120,7 +121,7 @@ calculateNewPredictions = function(Matrix[Double] X, Integer sml_type, Matrix[Do
       output_value = getOutputValueForEntry(X[entry,], curr_M, start_node_current_tree)
       output_values = cbind(output_values, as.matrix(output_value))
-      new_prediction[entry,] = current_prediction[entry,] + learning_rate * sum(output_values)
+      new_prediction[entry,] = current_prediction[entry,] + lr * sum(output_values)
     }
   }
   else # Classification
   {
@@ -135,7 +136,7 @@ calculateNewPredictions = function(Matrix[Double] X, Integer sml_type, Matrix[Do
         log_odds = 0
       else
         log_odds = log(odds / (1 - odds))
-      x = (log_odds + learning_rate * sum(output_values))
+      x = (log_odds + lr * sum(output_values))
       e = 2.7182818284
       new_prediction[entry,] = e^x / (1 + e^x)
     }
@@ -197,16 +198,16 @@ getOutputValueForEntry = function(Matrix[Double] row_vector,
 # INPUT: R The matrix R which for each feature in X contains the following information
 #        - R[,2]: 1 (scalar feature)
 #        - R[,1]: 2 (categorical feature)
-# INPUT: max_depth: the max depth of a tree
+# INPUT: maxDepth: the max depth of a tree
 # INPUT: prediction: nx1 vector, my current predictions for my target value y
 # INPUT: tree_id: The current tree id, starting at 1
-# INPUT: lambda: the regularization parameter lambda
+# INPUT: reg: the regularization parameter lambda
 # OUTPUT: M: the current M matrix of this tree
-buildOneTreeRegression = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] R, Integer max_depth,
-  Matrix[Double] prediction, Double tree_id, Double lambda)
+buildOneTreeRegression = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] R, Integer maxDepth,
+  Matrix[Double] prediction, Double tree_id, Double reg)
 return (Matrix[Double] M)
 {
-  sml_type = 1 # regression
+  smlType = 1 # regression
   M = matrix(0,rows=6,cols=0)
   node_queue = matrix(1, rows=1, cols=1) # Add first Node
@@ -235,15 +236,15 @@ buildOneTreeRegression = function(Matrix[Double] X, Matrix[Double] y, Matrix[Dou
     best_feature_index = 0.00
     done = FALSE
-    if(available_rows > 1 & max_depth > level & done == FALSE) # leaf check or max depth check
+    if(available_rows > 1 & maxDepth > level & done == FALSE) # leaf check or max depth check
     {
-      best_feature_index = findBestFeature(X=curr_X, y=curr_y, sml_type=sml_type)
+      best_feature_index = findBestFeature(X=curr_X, y=curr_y, smlType=smlType)
       type = getTypeOfFeature(R, best_feature_index)
      if(type == 1.0) # SCALAR
      {
-        similarity_score = calculateSimilarityScore(residual_matrix, lambda)
-        [best_split_threshold, best_gain] = findBestSplit(sml_type, curr_X[,best_feature_index], similarity_score, curr_prediction, lambda)
+        similarity_score = calculateSimilarityScore(residual_matrix, reg)
+        [best_split_threshold, best_gain] = findBestSplit(smlType, curr_X[,best_feature_index], similarity_score, curr_prediction, reg)
         has_child = best_gain > 0 # if the gain is < 0, the split is worse than the current node
       }
       else # CATEGORICAL
@@ -274,7 +275,7 @@ buildOneTreeRegression = function(Matrix[Double] X, Matrix[Double] y, Matrix[Dou
       }
       else # has no child => must be leaf
      {
-        output_value = calculateOutputValue(residual_matrix, lambda)
+        output_value = calculateOutputValue(residual_matrix, reg)
         # offset, best_feature_idx, threshold
         M = addOutputRow(M, node, tree_id, R, 0.0, 0.0, 0.0, output_value)
       }
@@ -289,16 +290,16 @@ buildOneTreeRegression = function(Matrix[Double] X, Matrix[Double] y, Matrix[Dou
 # INPUT: R The matrix R which for each feature in X contains the following information
 #        - R[,2]: 1 (scalar feature)
 #        - R[,1]: 2 (categorical feature)
-# INPUT: max_depth: the max depth of a tree
+# INPUT: maxDepth: the max depth of a tree
 # INPUT: prediction: nx1 vector, my current predictions for my target value y
 # INPUT: tree_id: The current tree id, starting at 1
-# INPUT: lambda: the regularization parameter lambda
+# INPUT: reg: the regularization parameter lambda
 # OUTPUT: M: the current M matrix of this tree
-buildOneTreeClassification = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] R, Integer max_depth,
-  Matrix[Double] prediction, Double tree_id, Double lambda)
+buildOneTreeClassification = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] R, Integer maxDepth,
+  Matrix[Double] prediction, Double tree_id, Double reg)
 return (Matrix[Double] M)
 {
-  sml_type = 2 # classification
+  smlType = 2 # classification
   M = matrix(0,rows=6,cols=0)
   node_queue = matrix(1, rows=1, cols=1) # Add first Node
@@ -328,15 +329,15 @@ buildOneTreeClassification = function(Matrix[Double] X, Matrix[Double] y, Matrix
     count = sum(curr_y)
     done = (count == 0) | (count == nrow(curr_y));
-    if(available_rows > 1 & max_depth > level & !done) # leaf check or max depth check
+    if(available_rows > 1 & maxDepth > level & !done) # leaf check or max depth check
     {
-      best_feature_index = findBestFeature(X=curr_X, y=curr_y, sml_type=sml_type)
+      best_feature_index = findBestFeature(X=curr_X, y=curr_y, smlType=smlType)
       type = getTypeOfFeature(R, best_feature_index)
      if(type == 1.0) # SCALAR
      {
-        similarity_score = calculateSimilarityScoreClassification(residual_matrix, curr_prediction, lambda)
-        [best_split_threshold, best_gain] = findBestSplit(sml_type, curr_X[,best_feature_index], similarity_score, curr_prediction, lambda)
+        similarity_score = calculateSimilarityScoreClassification(residual_matrix, curr_prediction, reg)
+        [best_split_threshold, best_gain] = findBestSplit(smlType, curr_X[,best_feature_index], similarity_score, curr_prediction, reg)
         has_child = best_gain > 0 # if the gain is < 0, the split is worse than the current node
       }
       else # CATEGORICAL
@@ -368,7 +369,7 @@ buildOneTreeClassification = function(Matrix[Double] X, Matrix[Double] y, Matrix
       }
       else # has no child => must be leaf
      {
-        output_value = calculateOutputValueClassification(residual_matrix, curr_prediction, lambda)
+        output_value = calculateOutputValueClassification(residual_matrix, curr_prediction, reg)
         # offset, best_feature_idx, threshold
         M = addOutputRow(M, node, tree_id, R, 0.0, 0.0, 0.0, output_value)
       }
@@ -532,7 +533,7 @@ dataQueuePush = function(Matrix[Double] left, Matrix[Double] right, Matrix[Doubl
 # INPUT: X: a nxn matrix (the current samples with all features we observe)
 # INPUT: y: a 1xn matrix (the current y to all observed samples)
 # OUTPUT: lowest_residuals_index: the feature index with the lowest residuals
-findBestFeature = function(Matrix[Double] X, Matrix[Double] y, Integer sml_type)
+findBestFeature = function(Matrix[Double] X, Matrix[Double] y, Integer smlType)
 return (Integer lowest_residuals_index)
 {
   lowest_residuals = 0
@@ -542,7 +543,7 @@ findBestFeature = function(Matrix[Double] X, Matrix[Double] y, Integer sml_type)
     current_feature = X[,i]
     # TODO investigate if glm is necessary here
-    if(sml_type == 1) # Regression
+    if(smlType == 1) # Regression
       weights = glm(X=current_feature, Y=y, dfam=1, verbose=FALSE)
     else # Classification
       weights = glm(X=current_feature, Y=y, dfam=2, verbose=FALSE)
@@ -561,7 +562,7 @@ findBestFeature = function(Matrix[Double] X, Matrix[Double] y, Integer sml_type)
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: one_featureX: a 1xn matrix (one feature with all values)
 # OUTPUT: best_split: the best split (highest gain indicates best splitting of datasets)
-findBestSplit = function(Integer sml_type, Matrix[Double] one_featureX, Double sim_score_parent, Matrix[Double] predictions, Double lambda)
+findBestSplit = function(Integer smlType, Matrix[Double] one_featureX, Double sim_score_parent, Matrix[Double] predictions, Double reg)
 return (Double best_split, Double best_gain)
 {
   assert(ncol(one_featureX) == 1)
@@ -577,13 +578,13 @@ findBestSplit = function(Integer sml_type, Matrix[Double] one_featureX, Double s
     current_split = average(ordered_X[i,], ordered_X[i+1,])
     [left, right] = splitMatrixByValue(one_featureX, current_split)
-    if(sml_type == 1) { # Regression
-      sim_score_left = calculateSimilarityScore(left, lambda)
-      sim_score_right = calculateSimilarityScore(right, lambda)
+    if(smlType == 1) { # Regression
+      sim_score_left = calculateSimilarityScore(left, reg)
+      sim_score_right = calculateSimilarityScore(right, reg)
     }
     else { # Classification
-      sim_score_left = calculateSimilarityScoreClassification(left, predictions, lambda)
-      sim_score_right = calculateSimilarityScoreClassification(right, predictions, lambda)
+      sim_score_left = calculateSimilarityScoreClassification(left, predictions, reg)
+      sim_score_right = calculateSimilarityScoreClassification(right, predictions, reg)
     }
     current_gain = sim_score_left + sim_score_right - sim_score_parent
@@ -639,31 +640,31 @@ splitMatrixByCategory = function(Matrix[Double] curr_X, Matrix[Double] X)
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: row_vector: a 1xn matrix (one feature with all residuals)
 # OUTPUT: similarity_score: the similarity score of the residuals
-calculateSimilarityScore = function (matrix[Double] row_vector, Double lambda)
+calculateSimilarityScore = function (matrix[Double] row_vector, Double reg)
 return (Double similarity_score)
 {
-  similarity_score = (sum(row_vector)^2) / (nrow(row_vector) + lambda);
+  similarity_score = (sum(row_vector)^2) / (nrow(row_vector) + reg);
 }
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: row_vector: a 1xn matrix (one feature with all residuals)
 # OUTPUT: similarity_score: the similarity score of the residuals
-calculateSimilarityScoreClassification = function (matrix[Double] row_vector, matrix[Double] predictions, Double lambda)
+calculateSimilarityScoreClassification = function (matrix[Double] row_vector, matrix[Double] predictions, Double reg)
 return (Double similarity_score)
 {
   nominator = (sum(row_vector)^2)
   d = predictions * (1 - predictions)
-  denominator = sum(d) + lambda
+  denominator = sum(d) + reg
   similarity_score = nominator / denominator;
 }
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: residuals_vector: a 1xn matrix (one feature with all residuals)
 # OUTPUT: similarity_score: the similarity score of the residuals
-calculateOutputValue = function (matrix[Double] residuals_vector, Double lambda)
+calculateOutputValue = function (matrix[Double] residuals_vector, Double reg)
 return (Double output_value)
 {
-  output_value = (sum(residuals_vector)) / (nrow(residuals_vector) + lambda);
+  output_value = (sum(residuals_vector)) / (nrow(residuals_vector) + reg);
   if(output_value == 'NaN') # just in case we have a node with no sample inside
     output_value = 0.0
 }
@@ -671,12 +672,12 @@ calculateOutputValue = function (matrix[Double] residuals_vector, Double lambda)
 #-----------------------------------------------------------------------------------------------------------------------
 # INPUT: residuals_vector: a 1xn matrix (one feature with all residuals)
 # OUTPUT: similarity_score: the similarity score of the residuals
-calculateOutputValueClassification = function (matrix[Double] residuals_vector, matrix[Double] predictions, Double lambda)
+calculateOutputValueClassification = function (matrix[Double] residuals_vector, matrix[Double] predictions, Double reg)
 return (Double output_value)
 {
   nominator = (sum(residuals_vector))
   d = predictions * (1 - predictions)
-  denominator = sum(d) + lambda
+  denominator = sum(d) + reg
   if(denominator == 0)
     output_value = 0
   else
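A regression sketch of the renamed xgboost parameters, paired with the matching predictor below (synthetic data; the same lr is passed to training and prediction so the boosted contributions are weighted consistently):

X = rand(rows=200, cols=5, seed=51);
y = X %*% rand(rows=5, cols=1, seed=52);
M = xgboost(X=X, y=y, smlType=1, numTrees=5, lr=0.3, maxDepth=4, reg=1.0);
P = xgboostPredictRegression(X=X, M=M, lr=0.3);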
(y_prediction) # ----------------------------------------------------------------------------- -m_xgboostPredictRegression = function(Matrix[Double] X, Matrix[Double] M, Double learning_rate = 0.3) +m_xgboostPredictRegression = function(Matrix[Double] X, Matrix[Double] M, Double lr = 0.3) return (Matrix[Double] P) { nr_trees = max(M[2,]) @@ -56,7 +56,7 @@ m_xgboostPredictRegression = function(Matrix[Double] X, Matrix[Double] M, Double output_value = getOutputValueForEntryPredict(X[entry,], M[, begin_cur_tree:end_cur_tree]) output_values = cbind(output_values, as.matrix(output_value)) } - P[entry,] = initial_prediction + learning_rate * sum(output_values) + P[entry,] = initial_prediction + lr * sum(output_values) } } diff --git a/scripts/perftest/scripts/GLM-predict.dml b/scripts/perftest/scripts/GLM-predict.dml index 2064204382a..96cb4f7fb29 100644 --- a/scripts/perftest/scripts/GLM-predict.dml +++ b/scripts/perftest/scripts/GLM-predict.dml @@ -32,7 +32,7 @@ lpow = ifdef ($lpow, 1.0); # $lpow = 1.0; disp = ifdef ($disp, 1.0); # $disp = 1.0; -[M] = glmPredict(X=X, B=B, ytest=Y, +[M] = glmPredict(X=X, B=B, Ytest=Y, dfam=dfam, vpow=vpow, link=link, lpow=lpow, disp=disp); if( $M != " " ) diff --git a/scripts/perftest/scripts/GLM.dml b/scripts/perftest/scripts/GLM.dml index 783f63c293f..a14c2e6d2e5 100755 --- a/scripts/perftest/scripts/GLM.dml +++ b/scripts/perftest/scripts/GLM.dml @@ -44,6 +44,7 @@ eps = as.double (eps); beta = glm(X=X,Y=Y, dfam=distribution_type, vpow=variance_as_power_of_the_mean, link=link_type, lpow=link_as_power_of_the_mean, yneg=bernoulli_No_label, - icpt=intercept_status, reg=regularization, tol=eps, moi=max_iteration_IRLS, mii=max_iteration_CG) + intercept=intercept_status, reg=regularization, tol=eps, maxIter=max_iteration_IRLS, + maxInnerIter=max_iteration_CG) write (beta, fileB, format=fmtB); diff --git a/scripts/perftest/scripts/Kmeans.dml b/scripts/perftest/scripts/Kmeans.dml index d81865957d5..52b8b72d6dd 100755 --- a/scripts/perftest/scripts/Kmeans.dml +++ b/scripts/perftest/scripts/Kmeans.dml @@ -25,7 +25,7 @@ num_centroids = $k; max_iter = ifdef ($maxi, 1000); # $maxi=1000; eps = ifdef ($tol, 0.000001); # $tol=0.000001; -[C, Y] = kmeans(X = X, k = num_centroids, max_iter = max_iter, eps = eps) +[C, Y] = kmeans(X = X, k = num_centroids, maxIter = max_iter, tol = eps) write (C, fileC, format="text"); write (Y, $prY, format="text"); diff --git a/scripts/perftest/scripts/LinearRegCG.dml b/scripts/perftest/scripts/LinearRegCG.dml index 9f10918b690..d75f8ab7809 100755 --- a/scripts/perftest/scripts/LinearRegCG.dml +++ b/scripts/perftest/scripts/LinearRegCG.dml @@ -32,6 +32,6 @@ regularization = ifdef ($reg, 0.000001); # $reg=0.000001; X = read (fileX); y = read (fileY); -beta_out = lmCG(X = X, y = y, icpt = intercept_status, reg = regularization, tol = tolerance, maxi = max_iteration, verbose = TRUE) +beta_out = lmCG(X = X, y = y, intercept = intercept_status, reg = regularization, tol = tolerance, maxIter = max_iteration, verbose = TRUE) write (beta_out, fileB, format=fmtB); diff --git a/scripts/perftest/scripts/LinearRegDS.dml b/scripts/perftest/scripts/LinearRegDS.dml index ad2c98cd75d..8a91f909d78 100755 --- a/scripts/perftest/scripts/LinearRegDS.dml +++ b/scripts/perftest/scripts/LinearRegDS.dml @@ -30,6 +30,6 @@ regularization = ifdef ($reg, 0.000001); # $reg=0.000001; X = read (fileX); y = read (fileY); -beta_out = lmDS(X = X, y = y, icpt = intercept_status, reg = regularization, verbose = TRUE) +beta_out = lmDS(X = X, y = y, intercept = intercept_status, reg 
= regularization, verbose = TRUE) write (beta_out, fileB, format=fmtB); diff --git a/scripts/perftest/scripts/MultiLogReg.dml b/scripts/perftest/scripts/MultiLogReg.dml index 4e7ad46004b..0e533b8a2b1 100755 --- a/scripts/perftest/scripts/MultiLogReg.dml +++ b/scripts/perftest/scripts/MultiLogReg.dml @@ -34,5 +34,5 @@ tol = as.double ($tol); X = read (fileX); Y_vec = read (fileY); -B_out = multiLogReg(X = X, Y = Y_vec, icpt = intercept_status, tol = tol, reg = regularization, maxi = maxiter, maxii = maxinneriter, verbose = TRUE) +B_out = multiLogReg(X = X, Y = Y_vec, intercept = intercept_status, tol = tol, reg = regularization, maxIter = maxiter, maxInnerIter = maxinneriter, verbose = TRUE) write (B_out, fileB, format=fmtB); diff --git a/scripts/perftest/scripts/alsCG.dml b/scripts/perftest/scripts/alsCG.dml index 2e8115e5778..da9e84c9ddd 100644 --- a/scripts/perftest/scripts/alsCG.dml +++ b/scripts/perftest/scripts/alsCG.dml @@ -32,7 +32,7 @@ check = ifdef($check, TRUE); X = read($X); -[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg, maxi=maxiter, check=check, thr=thr, verbose=verbose); +[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg, maxIterations=maxiter, check=check, threshold=thr, verbose=verbose); write(U, $modelU, format=fmt); write(V, $modelV, format=fmt); diff --git a/scripts/perftest/scripts/alsDS.dml b/scripts/perftest/scripts/alsDS.dml index 6334cc1af4f..aa1b2ad10cc 100755 --- a/scripts/perftest/scripts/alsDS.dml +++ b/scripts/perftest/scripts/alsDS.dml @@ -31,7 +31,7 @@ check = ifdef($check, TRUE); X = read($X); -[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxiter, check=check, thr=thr, verbose=verbose); +[U, V] = alsDS(X=X, rank=rank, reg=reg, maxIterations=maxiter, check=check, threshold=thr, verbose=verbose); write(U, $modelU, format=fmt); write(V, $modelV, format=fmt); diff --git a/scripts/perftest/scripts/m-svm.dml b/scripts/perftest/scripts/m-svm.dml index e0fd125f159..98c86676186 100755 --- a/scripts/perftest/scripts/m-svm.dml +++ b/scripts/perftest/scripts/m-svm.dml @@ -29,7 +29,7 @@ tol = as.double ($tol); X = read($X) Y = read($Y) -model = msvm(X = X, Y = Y, intercept = icpt, epsilon = tol, reg = reg, maxIterations = maxiter, verbose = FALSE) +model = msvm(X = X, Y = Y, intercept = icpt, tol = tol, reg = reg, maxIter = maxiter, verbose = FALSE) extra_model_params = matrix(0, rows=2, cols=ncol(model)) extra_model_params[1, 1] = icpt diff --git a/scripts/perftest/todo/scripts/decision-tree.dml b/scripts/perftest/todo/scripts/decision-tree.dml index d8875328718..107e5516d3a 100644 --- a/scripts/perftest/todo/scripts/decision-tree.dml +++ b/scripts/perftest/todo/scripts/decision-tree.dml @@ -79,7 +79,7 @@ fmtO = ifdef($fmt, "text"); X = read($X); Y_bin = read($Y); R = matrix(1, rows=1, cols=ncol(X)); - -M = decisionTree(X = X, Y = Y_bin, R = R, bins = num_bins, depth = depth); +# TODO: deprecated decisionTree function call +M = decisionTree(X = X, Y = Y_bin, R = R, bins = num_bins, maxDepth = depth); write (M, fileM, format = fmtO); diff --git a/scripts/perftest/todo/scripts/random-forest.dml b/scripts/perftest/todo/scripts/random-forest.dml index c01ecd745e4..94bf9e12b90 100644 --- a/scripts/perftest/todo/scripts/random-forest.dml +++ b/scripts/perftest/todo/scripts/random-forest.dml @@ -85,6 +85,8 @@ X = read($X); Y_bin = read($Y); R = matrix(0, cols=0, rows=0); + +# TODO: deprecated randomForest function call [M, C, S_map, C_map] = randomForest(X = X, Y = Y_bin, R = R, bins = num_bins, depth = depth, num_leaf = num_leaf, num_samples = threshold,
num_trees = num_trees, subsamp_rate = rate, feature_subset = fpow, impurity = imp); diff --git a/scripts/pipelines/scripts/cleaning.dml b/scripts/pipelines/scripts/cleaning.dml index 73200bdde85..5f505328605 100644 --- a/scripts/pipelines/scripts/cleaning.dml +++ b/scripts/pipelines/scripts/cleaning.dml @@ -104,7 +104,7 @@ startCleaning = function(Frame[Unknown] F, Frame[Unknown] logical, String target - [pip, hp, acc] = bandit(X_train=X_train, Y_train=y_train, mask=getMask, MLhp=opt, + [pip, hp, acc] = bandit(X=X_train, Y=y_train, mask=getMask, MLhp=opt, schema=getSchema, lp=logical, primitives=primitives, param=param, k=k, testAccuracy=d_accuracy, isWeighted=isWeighted, R=R, cv=cv, verbose=TRUE); # acc = as.matrix(1) @@ -158,12 +158,12 @@ testBestPipeline = function(Frame[Unknown] pip, Matrix[Double] hp, Matrix[Double y_test_clean = Y_train[trRow+1:nrow(X_train), ] # classify after cleaning - betas = multiLogReg(X=X_train_clean, Y=y_train_clean, icpt=1, - reg=as.scalar(MLhp[1,1]), tol= 1e-9, maxi=as.scalar(MLhp[1,2]), - maxii= 50, verbose=FALSE); + betas = multiLogReg(X=X_train_clean, Y=y_train_clean, intercept=1, + reg=as.scalar(MLhp[1,1]), tol= 1e-9, maxIter=as.scalar(MLhp[1,2]), + maxInnerIter= 50, verbose=FALSE); [c_prob, c_yhat, c_accuracy] = multiLogRegPredict(X_test_clean, betas, y_test_clean, FALSE) - c_accuracy = getAccuracy(y_test_clean, c_yhat, isWeighted) + c_accuracy = getAccuracy(c_yhat, y_test_clean, isWeighted) [confusionCount_c, confusionAVG_c] = confusionMatrix(P=c_yhat, Y=y_test_clean) print("accuracy of dirty data "+dirAcc) @@ -338,7 +338,7 @@ return(Matrix[Double] sortedX, Matrix[Double] sortedY) # print('ncol in dx train '+ncol(dX_train)) while(FALSE){} # learn model - B = multiLogReg(X=dX_train, Y=y, icpt=1, reg=0, tol=1e-6, maxi = 100, maxii= 0, verbose=FALSE); + B = multiLogReg(X=dX_train, Y=y, intercept=1, reg=0, tol=1e-6, maxIter = 100, maxInnerIter= 0, verbose=FALSE); [prob,yhat,accuracy] = multiLogRegPredict(X=dX_train, B=B, Y=y, verbose=FALSE); # print("the accuracy "+accuracy) @@ -480,7 +480,7 @@ classifyDirty = function(Matrix[Double] Xtrain, Matrix[Double] ytrain, Matrix[Do accuracy = mean(accuracy) # # learn model - # B = multiLogReg(X=dX_train, Y=ytrain, icpt=2, reg=as.scalar(opt[1,1]), maxi = as.scalar(opt[1,2]), maxii= 0, verbose=FALSE); + # B = multiLogReg(X=dX_train, Y=ytrain, intercept=2, reg=as.scalar(opt[1,1]), maxIter = as.scalar(opt[1,2]), maxInnerIter= 0, verbose=FALSE); # [M,pred,accuracy] = multiLogRegPredict(X=dX_test, B=B, Y=ytest, verbose=FALSE); # if(isWeighted) @@ -536,8 +536,8 @@ return (Matrix[Double] accuracyMatrix) trainy = trainset[, 1] testX = testset[, 2:ncol(testset)] testy = testset[, 1] - beta = multiLogReg(X=trainX, Y=trainy, icpt=1, reg=as.scalar(MLhp[1,1]), tol= 1e-9, - maxi=as.scalar(MLhp[1,2]), maxii= 50, verbose=FALSE); + beta = multiLogReg(X=trainX, Y=trainy, intercept=1, reg=as.scalar(MLhp[1,1]), tol= 1e-9, + maxIter=as.scalar(MLhp[1,2]), maxInnerIter= 50, verbose=FALSE); [prob, yhat, a] = multiLogRegPredict(testX, beta, testy, FALSE) - accuracy = getAccuracy(testy, yhat, isWeighted) + accuracy = getAccuracy(yhat, testy, isWeighted) accuracyMatrix[i] = accuracy diff --git a/scripts/pipelines/scripts/utils.dml b/scripts/pipelines/scripts/utils.dml index ff98d13910a..98ce58bd81d 100644 --- a/scripts/pipelines/scripts/utils.dml +++ b/scripts/pipelines/scripts/utils.dml @@ -90,7 +90,7 @@ doErrorSample = function(Matrix[Double] eX, Matrix[Double] eY, Double lq, Double filterMask = matrix(1, rows=nrow(eY), cols=1) else { # # # prepare feature vector for NB - beta =
multiLogReg(X=eX, Y=eY, icpt=1, reg=1e-3, tol=1e-6, maxi=20, maxii=20, verbose=FALSE); + beta = multiLogReg(X=eX, Y=eY, intercept=1, reg=1e-3, tol=1e-6, maxIter=20, maxInnerIter=20, verbose=FALSE); [trainProbs, yhat, accuracy] = multiLogRegPredict(eX, beta, eY, FALSE) diff --git a/scripts/staging/bayesian_optimization/test/bayesianOptimizationMLTest.dml b/scripts/staging/bayesian_optimization/test/bayesianOptimizationMLTest.dml index 268ef120fb2..002cfed9ce8 100644 --- a/scripts/staging/bayesian_optimization/test/bayesianOptimizationMLTest.dml +++ b/scripts/staging/bayesian_optimization/test/bayesianOptimizationMLTest.dml @@ -39,7 +39,7 @@ return (Double index) index = as.scalar(rowIndexMin(t(alphas))); } -params = list("icpt", "reg", "tol", "maxi", "verbose"); +params = list("intercept", "reg", "tol", "maxIter", "verbose"); paramValues = list(as.matrix(0), 10^seq(0,-3), 10^seq(-6,-10), 10^seq(3,6), as.matrix(1)); N = 200; @@ -73,10 +73,10 @@ opt = bayOpt::m_bayesianOptimization( B1 = lm( X=xTrain, y=yTrain, - icpt = as.scalar(opt[1,1]), + intercept = as.scalar(opt[1,1]), reg = as.scalar(opt[1,2]), tol = as.scalar(opt[1,3]), - maxi = as.scalar(opt[1,4]), + maxIter = as.scalar(opt[1,4]), verbose = FALSE ); diff --git a/scripts/staging/clusterAndClassify/clusteredClassification.dml b/scripts/staging/clusterAndClassify/clusteredClassification.dml index 81a0d4805a0..7c40a418a33 100644 --- a/scripts/staging/clusterAndClassify/clusteredClassification.dml +++ b/scripts/staging/clusterAndClassify/clusteredClassification.dml @@ -47,7 +47,7 @@ X = scale(X=X) [Xtrain,Xtest,ytrain,ytest] = split(X=X,Y=y,f=0.7,cont=FALSE,seed=7) # learn baseline model -B = multiLogReg(X=Xtrain, Y=ytrain, maxii=50, icpt=2, reg=0.001, verbose=FALSE); +B = multiLogReg(X=Xtrain, Y=ytrain, maxInnerIter=50, intercept=2, reg=0.001, verbose=FALSE); [M,yhat,acc] = multiLogRegPredict(X=Xtrain, B=B, Y=ytrain, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtest, B=B, Y=ytest, verbose=TRUE); @@ -67,7 +67,7 @@ for(k in 2:16) { Xi = removeEmpty(target=Xtrain, margin="rows", select=I); yi = removeEmpty(target=ytrain, margin="rows", select=I); if( sum(I) > 15 & (max(yi)-min(yi)) > 0 ) { - Bi = multiLogReg(X=Xi, Y=yi, maxii=50, icpt=2, reg=0.001, verbose=FALSE); + Bi = multiLogReg(X=Xi, Y=yi, maxInnerIter=50, intercept=2, reg=0.001, verbose=FALSE); [Mi,yhati,acci] = multiLogRegPredict(X=Xi, B=Bi, Y=yi, verbose=FALSE); acctrain += acci*nrow(Xi); count = count+nrow(Xi); models = append(models, Bi); diff --git a/scripts/staging/learnedSampling/1_Data_Model_Prep.dml b/scripts/staging/learnedSampling/1_Data_Model_Prep.dml index 99d5a98e826..77ebd3fc836 100644 --- a/scripts/staging/learnedSampling/1_Data_Model_Prep.dml +++ b/scripts/staging/learnedSampling/1_Data_Model_Prep.dml @@ -35,7 +35,7 @@ X = scale(X=X) [Xtrain,Xtest,ytrain,ytest] = split(X=X,Y=y,f=0.7,cont=FALSE,seed=7) # learn model -B = multiLogReg(X=Xtrain, Y=ytrain, maxii=50, icpt=2, reg=0.001, verbose=TRUE); +B = multiLogReg(X=Xtrain, Y=ytrain, maxInnerIter=50, intercept=2, reg=0.001, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtrain, B=B, Y=ytrain, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtest, B=B, Y=ytest, verbose=TRUE); diff --git a/scripts/staging/learnedSampling/2_Baseline_Sampling.dml b/scripts/staging/learnedSampling/2_Baseline_Sampling.dml index 9e5aa5a50cf..3891519b513 100644 --- a/scripts/staging/learnedSampling/2_Baseline_Sampling.dml +++ b/scripts/staging/learnedSampling/2_Baseline_Sampling.dml @@ -40,7 +40,7 @@ for(i in 1:nrow(sf)) { sfi = 
as.scalar(sf[i]); print("Baseline uniform sampling with f="+sfi+", nrow="+round(nrow(X)*sfi)) [Xtrain2,Xtest2,ytrain2,ytest2] = split(X=X,Y=y,f=sfi,cont=FALSE,seed=7) - B = multiLogReg(X=Xtrain2, Y=ytrain2, maxii=50, icpt=2, reg=0.001, verbose=FALSE); + B = multiLogReg(X=Xtrain2, Y=ytrain2, maxInnerIter=50, intercept=2, reg=0.001, verbose=FALSE); [M,yhat,acc] = multiLogRegPredict(X=Xtrain2, B=B, Y=ytrain2, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtest, B=B, Y=ytest, verbose=TRUE); diff --git a/scripts/staging/learnedSampling/4_CombinedTraining.dml b/scripts/staging/learnedSampling/4_CombinedTraining.dml index 1abbbafa6eb..96db0a9ad66 100644 --- a/scripts/staging/learnedSampling/4_CombinedTraining.dml +++ b/scripts/staging/learnedSampling/4_CombinedTraining.dml @@ -67,7 +67,7 @@ ytrain2 = removeEmpty(target=y, margin="rows", select=(S>reg_thr)); print("Model training on sample "+nrow(Xtrain2)) print(" -- with labels 1: "+sum(ytrain2==1)+", 2: "+sum(ytrain2==2)) -B = multiLogReg(X=Xtrain2, Y=ytrain2, maxii=50, icpt=0, reg=0.001, verbose=TRUE); +B = multiLogReg(X=Xtrain2, Y=ytrain2, maxInnerIter=50, intercept=0, reg=0.001, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtrain2, B=B, Y=ytrain2, verbose=TRUE); [M,yhat,acc] = multiLogRegPredict(X=Xtest, B=B, Y=ytest, verbose=TRUE); diff --git a/scripts/tutorials/federated/code/exp/adult.dml b/scripts/tutorials/federated/code/exp/adult.dml index aa4084aa258..57eb0b8b9c8 100644 --- a/scripts/tutorials/federated/code/exp/adult.dml +++ b/scripts/tutorials/federated/code/exp/adult.dml @@ -38,7 +38,7 @@ jspec1 = read("code/exp/adult_spec1.json", data_type="scalar", value_type="strin # model training y = X0[,ncol(X0)] X = X0[,2:(ncol(X0)-1)] -B = multiLogReg(X=X, Y=y, icpt=2, verbose=TRUE) +B = multiLogReg(X=X, Y=y, intercept=2, verbose=TRUE) # Predicting [m, pred, acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=TRUE) diff --git a/scripts/tutorials/federated/code/exp/criteo.dml b/scripts/tutorials/federated/code/exp/criteo.dml index a79edba4a21..24c7c7f3120 100644 --- a/scripts/tutorials/federated/code/exp/criteo.dml +++ b/scripts/tutorials/federated/code/exp/criteo.dml @@ -38,7 +38,7 @@ jspec1 = read("code/exp/criteo_spec1.json", data_type="scalar", value_type="stri y = X0[,1] + 1; X = X0[,2:ncol(X0)] -B = multiLogReg(X=X, Y=y, icpt=2, maxi=30, verbose=TRUE); +B = multiLogReg(X=X, Y=y, intercept=2, maxIter=30, verbose=TRUE); ## done [m, pred, acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=TRUE) diff --git a/scripts/tutorials/federated/code/exp/lm.dml b/scripts/tutorials/federated/code/exp/lm.dml index 050fced4fff..a9b8ea082db 100644 --- a/scripts/tutorials/federated/code/exp/lm.dml +++ b/scripts/tutorials/federated/code/exp/lm.dml @@ -22,6 +22,6 @@ X = read($1) Y = read($2) -m = lm(X=X, y=Y, reg= 1e-16, tol=1e-16, maxi= 100, verbose=$3) +m = lm(X=X, y=Y, reg= 1e-16, tol=1e-16, maxIter= 100, verbose=$3) print(toString(m)) diff --git a/scripts/tutorials/federated/code/exp/mLogReg.dml b/scripts/tutorials/federated/code/exp/mLogReg.dml index c7d20b8f961..55f7f363f7b 100644 --- a/scripts/tutorials/federated/code/exp/mLogReg.dml +++ b/scripts/tutorials/federated/code/exp/mLogReg.dml @@ -24,7 +24,7 @@ Y = read($2) + 1 Xt = read($3) Yt = read($4) + 1 -beta = multiLogReg(X=X, Y=Y, verbose=$5, maxi=10) +beta = multiLogReg(X=X, Y=Y, verbose=$5, maxIter=10) [m, pred, acc] = multiLogRegPredict(X=Xt, B=beta, Y=Yt) diff --git a/src/main/python/docs/source/code/guide/algorithms/FullScript.py 
b/src/main/python/docs/source/code/guide/algorithms/FullScript.py index e8cd82cc1f6..6ac9c994b14 100644 --- a/src/main/python/docs/source/code/guide/algorithms/FullScript.py +++ b/src/main/python/docs/source/code/guide/algorithms/FullScript.py @@ -35,7 +35,7 @@ # Train Data X_ds = sds.from_numpy(X) Y_ds = sds.from_numpy(Y) + 1.0 - bias = multiLogReg(X_ds, Y_ds, maxi=30, verbose=False) + bias = multiLogReg(X_ds, Y_ds, maxIter=30, verbose=False) # Test data Xt_ds = sds.from_numpy(Xt) Yt_ds = sds.from_numpy(Yt) + 1.0 diff --git a/src/main/python/generator/dml_parser.py b/src/main/python/generator/dml_parser.py index 2abffb021f6..edce6a6a76a 100644 --- a/src/main/python/generator/dml_parser.py +++ b/src/main/python/generator/dml_parser.py @@ -28,7 +28,7 @@ class FunctionParser(object): header_input_pattern = r"^[ \t\n]*[#]+[ \t\n]*input[ \t\n\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\n\-]*[\s#\-]*$" header_output_pattern = r"[\s#\-]*[#]+[ \t]*(return|output)[ \t\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\-]*[\s#\-]*$" - function_pattern = r"^[ms]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*" + function_pattern = r"^[msf]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*" # parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*(?=return)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*" header_parameter_pattern = r"[\s#\-]*[#]+[ \t]*([\w|-]+)[\s]+([\w]+)[\s]+([\w,\d.\"\-]+)[\s]+([\w|\W]+)" divider_pattern = r"[\s#\-]*" diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index 95eb5dd2070..19773e5cb55 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -30,6 +30,7 @@ from .builtin.alsDS import alsDS from .builtin.alsPredict import alsPredict from .builtin.alsTopkPredict import alsTopkPredict +from .builtin.apply_pipeline import apply_pipeline from .builtin.arima import arima from .builtin.auc import auc from .builtin.autoencoder_2layer import autoencoder_2layer @@ -38,6 +39,8 @@ from .builtin.components import components from .builtin.confusionMatrix import confusionMatrix from .builtin.cor import cor +from .builtin.correctTypos import correctTypos +from .builtin.correctTyposApply import correctTyposApply from .builtin.cov import cov from .builtin.cox import cox from .builtin.cspline import cspline @@ -49,15 +52,22 @@ from .builtin.decisionTree import decisionTree from .builtin.decisionTreePredict import decisionTreePredict from .builtin.deepWalk import deepWalk +from .builtin.denialConstraints import denialConstraints from .builtin.differenceStatistics import differenceStatistics from .builtin.discoverFD import discoverFD from .builtin.dist import dist +from .builtin.dmv import dmv +from .builtin.ema import ema from .builtin.executePipeline import executePipeline from .builtin.f1Score import f1Score from .builtin.fdr import fdr from .builtin.ffPredict import ffPredict from .builtin.ffTrain import ffTrain +from .builtin.fit_pipeline import fit_pipeline +from .builtin.fixInvalidLengths import fixInvalidLengths +from .builtin.fixInvalidLengthsApply import fixInvalidLengthsApply from .builtin.flattenQuantile import flattenQuantile +from .builtin.frameSort import frameSort from .builtin.frequencyEncode import frequencyEncode from .builtin.frequencyEncodeApply import frequencyEncodeApply from .builtin.garch import garch @@ -96,6 +106,7 @@ from .builtin.impurityMeasures import impurityMeasures from .builtin.imputeByFD import imputeByFD from 
.builtin.imputeByFDApply import imputeByFDApply +from .builtin.imputeByKNN import imputeByKNN from .builtin.imputeByMean import imputeByMean from .builtin.imputeByMeanApply import imputeByMeanApply from .builtin.imputeByMedian import imputeByMedian @@ -125,6 +136,7 @@ from .builtin.mape import mape from .builtin.matrixProfile import matrixProfile from .builtin.mcc import mcc +from .builtin.mdedup import mdedup from .builtin.mice import mice from .builtin.miceApply import miceApply from .builtin.mse import mse @@ -152,6 +164,7 @@ from .builtin.pnmf import pnmf from .builtin.ppca import ppca from .builtin.psnr import psnr +from .builtin.quantizeByCluster import quantizeByCluster from .builtin.raGroupby import raGroupby from .builtin.raJoin import raJoin from .builtin.raSelection import raSelection @@ -162,7 +175,9 @@ from .builtin.scaleApply import scaleApply from .builtin.scaleMinMax import scaleMinMax from .builtin.selectByVarThresh import selectByVarThresh +from .builtin.ses import ses from .builtin.setdiff import setdiff +from .builtin.shapExplainer import shapExplainer from .builtin.sherlock import sherlock from .builtin.sherlockPredict import sherlockPredict from .builtin.shortestPath import shortestPath @@ -170,6 +185,7 @@ from .builtin.skewness import skewness from .builtin.sliceLine import sliceLine from .builtin.sliceLineDebug import sliceLineDebug +from .builtin.sliceLineExtract import sliceLineExtract from .builtin.slicefinder import slicefinder from .builtin.smape import smape from .builtin.smote import smote @@ -177,6 +193,7 @@ from .builtin.solve import solve from .builtin.split import split from .builtin.splitBalanced import splitBalanced +from .builtin.sqrtMatrix import sqrtMatrix from .builtin.stableMarriage import stableMarriage from .builtin.statsNA import statsNA from .builtin.steplm import steplm @@ -185,10 +202,12 @@ from .builtin.tSNE import tSNE from .builtin.toOneHot import toOneHot from .builtin.tomeklink import tomeklink +from .builtin.topk_cleaning import topk_cleaning from .builtin.underSampling import underSampling from .builtin.union import union from .builtin.univar import univar from .builtin.vectorToCsv import vectorToCsv +from .builtin.wer import wer from .builtin.winsorize import winsorize from .builtin.winsorizeApply import winsorizeApply from .builtin.xdummy1 import xdummy1 @@ -206,6 +225,7 @@ 'alsDS', 'alsPredict', 'alsTopkPredict', + 'apply_pipeline', 'arima', 'auc', 'autoencoder_2layer', @@ -214,6 +234,8 @@ 'components', 'confusionMatrix', 'cor', + 'correctTypos', + 'correctTyposApply', 'cov', 'cox', 'cspline', @@ -225,15 +247,22 @@ 'decisionTree', 'decisionTreePredict', 'deepWalk', + 'denialConstraints', 'differenceStatistics', 'discoverFD', 'dist', + 'dmv', + 'ema', 'executePipeline', 'f1Score', 'fdr', 'ffPredict', 'ffTrain', + 'fit_pipeline', + 'fixInvalidLengths', + 'fixInvalidLengthsApply', 'flattenQuantile', + 'frameSort', 'frequencyEncode', 'frequencyEncodeApply', 'garch', @@ -272,6 +301,7 @@ 'impurityMeasures', 'imputeByFD', 'imputeByFDApply', + 'imputeByKNN', 'imputeByMean', 'imputeByMeanApply', 'imputeByMedian', @@ -301,6 +331,7 @@ 'mape', 'matrixProfile', 'mcc', + 'mdedup', 'mice', 'miceApply', 'mse', @@ -328,6 +359,7 @@ 'pnmf', 'ppca', 'psnr', + 'quantizeByCluster', 'raGroupby', 'raJoin', 'raSelection', @@ -338,7 +370,9 @@ 'scaleApply', 'scaleMinMax', 'selectByVarThresh', + 'ses', 'setdiff', + 'shapExplainer', 'sherlock', 'sherlockPredict', 'shortestPath', @@ -346,6 +380,7 @@ 'skewness', 'sliceLine', 'sliceLineDebug', + 
'sliceLineExtract', 'slicefinder', 'smape', 'smote', @@ -353,6 +388,7 @@ 'solve', 'split', 'splitBalanced', + 'sqrtMatrix', 'stableMarriage', 'statsNA', 'steplm', @@ -361,10 +397,12 @@ 'tSNE', 'toOneHot', 'tomeklink', + 'topk_cleaning', 'underSampling', 'union', 'univar', 'vectorToCsv', + 'wer', 'winsorize', 'winsorizeApply', 'xdummy1', diff --git a/src/main/python/systemds/operator/algorithm/builtin/adasyn.py b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py index a45c3c96257..ee506c53830 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/adasyn.py +++ b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py @@ -41,7 +41,7 @@ def adasyn(X: Matrix, :param Y: Class labels [shape: n-by-1] :param k: Number of nearest neighbors :param beta: Desired balance level after generation of synthetic data [0, 1] - :param dth: Distribution threshold + :param threshold: Distribution threshold :param seed: Seed for randomized data point selection :return: Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows :return: Class labels aligned with output X diff --git a/src/main/python/systemds/operator/algorithm/builtin/als.py b/src/main/python/systemds/operator/algorithm/builtin/als.py index 50781c97f78..1de8e475144 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/als.py +++ b/src/main/python/systemds/operator/algorithm/builtin/als.py @@ -48,9 +48,9 @@ def als(X: Matrix, + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)) :param reg: Regularization parameter, no regularization if 0.0 - :param maxi: Maximum number of iterations + :param maxIter: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating U and V once - :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared + :param threshold: Assuming check is set to TRUE, the algorithm stops and convergence is declared if the decrease in loss in any two consecutive iterations falls below this threshold; if check is FALSE thr is ignored :param seed: The seed to random parts of the algorithm diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py index 73d8aadcda1..ec0d8e316ba 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py +++ b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py @@ -48,11 +48,11 @@ def alsCG(X: Matrix, + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)) :param reg: Regularization parameter, no regularization if 0.0 - :param maxi: Maximum number of iterations + :param maxIter: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating U and V once - :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared + :param threshold: Assuming check is set to TRUE, the algorithm stops and convergence is declared if the decrease in loss in any two consecutive iterations falls below this threshold; - if check is FALSE thr is ignored + if check is FALSE threshold is ignored :param seed: The seed to random parts of the algorithm :param verbose: If the algorithm should run verbosely :return: An m x r matrix where r is the factorization rank diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py index 907787373b4..d040c97bb1d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py 
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py @@ -41,11 +41,11 @@ def alsDS(X: Matrix, :param X: Location to read the input matrix V to be factorized :param rank: Rank of the factorization :param reg: Regularization parameter, no regularization if 0.0 - :param maxi: Maximum number of iterations + :param maxIter: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating L and R once - :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared + :param threshold: Assuming check is set to TRUE, the algorithm stops and convergence is declared if the decrease in loss in any two consecutive iterations falls below this threshold; - if check is FALSE thr is ignored + if check is FALSE threshold is ignored :param seed: The seed to random parts of the algorithm :param verbose: If the algorithm should run verbosely :return: An m x r matrix where r is the factorization rank diff --git a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py index be1100b4127..63ffc3f66b3 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py +++ b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/arima.py b/src/main/python/systemds/operator/algorithm/builtin/arima.py index c412407e0c9..74fc673fb32 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/arima.py +++ b/src/main/python/systemds/operator/algorithm/builtin/arima.py @@ -36,7 +36,7 @@ def arima(X: Matrix, :param X: The input Matrix to apply Arima on. - :param max_func_invoc: --- + :param maxIter: max_func_invoc :param p: non-seasonal AR order :param d: non-seasonal differencing order :param q: non-seasonal MA order @@ -44,7 +44,7 @@ def arima(X: Matrix, :param D: seasonal differencing order :param Q: seasonal MA order :param s: period in terms of number of time-steps - :param include_mean: center to mean 0, and include in result + :param includeMean: center to mean 0, and include in result :param solver: solver, is either "cg" or "jacobi" :return: The calculated coefficients """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py index 73f2ca9f81c..a0f315ccf4d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py +++ b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py @@ -29,9 +29,9 @@ def autoencoder_2layer(X: Matrix, - num_hidden1: int, - num_hidden2: int, - max_epochs: int, + numHidden1: int, + numHidden2: int, + epochs: int, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ Trains a 2-layer autoencoder with minibatch SGD and step-size decay. 
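For illustration, a minimal usage sketch of the renamed autoencoder_2layer wrapper; this sketch is not part of the patch, and it assumes synthetic numpy data plus the SystemDSContext/from_numpy workflow used elsewhere in this changeset, with the result materialized via compute():

import numpy as np
from systemds.context import SystemDSContext
from systemds.operator.algorithm import autoencoder_2layer

with SystemDSContext() as sds:
    X = sds.from_numpy(np.random.rand(64, 20))  # 64 samples, 20 features
    # renamed hyper-parameters: numHidden1/numHidden2 (layer sizes) and epochs;
    # batchSize is one of the renamed optional kwargs
    model = autoencoder_2layer(X, numHidden1=10, numHidden2=4, epochs=2,
                               batchSize=32).compute()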
@@ -45,20 +45,21 @@ def autoencoder_2layer(X: Matrix, :param X: Filename where the input is stored - :param num_hidden1: Number of neurons in the 1st hidden layer - :param num_hidden2: Number of neurons in the 2nd hidden layer - :param max_epochs: Number of epochs to train for - :param full_obj: If TRUE, Computes objective function value (squared-loss) + :param numHidden1: Number of neurons in the 1st hidden layer + :param numHidden2: Number of neurons in the 2nd hidden layer + :param epochs: Number of epochs to train for + :param fullObj: If TRUE, Computes objective function value (squared-loss) at the end of each epoch. Note that, computing the full objective can take a lot of time. - :param batch_size: Mini-batch size (training parameter) + :param batchSize: Mini-batch size (training parameter) :param step: Initial step size (training parameter) :param decay: Decays step size after each epoch (training parameter) :param mu: Momentum parameter (training parameter) - :param W1_rand: Weights might be initialized via input matrices - :param W2_rand: --- - :param W3_rand: --- - :param W4_rand: --- + :param W1: Weights might be initialized via input matrices + :param W2: --- + :param W3: --- + :param W4: --- + :param Order: --- :return: Matrix storing weights between input layer and 1st hidden layer :return: Matrix storing bias between input layer and 1st hidden layer :return: Matrix storing weights between 1st hidden layer and 2nd hidden layer @@ -70,7 +71,7 @@ def autoencoder_2layer(X: Matrix, :return: Matrix storing the hidden (2nd) layer representation if needed """ - params_dict = {'X': X, 'num_hidden1': num_hidden1, 'num_hidden2': num_hidden2, 'max_epochs': max_epochs} + params_dict = {'X': X, 'numHidden1': numHidden1, 'numHidden2': numHidden2, 'epochs': epochs} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py b/src/main/python/systemds/operator/algorithm/builtin/bandit.py index 8bd5e5ccf7e..b5ca46bf68e 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py +++ b/src/main/python/systemds/operator/algorithm/builtin/bandit.py @@ -28,10 +28,10 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def bandit(X_train: Matrix, - Y_train: Matrix, - X_test: Matrix, - Y_test: Matrix, +def bandit(X: Matrix, + Y: Matrix, + Xtest: Matrix, + Ytest: Matrix, metaList: List, evaluationFunc: str, evalFunHp: Matrix, @@ -48,10 +48,10 @@ def bandit(X_train: Matrix, - :param X_train: --- - :param Y_train: --- - :param X_test: --- - :param Y_test: --- + :param X: --- + :param Y: --- + :param Xtest: --- + :param Ytest: --- :param metaList: --- :param evaluationFunc: --- :param evalFunHp: --- @@ -64,20 +64,19 @@ def bandit(X_train: Matrix, :param cv: --- :param cvk: --- :param verbose: --- - :param output: --- :return: --- """ - params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'lpHp': lpHp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv} + params_dict = {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'lpHp': lpHp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv} params_dict.update(kwargs) - vX_0 = Frame(X_train.sds_context, '') - vX_1 = Matrix(X_train.sds_context, '') - vX_2 = Matrix(X_train.sds_context, '') - vX_3 
= Frame(X_train.sds_context, '') + vX_0 = Frame(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + vX_2 = Matrix(X.sds_context, '') + vX_3 = Frame(X.sds_context, '') output_nodes = [vX_0, vX_1, vX_2, vX_3, ] - op = MultiReturn(X_train.sds_context, 'bandit', output_nodes, named_input_nodes=params_dict) + op = MultiReturn(X.sds_context, 'bandit', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] diff --git a/src/main/python/systemds/operator/algorithm/builtin/components.py b/src/main/python/systemds/operator/algorithm/builtin/components.py index 0630f3ba5f7..478270e9107 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/components.py +++ b/src/main/python/systemds/operator/algorithm/builtin/components.py @@ -40,13 +40,13 @@ def components(G: Matrix, :param X: Location to read the matrix of feature vectors :param Y: Location to read the matrix with category labels - :param icpt: Intercept presence, shifting and rescaling X columns: 0 = no intercept, + :param intercept: Intercept presence, shifting and rescaling X columns: 0 = no intercept, no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 :param tol: tolerance ("epsilon") :param reg: regularization parameter (lambda = 1/C); intercept is not regularized - :param maxi: max. number of outer (Newton) iterations - :param maxii: max. number of inner (conjugate gradient) iterations, 0 = no max + :param maxIter: max. number of outer (Newton) iterations + :param maxInnerIter: max. number of inner (conjugate gradient) iterations, 0 = no max :param verbose: flag specifying if logging information should be printed :return: regression betas as output for prediction """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py index 321a1949f58..54848593de1 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py +++ b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES @@ -48,9 +47,9 @@ def correctTypos(strings: Frame, :param strings: The nx1 input frame of corrupted strings - :param frequency_threshold: Strings that occur above this frequency level will not be corrected - :param distance_threshold: Max distance at which strings are considered similar - :param is_verbose: Print debug information + :param frequencyThreshold: Strings that occur above this frequency level will not be corrected + :param distanceThreshold: Max distance at which strings are considered similar + :param verbose: Print debug information :return: Corrected nx1 output frame """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py index 0a2c61a6f40..5cb6027b8a9 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py @@ -25,12 +25,11 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from 
systemds.utils.consts import VALID_INPUT_TYPES def correctTyposApply(strings: Frame, - distance_matrix: Matrix, + distances: Matrix, dict: Frame, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ @@ -53,14 +52,14 @@ def correctTyposApply(strings: Frame, :param strings: The nx1 input frame of corrupted strings :param nullMask: --- - :param frequency_threshold: Strings that occur above this frequency level will not be corrected - :param distance_threshold: Max distance at which strings are considered similar - :param distance matrix: --- + :param frequencyThreshold: Strings that occur above this frequency level will not be corrected + :param distanceThreshold: Max distance at which strings are considered similar + :param distances: --- :param dict: --- :return: Corrected nx1 output frame """ - params_dict = {'strings': strings, 'distance_matrix': distance_matrix, 'dict': dict} + params_dict = {'strings': strings, 'distances': distances, 'dict': dict} params_dict.update(kwargs) return Matrix(strings.sds_context, 'correctTyposApply', diff --git a/src/main/python/systemds/operator/algorithm/builtin/cox.py b/src/main/python/systemds/operator/algorithm/builtin/cox.py index c184ef715e2..b6ec7b695bf 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/cox.py +++ b/src/main/python/systemds/operator/algorithm/builtin/cox.py @@ -60,8 +60,8 @@ def cox(X: Matrix, if R is not provided by default all variables are considered to be continuous :param alpha: Parameter to compute a 100*(1-alpha)% confidence interval for the betas :param tol: Tolerance ("epsilon") - :param moi: Max. number of outer (Newton) iterations - :param mii: Max. number of inner (conjugate gradient) iterations, 0 = no max + :param maxIter: Max. number of outer (Newton) iterations + :param maxInnerIter: Max. number of inner (conjugate gradient) iterations, 0 = no max :return: A D x 7 matrix M, where D denotes the number of covariates, with the following schema: M[,1]: betas M[,2]: exp(betas) diff --git a/src/main/python/systemds/operator/algorithm/builtin/cspline.py b/src/main/python/systemds/operator/algorithm/builtin/cspline.py index 46258d1f912..ebb56461e76 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/cspline.py +++ b/src/main/python/systemds/operator/algorithm/builtin/cspline.py @@ -30,7 +30,7 @@ def cspline(X: Matrix, Y: Matrix, - inp_x: float, + xPred: float, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ Solves Cubic Spline Interpolation @@ -43,16 +43,16 @@ def cspline(X: Matrix, :param X: 1-column matrix of x values knots. 
It is assumed that x values are monotonically increasing and there are no duplicate points in X :param Y: 1-column matrix of corresponding y values knots - :param inp_x: the given input x, for which the cspline will find predicted y + :param xPred: the given input x, for which the cspline will find predicted y :param mode: Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient) :param tol: Tolerance (epsilon); conjugate gradient procedure terminates early if L2 norm of the beta-residual is less than tolerance * its initial norm - :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum + :param maxIter: Maximum number of conjugate gradient iterations, 0 = no maximum :return: Predicted value :return: Matrix of k parameters """ - params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x} + params_dict = {'X': X, 'Y': Y, 'xPred': xPred} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py index d1ff9b84530..35a640d4d8d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py +++ b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py @@ -30,7 +30,7 @@ def csplineCG(X: Matrix, Y: Matrix, - inp_x: float, + xPred: float, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ Builtin that solves cubic spline interpolation using conjugate gradient algorithm @@ -40,7 +40,7 @@ def csplineCG(X: Matrix, :param X: 1-column matrix of x values knots. It is assumed that x values are monotonically increasing and there are no duplicate points in X :param Y: 1-column matrix of corresponding y values knots - :param inp_x: the given input x, for which the cspline will find predicted y. + :param xPred: the given input x, for which the cspline will find predicted y. :param tol: Tolerance (epsilon); conjugate gradient procedure terminates early if L2 norm of the beta-residual is less than tolerance * its initial norm :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum @@ -48,7 +48,7 @@ :return: Matrix of k parameters """ - params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x} + params_dict = {'X': X, 'Y': Y, 'xPred': xPred} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py b/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py index 0c5b2aa4f3c..b054b126064 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py +++ b/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py @@ -30,7 +30,7 @@ def csplineDS(X: Matrix, Y: Matrix, - inp_x: float): + xPred: float): """ Builtin that solves cubic spline interpolation using a direct solver. @@ -39,12 +39,12 @@ def csplineDS(X: Matrix, :param X: 1-column matrix of x values knots. It is assumed that x values are monotonically increasing and there are no duplicate points in X :param Y: 1-column matrix of corresponding y values knots - :param inp_x: the given input x, for which the cspline will find predicted y. + :param xPred: the given input x, for which the cspline will find predicted y.
:return: Predicted value :return: Matrix of k parameters """ - params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x} + params_dict = {'X': X, 'Y': Y, 'xPred': xPred} vX_0 = Matrix(X.sds_context, '') vX_1 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/cvlm.py b/src/main/python/systemds/operator/algorithm/builtin/cvlm.py index 775ee7cdcb8..f3bf0259670 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/cvlm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/cvlm.py @@ -42,7 +42,7 @@ def cvlm(X: Matrix, :param X: Recorded Data set into matrix :param y: 1-column matrix of response values. :param k: Number of subsets needed; it should always be more than 1 and less than nrow(X) - :param icpt: Intercept presence, shifting and rescaling the columns of X + :param intercept: Intercept presence, shifting and rescaling the columns of X :param reg: Regularization constant (lambda) for L2-regularization. Set to nonzero for highly dependent/sparse/numerous features :return: Response values diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py index a1a751d0aad..4fe11a999fe 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py +++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py @@ -61,15 +61,15 @@ def decisionTree(X: Matrix, :param y: Label matrix in recoded/binned representation :param ctypes: Row-Vector of column types [1 scale/ordinal, 2 categorical] of shape 1-by-(ncol(X)+1), where the last entry is the y type - :param max_depth: Maximum depth of the learned tree (stopping criterion) - :param min_leaf: Minimum number of samples in leaf nodes (stopping criterion), odd number recommended to avoid 50/50 leaf label decisions - :param min_split: Minimum number of samples in leaf for attempting a split - :param max_features: Parameter controlling the number of features used as split - candidates at tree nodes: m = ceil(num_features^max_features) - :param max_values: Parameter controlling the number of values per feature used - as split candidates: nb = ceil(num_values^max_values) - :param max_dataratio: Parameter in [0,1] controlling when to materialize data + :param maxDepth: Maximum depth of the learned tree (stopping criterion) + :param minLeaf: Minimum number of samples in leaf nodes (stopping criterion), + odd number recommended to avoid 50/50 leaf label decisions + :param minSplit: Minimum number of samples in leaf for attempting a split + :param maxFeatures: Parameter controlling the number of features used as split + candidates at tree nodes: m = ceil(numFeatures^maxFeatures) + :param maxValues: Parameter controlling the number of values per feature used + as split candidates: nb = ceil(numValues^maxValues) + :param maxDataRatio: Parameter in [0,1] controlling when to materialize data subsets of X and y on node splits. When set to 0, we always scan the original X and y, which has the benefit of avoiding the allocation and maintenance of data for all active nodes.
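For illustration, a hypothetical call to the renamed decisionTree wrapper under the docstring's assumptions (recoded categorical features and labels; ctypes marks every column, including the label, as categorical); the synthetic data is made up for the sketch and is not part of the patch:

import numpy as np
from systemds.context import SystemDSContext
from systemds.operator.algorithm import decisionTree

with SystemDSContext() as sds:
    X = sds.from_numpy(np.random.randint(1, 4, (100, 5)).astype(float))   # recoded features
    y = sds.from_numpy(np.random.randint(1, 3, (100, 1)).astype(float))   # recoded labels
    ctypes = sds.from_numpy(np.full((1, 6), 2.0))  # ncol(X)+1 entries, all categorical
    # renamed stopping criteria: maxDepth, minLeaf, minSplit
    M = decisionTree(X, y, ctypes, maxDepth=5, minLeaf=3, minSplit=7).compute()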
diff --git a/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py b/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py index 35ddc1f4b05..7a4cb2d56ed 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py +++ b/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py @@ -44,7 +44,7 @@ def deepWalk(Graph: Matrix, :param d: embedding size :param gamma: walks per vertex :param t: walk length - :param alpha: learning rate + :param lr: learning rate :param beta: factor for decreasing learning rate :return: matrix of vertex/word representation (n x d) """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py index 347502b848e..5cdec212965 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py +++ b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/dmv.py b/src/main/python/systemds/operator/algorithm/builtin/dmv.py index deaf3ea8a6b..2955e505e13 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/dmv.py +++ b/src/main/python/systemds/operator/algorithm/builtin/dmv.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/ema.py b/src/main/python/systemds/operator/algorithm/builtin/ema.py index 4e0ccca6bbb..e0324620a68 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/ema.py +++ b/src/main/python/systemds/operator/algorithm/builtin/ema.py @@ -25,12 +25,11 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES def ema(X: Frame, - search_iterations: int, + iter: int, mode: str, freq: int, alpha: float, @@ -42,7 +41,7 @@ def ema(X: Frame, :param X: Frame that contains time series data that needs to be imputed - search_iterations Integer -- Budget iterations for parameter optimization, + :param iter: Integer -- Budget iterations for parameter optimization, used if parameters weren't set :param mode: Type of EMA method. Either "single", "double" or "triple" :param freq: Seasonality when using triple EMA. 
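For illustration, a minimal sketch of the renamed ema wrapper (iter replaces search_iterations); building the frame via from_pandas and the toy series are assumptions of the sketch, not part of this patch:

import pandas as pd
from systemds.context import SystemDSContext
from systemds.operator.algorithm import ema

with SystemDSContext() as sds:
    # toy univariate time series; NaN marks the value to be imputed
    F = sds.from_pandas(pd.DataFrame({"v": [1.0, 2.0, float("nan"), 4.0, 5.0, 6.0, 7.0, 8.0]}))
    smoothed = ema(F, iter=10, mode="single", freq=4,
                   alpha=0.3, beta=0.1, gamma=0.1).compute()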
@@ -52,7 +51,7 @@ :return: Frame with EMA results """ - params_dict = {'X': X, 'search_iterations': search_iterations, 'mode': mode, 'freq': freq, 'alpha': alpha, 'beta': beta, 'gamma': gamma} + params_dict = {'X': X, 'iter': iter, 'mode': mode, 'freq': freq, 'alpha': alpha, 'beta': beta, 'gamma': gamma} return Matrix(X.sds_context, 'ema', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py index 1fffb46f100..2dfabbf9436 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py +++ b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py @@ -28,7 +28,16 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def executePipeline(X: Matrix): +def executePipeline(pipeline: Frame, + X: Matrix, + Y: Matrix, + Xtest: Matrix, + Ytest: Matrix, + metaList: List, + hyperParameters: Matrix, + flagsCount: int, + verbose: bool, + **kwargs: Dict[str, VALID_INPUT_TYPES]): """ This function executes a pipeline. @@ -56,17 +65,30 @@ :return: --- """ - params_dict = {'X': X} + params_dict = {'pipeline': pipeline, 'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'hyperParameters': hyperParameters, 'flagsCount': flagsCount, 'verbose': verbose} + params_dict.update(kwargs) - vX_0 = Matrix(X.sds_context, '') - vX_1 = Matrix(X.sds_context, '') - vX_2 = Matrix(X.sds_context, '') - output_nodes = [vX_0, vX_1, vX_2, ] + vX_0 = Matrix(pipeline.sds_context, '') + vX_1 = Matrix(pipeline.sds_context, '') + vX_2 = Matrix(pipeline.sds_context, '') + vX_3 = Matrix(pipeline.sds_context, '') + vX_4 = Scalar(pipeline.sds_context, '') + vX_5 = Matrix(pipeline.sds_context, '') + vX_6 = Matrix(pipeline.sds_context, '') + vX_7 = Scalar(pipeline.sds_context, '') + vX_8 = List(pipeline.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ] - op = MultiReturn(X.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict) + op = MultiReturn(pipeline.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + vX_5._unnamed_input_nodes = [op] + vX_6._unnamed_input_nodes = [op] + vX_7._unnamed_input_nodes = [op] + vX_8._unnamed_input_nodes = [op] return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py index 3d9614245e9..cda30856a84 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py +++ b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py @@ -38,7 +38,7 @@ def ffPredict(model: List, :param model: Trained ff neural network model :param X: Data used for making predictions - :param batch_size: Batch size + :param batchSize: Batch size :return: Predicted value """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py b/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py index 7efb7a17ee6..6384ad2ba4c 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py +++ b/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py @@ -30,8 +30,8 @@ def ffTrain(X: Matrix, Y: Matrix, - out_activation: str, - loss_fcn: str, + outActivation: str, + lossFn: str, **kwargs: Dict[str,
VALID_INPUT_TYPES]): """ This builtin function trains a simple feed-forward neural network. The architecture of the @@ -42,21 +42,21 @@ def ffTrain(X: Matrix, :param X: Training data :param Y: Labels/Target values - :param batch_size: Batch size + :param batchSize: Batch size :param epochs: Number of epochs - :param learning_rate: Learning rate - :param out_activation: User specified output activation function. Possible values: + :param lr: Learning rate + :param outActivation: User specified output activation function. Possible values: "sigmoid", "relu", "lrelu", "tanh", "softmax", "logits" (no activation). - :param loss_fcn: User specified loss function. Possible values: + :param lossFn: User specified loss function. Possible values: "l1", "l2", "log_loss", "logcosh_loss", "cel" (cross-entropy loss). :param shuffle: Flag which indicates if dataset should be shuffled or not - :param validation_split: Fraction of training set used as validation set + :param validationSplit: Fraction of training set used as validation set :param seed: Seed for model initialization :param verbose: Flag which indicates if function should print to stdout :return: Trained model which can be used in ffPredict """ - params_dict = {'X': X, 'Y': Y, 'out_activation': out_activation, 'loss_fcn': loss_fcn} + params_dict = {'X': X, 'Y': Y, 'outActivation': outActivation, 'lossFn': lossFn} params_dict.update(kwargs) return Matrix(X.sds_context, 'ffTrain', diff --git a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py index 5de40c745f8..48363035d8b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py +++ b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py index b635f31b298..cc0e83a51e4 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py +++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py index cc8fe68aacc..7a8f52debaa 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py @@ -25,28 +25,27 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES def fixInvalidLengthsApply(X: Frame, - mask: Matrix, - qLow: Matrix, - qUp: Matrix): + Mask: Matrix, + QL: Matrix, + QU: Matrix): """ Fix invalid lengths :param X: --- - :param mask: --- - :param ql: --- - :param qu: --- + :param Mask: --- + :param QL: --- + :param QU: --- :return: --- :return: --- """ -
params_dict = {'X': X, 'mask': mask, 'qLow': qLow, 'qUp': qUp} + params_dict = {'X': X, 'Mask': Mask, 'QL': QL, 'QU': QU} return Matrix(X.sds_context, 'fixInvalidLengthsApply', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py index 0bfc7f3afec..2575baefe4b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py +++ b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py @@ -25,7 +25,6 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES diff --git a/src/main/python/systemds/operator/algorithm/builtin/garch.py b/src/main/python/systemds/operator/algorithm/builtin/garch.py index 41caf82a107..252830571ae 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/garch.py +++ b/src/main/python/systemds/operator/algorithm/builtin/garch.py @@ -29,13 +29,13 @@ def garch(X: Matrix, - kmax: int, + iter: int, momentum: float, - start_stepsize: float, - end_stepsize: float, - start_vicinity: float, - end_vicinity: float, - sim_seed: int, + startStepsize: float, + endStepsize: float, + startVicinity: float, + endVicinity: float, + seed: int, verbose: bool): """ This is a builtin function that implements GARCH(1,1), a statistical model used in analyzing time-series data where the variance @@ -49,13 +49,13 @@ def garch(X: Matrix, :param X: The input Matrix to apply Arima on. - :param kmax: Number of iterations + :param iter: Number of iterations :param momentum: Momentum for momentum-gradient descent (set to 0 to deactivate) - :param start_stepsize: Initial gradient-descent stepsize - :param end_stepsize: gradient-descent stepsize at end (linear descent) - :param start_vicinity: proportion of randomness of restart-location for gradient descent at beginning - :param end_vicinity: same at end (linear decay) - :param sim_seed: seed for simulation of process on fitted coefficients + :param startStepsize: Initial gradient-descent stepsize + :param endStepsize: gradient-descent stepsize at end (linear descent) + :param startVicinity: proportion of randomness of restart-location for gradient descent at beginning + :param endVicinity: same at end (linear decay) + :param seed: seed for simulation of process on fitted coefficients :param verbose: verbosity, comments during fitting :return: simulated garch(1,1) process on fitted coefficients :return: variances of simulated fitted process @@ -64,7 +64,7 @@ def garch(X: Matrix, :return: 1-st garch-coefficient of fitted process """ - params_dict = {'X': X, 'kmax': kmax, 'momentum': momentum, 'start_stepsize': start_stepsize, 'end_stepsize': end_stepsize, 'start_vicinity': start_vicinity, 'end_vicinity': end_vicinity, 'sim_seed': sim_seed, 'verbose': verbose} + params_dict = {'X': X, 'iter': iter, 'momentum': momentum, 'startStepsize': startStepsize, 'endStepsize': endStepsize, 'startVicinity': startVicinity, 'endVicinity': endVicinity, 'seed': seed, 'verbose': verbose} vX_0 = Matrix(X.sds_context, '') vX_1 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py b/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py index 8f6a2a30559..c3d2a1a0bf7 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py +++ 
b/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py @@ -28,22 +28,22 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def getAccuracy(y: Matrix, - yhat: Matrix, +def getAccuracy(Y: Matrix, + Ytest: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ This builtin function computes the weighted and simple accuracy for given predictions - :param y: Ground truth (Actual Labels) - :param yhat: Predictions (Predicted labels) + :param Y: Predictions (Predicted labels) + :param Ytest: Ground truth (Actual Labels) :param isWeighted: Flag for weighted or non-weighted accuracy calculation :return: accuracy of the predicted labels """ - params_dict = {'y': y, 'yhat': yhat} + params_dict = {'Y': Y, 'Ytest': Ytest} params_dict.update(kwargs) - return Matrix(y.sds_context, + return Matrix(Y.sds_context, 'getAccuracy', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/glm.py b/src/main/python/systemds/operator/algorithm/builtin/glm.py index 0536c9e221c..e8a1710f9bb 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/glm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/glm.py @@ -48,7 +48,7 @@ def glm(X: Matrix, BETA_MIN_INDEX Column index for the smallest beta value BETA_MAX Largest beta value (regression coefficient), excluding the intercept BETA_MAX_INDEX Column index for the largest beta value - INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0) + INTERCEPT Intercept value, or NaN if there is no intercept (if intercept=0) DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0 DISPERSION_EST Dispersion estimated from the dataset @@ -117,18 +117,18 @@ def glm(X: Matrix, :param lpow: Power for Link function defined as (mean)^power (ignored if link != 1): -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity :param yneg: Response value for Bernoulli "No" label, usually 0.0 or -1.0 - :param icpt: Intercept presence, X columns shifting and rescaling: + :param intercept: Intercept presence, X columns shifting and rescaling: 0 = no intercept, no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 :param reg: Regularization parameter (lambda) for L2 regularization :param tol: Tolerance (epsilon) :param disp: (Over-)dispersion value, or 0.0 to estimate it from data - :param moi: Maximum number of outer (Newton / Fisher Scoring) iterations - :param mii: Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum + :param maxIter: Maximum number of outer (Newton / Fisher Scoring) iterations + :param maxInnerIter: Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum :param verbose: if the algorithm should be verbose - :return: Matrix beta, whose size depends on icpt: - icpt=0: ncol(X) x 1; icpt=1: (ncol(X) + 1) x 1; icpt=2: (ncol(X) + 1) x 2 + :return: Matrix beta, whose size depends on intercept: + intercept=0: ncol(X) x 1; intercept=1: (ncol(X) + 1) x 1; intercept=2: (ncol(X) + 1) x 2 """ params_dict = {'X': X, 'Y': Y} diff --git a/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py index 2609039dc59..f1a7d75ad86 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py +++ b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py @@ -78,7 +78,7 @@ def
glmPredict(X: Matrix, ncol(X) x k: do not add intercept ncol(X)+1 x k: add intercept as given by the last B-row if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3) - :param ytest: Response matrix Y, with the following dimensions: + :param Ytest: Response matrix Y, with the following dimensions: nrow(X) x 1 : for all distributions (dfam=1 or 2 or 3) nrow(X) x 2 : for Binomial (dfam=2) given by (#pos, #neg) counts nrow(X) x k+1: for Multinomial (dfam=3) given by category counts diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmm.py b/src/main/python/systemds/operator/algorithm/builtin/gmm.py index 5c0f89b2863..dc028375512 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/gmm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/gmm.py @@ -38,16 +38,16 @@ def gmm(X: Matrix, :param X: Dataset input to fit the GMM model - :param n_components: Number of components to use in the Gaussian mixture model + :param nComponents: Number of components to use in the Gaussian mixture model :param model: "VVV": unequal variance (full), each component has its own general covariance matrix "EEE": equal variance (tied), all components share the same general covariance matrix "VVI": spherical, unequal volume (diag), each component has its own diagonal covariance matrix "VII": spherical, equal volume (spherical), each component has its own single variance - :param init_param: Initialization algorithm to use to initialize the gaussian weights, valid inputs are: + :param initParams: Initialization algorithm to use to initialize the Gaussian weights, valid inputs are: "kmeans" or "random" - :param iterations: Number of iterations - :param reg_covar: Regularization parameter for covariance matrix + :param maxIter: Number of iterations + :param reg: Regularization parameter for covariance matrix :param tol: Tolerance value for convergence :param seed: The seed value to initialize the values for fitting the GMM. :return: The predictions made by the Gaussian model on the X input dataset diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py index 205cac4205c..cdbf9ea78f3 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py +++ b/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py @@ -29,9 +29,9 @@ def gmmPredict(X: Matrix, - weight: Matrix, - mu: Matrix, - precisions_cholesky: Matrix, + W: Matrix, + Mu: Matrix, + Precisions: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ Prediction function for a Gaussian Mixture Model (gmm).
@@ -40,11 +40,11 @@ def gmmPredict(X: Matrix, :param X: Dataset input to predict the labels from - :param weight: Weight of learned model: + :param W: Weights of learned model: A matrix whose [i,k]th entry is the probability that observation i in the test data belongs to the kth class - :param mu: Fitted clusters mean - :param precisions_cholesky: Fitted precision matrix for each mixture + :param Mu: Fitted cluster means + :param Precisions: Fitted precision Cholesky matrix for each mixture :param model: "VVV": unequal variance (full), each component has its own general covariance matrix "EEE": equal variance (tied), all components share the same general covariance matrix "VVI": spherical, unequal volume (diag), each component has its own diagonal @@ -54,7 +54,7 @@ def gmmPredict(X: Matrix, :return: Probability of the predictions given the X input dataset """ - params_dict = {'X': X, 'weight': weight, 'mu': mu, 'precisions_cholesky': precisions_cholesky} + params_dict = {'X': X, 'W': W, 'Mu': Mu, 'Precisions': Precisions} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/gnmf.py b/src/main/python/systemds/operator/algorithm/builtin/gnmf.py index edf5ab4daed..3236e9b9d2c 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/gnmf.py +++ b/src/main/python/systemds/operator/algorithm/builtin/gnmf.py @@ -29,7 +29,7 @@ def gnmf(X: Matrix, - rnk: int, + rank: int, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ The gnmf-function does Gaussian Non-Negative Matrix Factorization. In this, a matrix X is factorized into two @@ -44,14 +44,14 @@ def gnmf(X: Matrix, :param X: Matrix of feature vectors. - :param rnk: Number of components into which matrix X is to be factored - :param eps: Tolerance - :param maxi: Maximum number of conjugate gradient iterations + :param rank: Number of components into which matrix X is to be factored + :param tol: Tolerance + :param maxIter: Maximum number of conjugate gradient iterations :return: List of pattern matrices, one for each repetition :return: List of amplitude matrices, one for each repetition """ - params_dict = {'X': X, 'rnk': rnk} + params_dict = {'X': X, 'rank': rank} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py index 26d165dc028..c679ab4a509 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py +++ b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py @@ -28,10 +28,10 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def hyperband(X_train: Matrix, - y_train: Matrix, - X_val: Matrix, - y_val: Matrix, +def hyperband(X: Matrix, + Y: Matrix, + Xtest: Matrix, + Ytest: Matrix, params: List, paramRanges: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): @@ -46,10 +46,10 @@ def hyperband(X_train: Matrix, - :param X_train: Input Matrix of training vectors - :param y_train: Labels for training vectors - :param X_val: Input Matrix of validation vectors - :param y_val: Labels for validation vectors + :param X: Input Matrix of training vectors + :param Y: Labels for training vectors + :param Xtest: Input Matrix of validation vectors + :param Ytest: Labels for validation vectors :param params: List of parameters to optimize :param paramRanges: The min and max values for the uniform distributions to draw from. One row per hyper parameter, first column specifies min, second column max value.
@@ -60,14 +60,14 @@ def hyperband(X_train: Matrix, :return: hyper parameters of best performing candidate """ - params_dict = {'X_train': X_train, 'y_train': y_train, 'X_val': X_val, 'y_val': y_val, 'params': params, 'paramRanges': paramRanges} + params_dict = {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'params': params, 'paramRanges': paramRanges} params_dict.update(kwargs) - vX_0 = Matrix(X_train.sds_context, '') - vX_1 = Frame(X_train.sds_context, '') + vX_0 = Matrix(X.sds_context, '') + vX_1 = Frame(X.sds_context, '') output_nodes = [vX_0, vX_1, ] - op = MultiReturn(X_train.sds_context, 'hyperband', output_nodes, named_input_nodes=params_dict) + op = MultiReturn(X.sds_context, 'hyperband', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py b/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py index 63a472716ab..6b2ad0b0901 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py @@ -28,21 +28,21 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_brightness(img_in: Matrix, +def img_brightness(img: Matrix, value: float, - channel_max: int): + maxValue: int): """ The img_brightness-function is an image data augmentation function. It changes the brightness of the image. - :param img_in: Input matrix/image + :param img: Input matrix/image :param value: The amount of brightness to be changed for the image - :param channel_max: Maximum value of the brightness of the image + :param maxValue: Maximum channel value of the brightness of the image :return: Output matrix/image """ - params_dict = {'img_in': img_in, 'value': value, 'channel_max': channel_max} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'value': value, 'maxValue': maxValue} + return Matrix(img.sds_context, 'img_brightness', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py index a5b62012f50..c9c9d2be41d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py @@ -28,21 +28,21 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_brightness_linearized(img_in: Matrix, +def img_brightness_linearized(img: Matrix, value: float, - channel_max: int): + maxValue: int): """ The img_brightness_linearized-function is an image data augmentation function. It changes the brightness of one or multiple images. 
- :param img_in: Input matrix/image (can represent multiple images every row of the matrix represents a linearized image) + :param img: Input matrix/image (can represent multiple images; every row of the matrix represents a linearized image) :param value: The amount of brightness to be changed for the image - :param channel_max: Maximum value of the brightness of the image + :param maxValue: Maximum channel value of the brightness of the image :return: Output matrix/images (every row of the matrix represents a linearized image) """ - params_dict = {'img_in': img_in, 'value': value, 'channel_max': channel_max} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'value': value, 'maxValue': maxValue} + return Matrix(img.sds_context, 'img_brightness_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_crop.py b/src/main/python/systemds/operator/algorithm/builtin/img_crop.py index f0432c7578a..66ceb52f8b4 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_crop.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_crop.py @@ -28,25 +28,25 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_crop(img_in: Matrix, +def img_crop(img: Matrix, w: int, h: int, - x_offset: int, - y_offset: int): + offsetX: int, + offsetY: int): """ The img_crop-function is an image data augmentation function. It cuts out a subregion of an image. - :param img_in: Input matrix/image + :param img: Input matrix/image :param w: The width of the subregion required :param h: The height of the subregion required - :param x_offset: The horizontal coordinate in the image to begin the crop operation - :param y_offset: The vertical coordinate in the image to begin the crop operation + :param offsetX: The horizontal coordinate in the image to begin the crop operation + :param offsetY: The vertical coordinate in the image to begin the crop operation :return: Cropped matrix/image """ - params_dict = {'img_in': img_in, 'w': w, 'h': h, 'x_offset': x_offset, 'y_offset': y_offset} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'w': w, 'h': h, 'offsetX': offsetX, 'offsetY': offsetY} + return Matrix(img.sds_context, 'img_crop', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py index 4321a8af0db..f68851ca69a 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py @@ -28,29 +28,29 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_crop_linearized(img_in: Matrix, +def img_crop_linearized(img: Matrix, w: int, h: int, - x_offset: int, - y_offset: int, - s_cols: int, - s_rows: int): + offsetX: int, + offsetY: int, + sW: int, + sH: int): """ The img_crop_linearized cuts out a rectangular section of multiple linearized images.
- :param img_in: Linearized input images as 2D matrix + :param img: Linearized input images as 2D matrix :param w: The width of the subregion required :param h: The height of the subregion required - :param x_offset: The horizontal offset for the center of the crop region - :param y_offset: The vertical offset for the center of the crop region - :param s_cols: Width of a single image - :param s_rows: Height of a single image + :param offsetX: The horizontal offset for the center of the crop region + :param offsetY: The vertical offset for the center of the crop region + :param sW: Width of a single image + :param sH: Height of a single image :return: Cropped images as linearized 2D matrix """ - params_dict = {'img_in': img_in, 'w': w, 'h': h, 'x_offset': x_offset, 'y_offset': y_offset, 's_cols': s_cols, 's_rows': s_rows} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'w': w, 'h': h, 'offsetX': offsetX, 'offsetY': offsetY, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_crop_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py b/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py index 93befbd7366..71056fb49d5 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py @@ -28,27 +28,27 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_cutout(img_in: Matrix, +def img_cutout(img: Matrix, x: int, y: int, - width: int, - height: int, - fill_value: float): + w: int, + h: int, + value: float): """ Image Cutout function replaces a rectangular section of an image with a constant value. - :param img_in: Input image as 2D matrix with top left corner at [1, 1] + :param img: Input image as 2D matrix with top left corner at [1, 1] :param x: Column index of the top left corner of the rectangle (starting at 1) :param y: Row index of the top left corner of the rectangle (starting at 1) - :param width: Width of the rectangle (must be positive) - :param height: Height of the rectangle (must be positive) - :param fill_value: The value to set for the rectangle + :param w: Width of the rectangle (must be positive) + :param h: Height of the rectangle (must be positive) + :param value: The value to set for the rectangle :return: Output image as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'x': x, 'y': y, 'width': width, 'height': height, 'fill_value': fill_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'x': x, 'y': y, 'w': w, 'h': h, 'value': value} + return Matrix(img.sds_context, 'img_cutout', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py index 2dd0c52239f..bc3e902ea4f 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py @@ -28,31 +28,31 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_cutout_linearized(img_in: Matrix, +def img_cutout_linearized(img: Matrix, x: int, y: int, - width: int, - height: int, - fill_value: float, - s_cols: int, - s_rows: int): + w: int, + h: int, + value: float, + sW: int, + sH: int): """ Image Cutout function replaces a rectangular section of an image with a constant value. 
- :param img_in: Input images as linearized 2D matrix with top left corner at [1, 1] + :param img: Input images as linearized 2D matrix with top left corner at [1, 1] :param x: Column index of the top left corner of the rectangle (starting at 1) :param y: Row index of the top left corner of the rectangle (starting at 1) - :param width: Width of the rectangle (must be positive) - :param height: Height of the rectangle (must be positive) - :param fill_value: The value to set for the rectangle - :param s_cols: Width of a single image - :param s_rows: Height of a single image + :param w: Width of the rectangle (must be positive) + :param h: Height of the rectangle (must be positive) + :param value: The value to set for the rectangle + :param sW: Width of a single image + :param sH: Height of a single image :return: Output images as linearized 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'x': x, 'y': y, 'width': width, 'height': height, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'x': x, 'y': y, 'w': w, 'h': h, 'value': value, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_cutout_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_invert.py b/src/main/python/systemds/operator/algorithm/builtin/img_invert.py index a555f23708c..f2c0e683ba1 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_invert.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_invert.py @@ -28,19 +28,19 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_invert(img_in: Matrix, - max_value: float): +def img_invert(img: Matrix, + maxValue: float): """ This is an image data augmentation function. It inverts an image. - :param img_in: Input image - :param max_value: The maximum value pixels can have + :param img: Input image + :param maxValue: The maximum value pixels can have :return: Output image """ - params_dict = {'img_in': img_in, 'max_value': max_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'maxValue': maxValue} + return Matrix(img.sds_context, 'img_invert', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py index 2f66a0b8be2..4b752dabd99 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py @@ -28,19 +28,19 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_invert_linearized(img_in: Matrix, - max_value: float): +def img_invert_linearized(img: Matrix, + maxValue: float): """ This is an image data augmentation function. 
It inverts an image. It can handle one or multiple images. - :param img_in: Input matrix/image (every row of the matrix represents a linearized image) - :param max_value: The maximum value pixels can have + :param img: Input matrix/image (every row of the matrix represents a linearized image) + :param maxValue: The maximum value pixels can have :return: Output images (every row of the matrix represents a linearized image) """ - params_dict = {'img_in': img_in, 'max_value': max_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'maxValue': maxValue} + return Matrix(img.sds_context, 'img_invert_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py b/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py index 285d25fbf29..a4417949750 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py @@ -28,20 +28,20 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_mirror(img_in: Matrix, - horizontal_axis: bool): +def img_mirror(img: Matrix, + horizontal: bool): """ This function is an image data augmentation function. It flips an image on the X (horizontal) or Y (vertical) axis. - :param img_in: Input matrix/image - :param max_value: The maximum value pixels can have + :param img: Input matrix/image + :param horizontal: Boolean, True for horizontal flip :return: Flipped matrix/image """ - params_dict = {'img_in': img_in, 'horizontal_axis': horizontal_axis} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'horizontal': horizontal} + return Matrix(img.sds_context, 'img_mirror', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py index 1c6ae58ad03..19d63df64a5 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py @@ -28,24 +28,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_mirror_linearized(img_matrix: Matrix, - horizontal_axis: bool, - original_rows: int, - original_cols: int): +def img_mirror_linearized(img: Matrix, + horizontal: bool, + sH: int, + sW: int): """ This function has the same functionality as img_mirror but it handles multiple images at the same time. Each row of the input and output matrix represents a linearized image/matrix. It flips an image on the X (horizontal) or Y (vertical) axis.
- :param img_matrix: Input matrix/image (every row represents a linearized matrix/image) - :param horizontal_axis: flip either in X or Y axis - :param original_rows: number of rows in the original 2-D images - :param original_cols: number of cols in the original 2-D images + :param img: Input matrix/image (every row represents a linearized matrix/image) + :param horizontal: flip either in X or Y axis + :param sH: Height of a single image + :param sW: Width of a single image :return: Output matrix/image (every row represents a linearized matrix/image) """ - params_dict = {'img_matrix': img_matrix, 'horizontal_axis': horizontal_axis, 'original_rows': original_rows, 'original_cols': original_cols} - return Matrix(img_matrix.sds_context, + params_dict = {'img': img, 'horizontal': horizontal, 'sH': sH, 'sW': sW} + return Matrix(img.sds_context, 'img_mirror_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py b/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py index e314b0e7ca0..6ad839b667e 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py @@ -28,7 +28,7 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_posterize(img_in: Matrix, +def img_posterize(img: Matrix, bits: int): """ The Image Posterize function limits pixel values to 2^bits different values in the range [0, 255]. @@ -36,13 +36,13 @@ def img_posterize(img_in: Matrix, - :param img_in: Input image + :param img: Input image :param bits: The number of bits to keep for the values. 1 means black and white, 8 means every integer between 0 and 255. :return: Output image """ - params_dict = {'img_in': img_in, 'bits': bits} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'bits': bits} + return Matrix(img.sds_context, 'img_posterize', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py index 286ce0222df..1ad83c1a153 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py @@ -28,7 +28,7 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_posterize_linearized(img_in: Matrix, +def img_posterize_linearized(img: Matrix, bits: int): """ The Linearized Image Posterize function limits pixel values to 2^bits different values in the range [0, 255]. @@ -36,13 +36,13 @@ def img_posterize_linearized(img_in: Matrix, - :param img_in: Row linearized input images as 2D matrix + :param img: Row linearized input images as 2D matrix :param bits: The number of bits to keep for the values. 1 means black and white, 8 means every integer between 0 and 255.
:return: Row linearized output images as 2D matrix """ - params_dict = {'img_in': img_in, 'bits': bits} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'bits': bits} + return Matrix(img.sds_context, 'img_posterize_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_rotate.py b/src/main/python/systemds/operator/algorithm/builtin/img_rotate.py index b8ab1ec0687..0c2e6596543 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_rotate.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_rotate.py @@ -28,22 +28,22 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_rotate(img_in: Matrix, +def img_rotate(img: Matrix, radians: float, - fill_value: float): + value: float): """ The Image Rotate function rotates the input image counter-clockwise around the center. Uses nearest neighbor sampling. - :param img_in: Input image as 2D matrix with top left corner at [1, 1] + :param img: Input image as 2D matrix with top left corner at [1, 1] :param radians: The value by which to rotate in radians. - :param fill_value: The background color revealed by the rotation + :param value: The background color revealed by the rotation :return: Output image as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'radians': radians, 'fill_value': fill_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'radians': radians, 'value': value} + return Matrix(img.sds_context, 'img_rotate', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_rotate_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_rotate_linearized.py index 94e7ecbff2b..aed924aff41 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_rotate_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_rotate_linearized.py @@ -28,24 +28,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_rotate_linearized(img_in: Matrix, +def img_rotate_linearized(img: Matrix, radians: float, - fill_value: float, - s_cols: int, - s_rows: int): + value: float, + sW: int, + sH: int): """ The Linearized Image Rotate function rotates the linearized input images counter-clockwise around the center. Uses nearest neighbor sampling. - :param img_in: Linearized input images as 2D matrix with top left corner at [1, 1] + :param img: Linearized input images as 2D matrix with top left corner at [1, 1] :param radians: The value by which to rotate in radians.
- :param fill_value: The background color revealed by the rotation + :param value: The background color revealed by the rotation :return: Output images in linearized form as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'radians': radians, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'radians': radians, 'value': value, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_rotate_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing.py b/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing.py index 892c524baf7..8fccf96cbf5 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing.py @@ -28,22 +28,22 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_sample_pairing(img_in1: Matrix, - img_in2: Matrix, +def img_sample_pairing(img1: Matrix, + img2: Matrix, weight: float): """ The image sample pairing function blends two images together. - :param img_in1: First input image - :param img_in2: Second input image + :param img1: First input image + :param img2: Second input image :param weight: The weight given to the second image. - 0 means only img_in1, 1 means only img_in2 will be visible + 0 means only img1, 1 means only img2 will be visible :return: Output image """ - params_dict = {'img_in1': img_in1, 'img_in2': img_in2, 'weight': weight} - return Matrix(img_in1.sds_context, + params_dict = {'img1': img1, 'img2': img2, 'weight': weight} + return Matrix(img1.sds_context, 'img_sample_pairing', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing_linearized.py index a7f08c74f41..68c749de176 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_sample_pairing_linearized.py @@ -28,22 +28,22 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_sample_pairing_linearized(img_in1: Matrix, - img_in2: Matrix, +def img_sample_pairing_linearized(img1: Matrix, + img2: Matrix, weight: float): """ The image sample pairing function blends two images together. - :param img_in1: input matrix/image (every row is a linearized image) - :param img_in2: Second input image (one image represented as a single row linearized matrix) + :param img1: input matrix/image (every row is a linearized image) + :param img2: Second input image (one image represented as a single row linearized matrix) :param weight: The weight given to the second image. 
- 0 means only img_in1, 1 means only img_in2 will be visible + 0 means only img1, 1 means only img2 will be visible :return: Output image """ - params_dict = {'img_in1': img_in1, 'img_in2': img_in2, 'weight': weight} - return Matrix(img_in1.sds_context, + params_dict = {'img1': img1, 'img2': img2, 'weight': weight} + return Matrix(img1.sds_context, 'img_sample_pairing_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_shear.py b/src/main/python/systemds/operator/algorithm/builtin/img_shear.py index 44ad9f6883a..6e84405f18d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_shear.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_shear.py @@ -28,24 +28,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_shear(img_in: Matrix, - shear_x: float, - shear_y: float, - fill_value: float): +def img_shear(img: Matrix, + shearX: float, + shearY: float, + value: float): """ This function applies a shearing transformation to an image. Uses nearest neighbor sampling. - :param img_in: Input image as 2D matrix with top left corner at [1, 1] - :param shear_x: Shearing factor for horizontal shearing - :param shear_y: Shearing factor for vertical shearing - :param fill_value: The background color revealed by the shearing + :param img: Input image as 2D matrix with top left corner at [1, 1] + :param shearX: Shearing factor for horizontal shearing + :param shearY: Shearing factor for vertical shearing + :param value: The background color revealed by the shearing :return: Output image as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'shear_x': shear_x, 'shear_y': shear_y, 'fill_value': fill_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'shearX': shearX, 'shearY': shearY, 'value': value} + return Matrix(img.sds_context, 'img_shear', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_shear_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_shear_linearized.py index a470c8a08c2..01853d24792 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_shear_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_shear_linearized.py @@ -28,26 +28,26 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_shear_linearized(img_in: Matrix, - shear_x: float, - shear_y: float, - fill_value: float, - s_cols: int, - s_rows: int): +def img_shear_linearized(img: Matrix, + shearX: float, + shearY: float, + value: float, + sW: int, + sH: int): """ This function applies a shearing transformation to linearized images. Uses nearest neighbor sampling. 
- :param img_in: Linearized input images as 2D matrix with top left corner at [1, 1] - :param shear_x: Shearing factor for horizontal shearing - :param shear_y: Shearing factor for vertical shearing - :param fill_value: The background color revealed by the shearing + :param img: Linearized input images as 2D matrix with top left corner at [1, 1] + :param shearX: Shearing factor for horizontal shearing + :param shearY: Shearing factor for vertical shearing + :param value: The background color revealed by the shearing :return: Output images in linearized form as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'shear_x': shear_x, 'shear_y': shear_y, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'shearX': shearX, 'shearY': shearY, 'value': value, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_shear_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_transform.py b/src/main/python/systemds/operator/algorithm/builtin/img_transform.py index 73095816bdf..e1a6f175526 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_transform.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_transform.py @@ -28,16 +28,16 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_transform(img_in: Matrix, - out_w: int, - out_h: int, +def img_transform(img: Matrix, + w: int, + h: int, a: float, b: float, c: float, d: float, e: float, f: float, - fill_value: float): + value: float): """ The Image Transform function applies an affine transformation to an image. Optionally resizes the image (without scaling). @@ -45,15 +45,15 @@ def img_transform(img_in: Matrix, - :param img_in: Input image as 2D matrix with top left corner at [1, 1] - :param out_w: Width of the output image - :param out_h: Height of the output image + :param img: Input image as 2D matrix with top left corner at [1, 1] + :param w: Width of the output image + :param h: Height of the output image :param a,b,c,d,e,f: The first two rows of the affine matrix in row-major order - :param fill_value: The background of the image + :param value: The background of the image :return: Output image as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'out_w': out_w, 'out_h': out_h, 'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f, 'fill_value': fill_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'w': w, 'h': h, 'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f, 'value': value} + return Matrix(img.sds_context, 'img_transform', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py index 8020b22a54e..8d0690aaf2b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py @@ -28,18 +28,18 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_transform_linearized(img_in: Matrix, - out_w: int, - out_h: int, +def img_transform_linearized(img: Matrix, + w: int, + h: int, a: float, b: float, c: float, d: float, e: float, f: float, - fill_value: float, - s_cols: int, - s_rows: int): + value: float, + sW: int, + sH: int): """ The Linearized Image Transform function applies an affine transformation to linearized images. 
Optionally resizes the image (without scaling). @@ -47,15 +47,15 @@ def img_transform_linearized(img_in: Matrix, - :param img_in: Linearized input images as 2D matrix with top left corner at [1, 1] - :param out_w: Width of the output matrix - :param out_h: Height of the output matrix + :param img: Linearized input images as 2D matrix with top left corner at [1, 1] + :param w: Width of the output matrix + :param h: Height of the output matrix :param a,b,c,d,e,f: The first two rows of the affine matrix in row-major order - :param fill_value: The background of an image + :param value: The background of an image :return: Output images in linearized form as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'out_w': out_w, 'out_h': out_h, 'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'w': w, 'h': h, 'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f, 'value': value, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_transform_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_translate.py b/src/main/python/systemds/operator/algorithm/builtin/img_translate.py index 9cfc991ca57..d4426089004 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_translate.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_translate.py @@ -28,12 +28,12 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_translate(img_in: Matrix, - offset_x: float, - offset_y: float, - out_w: int, - out_h: int, - fill_value: float): +def img_translate(img: Matrix, + offsetX: float, + offsetY: float, + w: int, + h: int, + value: float): """ The Image Translate function translates the image. Optionally resizes the image (without scaling). 
@@ -41,16 +41,16 @@ def img_translate(img_in: Matrix, - :param img_in: Input image as 2D matrix with top left corner at [1, 1] - :param offset_x: The distance to move the image in x direction - :param offset_y: The distance to move the image in y direction - :param out_w: Width of the output image - :param out_h: Height of the output image - :param fill_value: The background of the image + :param img: Input image as 2D matrix with top left corner at [1, 1] + :param offsetX: The distance to move the image in x direction + :param offsetY: The distance to move the image in y direction + :param w: Width of the output image + :param h: Height of the output image + :param value: The background of the image :return: Output image as 2D matrix with top left corner at [1, 1] """ - params_dict = {'img_in': img_in, 'offset_x': offset_x, 'offset_y': offset_y, 'out_w': out_w, 'out_h': out_h, 'fill_value': fill_value} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'offsetX': offsetX, 'offsetY': offsetY, 'w': w, 'h': h, 'value': value} + return Matrix(img.sds_context, 'img_translate', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py index 92098dab2c7..f5a234221bd 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py @@ -28,32 +28,32 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def img_translate_linearized(img_in: Matrix, - offset_x: float, - offset_y: float, - out_w: int, - out_h: int, - fill_value: float, - o_w: int, - o_h: int): +def img_translate_linearized(img: Matrix, + offsetX: float, + offsetY: float, + w: int, + h: int, + value: float, + sW: int, + sH: int): """ This function has the same functionality as img_translate but it handles multiple images at the same time. Each row of the input and output matrix represents a linearized image/matrix. It translates the image and optionally resizes it (without scaling).
- :param img_in: Input matrix/image (every row represents a linearized matrix/image) - :param offset_x: The distance to move the image in x direction - :param offset_y: The distance to move the image in y direction - :param out_w: Width of the output image - :param out_h: Height of the output image - :param fill_value: The background of the image - :param o_w: Width of the original 2D images - :param o_h: Height of the original 2D images + :param img: Input matrix/image (every row represents a linearized matrix/image) + :param offsetX: The distance to move the image in x direction + :param offsetY: The distance to move the image in y direction + :param w: Width of the output image + :param h: Height of the output image + :param value: The background of the image + :param sW: Width of the original 2D images + :param sH: Height of the original 2D images :return: Output matrix/image (every row represents a linearized matrix/image) """ - params_dict = {'img_in': img_in, 'offset_x': offset_x, 'offset_y': offset_y, 'out_w': out_w, 'out_h': out_h, 'fill_value': fill_value, 'o_w': o_w, 'o_h': o_h} - return Matrix(img_in.sds_context, + params_dict = {'img': img, 'offsetX': offsetX, 'offsetY': offsetY, 'w': w, 'h': h, 'value': value, 'sW': sW, 'sH': sH} + return Matrix(img.sds_context, 'img_translate_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/impurityMeasures.py b/src/main/python/systemds/operator/algorithm/builtin/impurityMeasures.py index 3b05d860827..e28840e98b7 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/impurityMeasures.py +++ b/src/main/python/systemds/operator/algorithm/builtin/impurityMeasures.py @@ -43,7 +43,7 @@ def impurityMeasures(X: Matrix, :param Y: Target vector containing 0 and 1 values. :param R: Vector indicating whether a feature is categorical or continuous. 1 denotes a continuous feature, 2 denotes a categorical feature. - :param n_bins: Number of bins for binning in case of scale features. + :param n: Number of bins for binning in case of scale (continuous) features. :param method: String indicating the method to use; either "entropy" or "gini". :return: (1 x ncol(X)) row vector containing information/gini gain for each feature of the dataset.
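Note on call sites: the hunks above and below only rename wrapper parameters; the behavior of the builtins is unchanged. As a minimal, hypothetical sketch of what migrating a caller looks like (assuming this patch is applied; the random 32x32 "image" below is made-up example data, not from the test suite):

    # Hypothetical usage of a renamed wrapper after this patch;
    # the random image data is for illustration only.
    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import img_brightness

    with SystemDSContext() as sds:
        img = sds.from_numpy(np.random.rand(32, 32) * 255)  # one 32x32 image
        # before this patch: img_brightness(img_in=img, value=25.0, channel_max=255)
        out = img_brightness(img, value=25.0, maxValue=255).compute()
        print(out.shape)  # (32, 32)

Since only keyword names change, positional call sites keep working; only callers that pass the old keyword names need updating.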
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByFDApply.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByFDApply.py index edae45e2a40..4482a7a469a 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/imputeByFDApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByFDApply.py @@ -29,20 +29,18 @@ def imputeByFDApply(X: Matrix, - Y_imp: Matrix): + imputedVec: Matrix): """ Implements a builtin for imputing missing values from observed values (if they exist) using robust functional dependencies :param X: Matrix X - :param source: source attribute to use for imputation and error correction - :param target: attribute to be fixed - :param threshold: threshold value in interval [0, 1] for robust FDs - :return: Matrix with possible imputations + :param imputedVec: source attribute to use for imputation and error correction + :return: imputed dataset """ - params_dict = {'X': X, 'Y_imp': Y_imp} + params_dict = {'X': X, 'imputedVec': imputedVec} return Matrix(X.sds_context, 'imputeByFDApply', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py index fcc096180b9..d75726bbc80 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py +++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py @@ -25,13 +25,38 @@ from typing import Dict, Iterable from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar -from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES def imputeByKNN(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Imputes missing values, indicated by NaNs, using KNN-based methods + (k-nearest neighbors by Euclidean distance). In order to avoid NaNs in + the distance computation and to enable a meaningful nearest neighbor search, + we initialize the missing values by column means. Currently, only the column + with the most missing values is actually imputed. + ------------------------------------------------------------------------------ + + + :param X: Matrix with missing values, which are represented as NaNs + :param method: Method used for imputing missing values with different performance + and accuracy tradeoffs: + 'dist' (default): Compute all-pairs distances and impute the + missing values by closest. O(N^2 * #features) + 'dist_missing': Compute distances between data and records with + missing values.
O(N*M * #features), assuming + that the number of records with MV is M< 0) X = utils::dummycoding(replace(target = X, pattern = NaN, replacement=0), mask) - trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE); + trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, tol=-1, maxIter=100, maxInnerIter=-1, verbose=FALSE); [B1, opt] = gridSearch(X=X, y=Y, train="multiLogReg", predict="W", numB=ncol(X)+1, cv=TRUE, cvk=cv, params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=TRUE); evalFunHp = as.matrix(opt) @@ -137,10 +137,10 @@ return (Matrix[Double] accuracyMatrix) trainy = trainset[, 1] testX = testset[, 2:ncol(testset)] testy = testset[, 1] - beta = multiLogReg(X=trainX, Y=trainy, icpt=as.scalar(MLhp[1,1]), reg=as.scalar(MLhp[1,2]), tol=as.scalar(MLhp[1,3]), - maxi=as.scalar(MLhp[1,4]), maxii=50, verbose=FALSE); + beta = multiLogReg(X=trainX, Y=trainy, intercept=as.scalar(MLhp[1,1]), reg=as.scalar(MLhp[1,2]), tol=as.scalar(MLhp[1,3]), + maxIter=as.scalar(MLhp[1,4]), maxInnerIter=50, verbose=FALSE); [prob, yhat, a] = multiLogRegPredict(testX, beta, testy, FALSE) - accuracy = getAccuracy(testy, yhat, isWeighted) + accuracy = getAccuracy(yhat, testy, isWeighted) accuracyMatrix[i] = accuracy } diff --git a/src/test/scripts/functions/builtin/imputeByKNN.dml b/src/test/scripts/functions/builtin/imputeByKNN.dml index 0e87026e2bc..2d23743eb7d 100644 --- a/src/test/scripts/functions/builtin/imputeByKNN.dml +++ b/src/test/scripts/functions/builtin/imputeByKNN.dml @@ -28,7 +28,7 @@ mask = is.nan(X) #Perform the KNN imputation result = imputeByKNN(X = X, method = $2) result2 = imputeByKNN(X = X, method = $3) -result3 = imputeByKNN(X = X, method = $4, seed = $5, sample_frac = $6) +result3 = imputeByKNN(X = X, method = $4, seed = $5, sampleFrac = $6) #Get the imputed value I = (mask[,2] == 1); diff --git a/src/test/scripts/functions/builtin/knn.dml b/src/test/scripts/functions/builtin/knn.dml index 8ea5a7ef98b..25bef9b09a9 100644 --- a/src/test/scripts/functions/builtin/knn.dml +++ b/src/test/scripts/functions/builtin/knn.dml @@ -24,7 +24,7 @@ T = read($in_T) CL = read($in_CL) k = $in_k -[NNR, PR, FI] = knn(Train=X, Test=T, CL=CL, k_value=k, predict_con_tg=1); +[NNR, PR, FI] = knn(Train=X, Test=T, CL=CL, k=k, predictCont=1); PR_val = matrix(0, 0, ncol(T)); for(i in 1:nrow(T)) { diff --git a/src/test/scripts/functions/builtin/knnbf.dml b/src/test/scripts/functions/builtin/knnbf.dml index e5ae2de34ab..31399261cc4 100644 --- a/src/test/scripts/functions/builtin/knnbf.dml +++ b/src/test/scripts/functions/builtin/knnbf.dml @@ -23,6 +23,6 @@ X = read($in_X) T = read($in_T) k = $in_k -NNR = knnbf(X=X, T=T, k_value = k) +NNR = knnbf(X=X, T=T, k = k) write(NNR, $out_B) diff --git a/src/test/scripts/functions/builtin/knnbfReference.dml b/src/test/scripts/functions/builtin/knnbfReference.dml index 994f4668b2e..72ec9da84a8 100644 --- a/src/test/scripts/functions/builtin/knnbfReference.dml +++ b/src/test/scripts/functions/builtin/knnbfReference.dml @@ -24,6 +24,6 @@ T = read($in_T) CL = read($in_CL) k = $in_k -[NNR, PR, FI] = knn(Train=X, Test=T, CL=CL, k_value=k); +[NNR, PR, FI] = knn(Train=X, Test=T, CL=CL, k=k); write(NNR, $out_B) diff --git a/src/test/scripts/functions/builtin/l2svm.dml b/src/test/scripts/functions/builtin/l2svm.dml index 20438ee457f..b8e34d10840 100644 --- a/src/test/scripts/functions/builtin/l2svm.dml +++ b/src/test/scripts/functions/builtin/l2svm.dml @@ -21,5 +21,5 @@ X = read($X) Y = read($Y) -model= l2svm(X=X, Y=Y, intercept = $inc, epsilon = 
$eps, reg = $lam, maxIterations = $max ) +model= l2svm(X=X, Y=Y, intercept = $inc, tol = $eps, reg = $lam, maxIter = $max ) write(model, $model) diff --git a/src/test/scripts/functions/builtin/leNetTest.dml b/src/test/scripts/functions/builtin/leNetTest.dml index ded0746ad40..27589284aec 100644 --- a/src/test/scripts/functions/builtin/leNetTest.dml +++ b/src/test/scripts/functions/builtin/leNetTest.dml @@ -51,7 +51,7 @@ model = lenetTrain(images, labels, images_val, labels_val, C, Hin, Win, 128, 3, 0.007, 0.9, 0.95, 5e-04, TRUE, -1) # Predict on the training set to test capacity of the network -probs = lenetPredict(model=model, X=images, C=C, Hin=Hin, Win=Win) +probs = lenetPredict(model=model, X=images, c=C, h=Hin, w=Win) # Accuracy correct_pred = rowIndexMax(probs) == rowIndexMax(labels) diff --git a/src/test/scripts/functions/builtin/lm.dml b/src/test/scripts/functions/builtin/lm.dml index 93ab41fec42..1081169453e 100644 --- a/src/test/scripts/functions/builtin/lm.dml +++ b/src/test/scripts/functions/builtin/lm.dml @@ -21,5 +21,5 @@ X = read($1) y = read($2) -C = lm(X = X, y = y, icpt = 0, reg = 1e-12) +C = lm(X = X, y = y, intercept = 0, reg = 1e-12) write(C, $3) diff --git a/src/test/scripts/functions/builtin/lmpredict.dml b/src/test/scripts/functions/builtin/lmpredict.dml index 2105f6c8247..a3091f4ed12 100644 --- a/src/test/scripts/functions/builtin/lmpredict.dml +++ b/src/test/scripts/functions/builtin/lmpredict.dml @@ -22,8 +22,8 @@ X = read($1) # Training data y = read($2) # response values p = read($3) # random data to predict -w = lmDS(X = X, y = y, icpt = 1, reg = 1e-12) -p = lmPredict(X = X, B = w, ytest=matrix(0,1,1), icpt = 1) +w = lmDS(X = X, y = y, intercept = 1, reg = 1e-12) +p = lmPredict(X = X, B = w, Ytest=matrix(0,1,1), intercept = 1) p2 = glmPredict(X = X, B = w, dfam=1, link=1, vpow=0.0, lpow=1.0); if( sum(abs(p2-p) > 1e8) !=0 ) diff --git a/src/test/scripts/functions/builtin/mice.dml b/src/test/scripts/functions/builtin/mice.dml index 96362565264..8766ecbac41 100644 --- a/src/test/scripts/functions/builtin/mice.dml +++ b/src/test/scripts/functions/builtin/mice.dml @@ -36,7 +36,7 @@ if(sum(Mask) == ncol(F)) jspecR = "{ids:true, recode:["+s+"]}"; [X, M] = transformencode(target=F, spec=jspecR); # call mice - [dataset, meta, th, dm, betaList] = mice(X=X,cMask=Mask, iter=$iteration, threshold=0.8, verbose = FALSE) + [dataset, meta, th, dm, betaList] = mice(X=X,ctypes=Mask, maxIter=$iteration, threshold=0.8, verbose = FALSE) output1 = miceApply(X=X, meta=meta, threshold=th, dM=dm, betaList=betaList) match = abs(output1 - dataset) < 0.16 print("match: \n"+(sum(match == 0) == 0)) @@ -51,7 +51,7 @@ else if(sum(Mask) == 0){ # no transformation is required, cast the frame into matrix and call mice # as.matrix() will convert the null values into zeros, so explicitly replace zeros with NaN X = replace(target = as.matrix(F), pattern = 0, replacement = NaN) - [output, meta, th, dm, betaList] = mice(X=X, cMask=Mask, iter=$iteration, verbose = FALSE ) + [output, meta, th, dm, betaList] = mice(X=X, ctypes=Mask, maxIter=$iteration, verbose = FALSE ) output1 = miceApply(X=X, meta=meta, threshold=th, dM=dm, betaList=betaList) match = abs(output - output1) < 0.1 print("match sum: \n"+(sum(match == 0) == 0)) @@ -70,7 +70,7 @@ else jspecR = "{ids:true, recode:["+s+"]}"; [X, M] = transformencode(target=F, spec=jspecR); # call mice - dataset = mice(X=X,cMask=Mask, iter=$iteration, verbose = FALSE ) + dataset = mice(X=X,ctypes=Mask, maxIter=$iteration, verbose = FALSE ) # decode data into 
original format output = as.matrix(transformdecode(target=dataset, spec=jspecR, meta=M)); # below lines are only for testing purpose diff --git a/src/test/scripts/functions/builtin/multiLogRegPredict.dml b/src/test/scripts/functions/builtin/multiLogRegPredict.dml index 1e83d8dad47..91f6c5b2439 100644 --- a/src/test/scripts/functions/builtin/multiLogRegPredict.dml +++ b/src/test/scripts/functions/builtin/multiLogRegPredict.dml @@ -24,7 +24,7 @@ Y = read($2) # response values X_test = read($3) # random data to predict Y_test = read($4) # random data labels -w = multiLogReg(X=X, Y=Y, icpt=2, tol=0.00000001, reg=1.0, maxi=100, maxii=0, verbose=FALSE) +w = multiLogReg(X=X, Y=Y, intercept=2, tol=0.00000001, reg=1.0, maxIter=100, maxInnerIter=0, verbose=FALSE) [prob, y, accuracy] = multiLogRegPredict(X=X_test, B=w, Y=Y_test, verbose=TRUE) acc = matrix(accuracy, 1, 1) write(acc, $5) \ No newline at end of file diff --git a/src/test/scripts/functions/builtin/multisvm.dml b/src/test/scripts/functions/builtin/multisvm.dml index b1fb5fd9e7f..9c514b2532a 100644 --- a/src/test/scripts/functions/builtin/multisvm.dml +++ b/src/test/scripts/functions/builtin/multisvm.dml @@ -22,5 +22,5 @@ X = read($X) Y = read($Y) model = msvm(X=X, Y=Y, intercept = $inc, - epsilon = $eps, reg = $lam, maxIterations = $max ) + tol = $eps, reg = $lam, maxIter = $max ) write(model, $model) diff --git a/src/test/scripts/functions/builtin/randomForestTest.dml b/src/test/scripts/functions/builtin/randomForestTest.dml index 971c0106623..eb3a47f8e0a 100644 --- a/src/test/scripts/functions/builtin/randomForestTest.dml +++ b/src/test/scripts/functions/builtin/randomForestTest.dml @@ -37,8 +37,8 @@ jspec = "{ids: true, bin: [" [X,D] = transformencode(target=F, spec=jspec); R = matrix(1, rows=1, cols=ncol(X)+1); -M = randomForest(X=X, y=Y, ctypes=R, num_trees=num_trees, seed=7, - max_depth=depth, min_leaf=num_leafs, impurity=impurity, verbose=TRUE); +M = randomForest(X=X, y=Y, ctypes=R, numTrees=num_trees, seed=7, + maxDepth=depth, minLeaf=num_leafs, impurity=impurity, verbose=TRUE); randomForestPredict(X=X, y=Y, ctypes=R, M=M, verbose=TRUE); write(M, $7); diff --git a/src/test/scripts/functions/builtin/sliceLineRealData.dml b/src/test/scripts/functions/builtin/sliceLineRealData.dml index 91477c4f3c5..4d391c0330f 100644 --- a/src/test/scripts/functions/builtin/sliceLineRealData.dml +++ b/src/test/scripts/functions/builtin/sliceLineRealData.dml @@ -39,8 +39,8 @@ cix = matrix(X + foffb, m*n, 1); X2 = table(rix, cix); #one-hot encoded # learn model -B = lm(X=X2, y=y, icpt=2, reg=0.001, verbose=FALSE); -yhat = lmPredict(X=X2, B=B, ytest=y, icpt=1, verbose=FALSE); +B = lm(X=X2, y=y, intercept=2, reg=0.001, verbose=FALSE); +yhat = lmPredict(X=X2, B=B, Ytest=y, intercept=1, verbose=FALSE); acc = lmPredictStats(yhat, y, TRUE); e = (y-yhat)^2; diff --git a/src/test/scripts/functions/builtin/steplm.dml b/src/test/scripts/functions/builtin/steplm.dml index 8ba6c2da4a3..bf37b62f99e 100644 --- a/src/test/scripts/functions/builtin/steplm.dml +++ b/src/test/scripts/functions/builtin/steplm.dml @@ -22,7 +22,7 @@ X = read($1); y = read($2); -[C, S] = steplm(X = X, y = y, icpt = 1); +[C, S] = steplm(X = X, y = y, intercept = 1); write(C, $3); write(S, $4); diff --git a/src/test/scripts/functions/builtin/xgboost_classification.dml b/src/test/scripts/functions/builtin/xgboost_classification.dml index 81697fb034a..10c88884bd2 100644 --- a/src/test/scripts/functions/builtin/xgboost_classification.dml +++ 
b/src/test/scripts/functions/builtin/xgboost_classification.dml @@ -24,5 +24,5 @@ y = read($2); R = read($3); sml_type = 2; num_trees = 2; -M = xgboost(X = X, y = y, R = R, sml_type = sml_type, num_trees = num_trees); +M = xgboost(X = X, y = y, R = R, smlType = sml_type, numTrees = num_trees); write(M, $6); diff --git a/src/test/scripts/functions/builtin/xgboost_predict_classification.dml b/src/test/scripts/functions/builtin/xgboost_predict_classification.dml index dad594351b2..6e6ccbb29c1 100644 --- a/src/test/scripts/functions/builtin/xgboost_predict_classification.dml +++ b/src/test/scripts/functions/builtin/xgboost_predict_classification.dml @@ -26,7 +26,7 @@ y = dataset[1:nrow(dataset), 3] R = matrix("1.0 2.0", rows=1, cols=ncol(X)) -M = xgboost(X = X, y = y, R = R, sml_type=2) +M = xgboost(X = X, y = y, R = R, smlType=2) P = xgboostPredictClassification(X = X, M = M) write(P, $1) diff --git a/src/test/scripts/functions/builtin/xgboost_regression.dml b/src/test/scripts/functions/builtin/xgboost_regression.dml index 89405c248b6..b94c4360909 100644 --- a/src/test/scripts/functions/builtin/xgboost_regression.dml +++ b/src/test/scripts/functions/builtin/xgboost_regression.dml @@ -24,5 +24,5 @@ y = read($2); R = read($3); sml_type = 1; num_trees = 2; -M = xgboost(X = X, y = y, R = R, sml_type = sml_type, num_trees = num_trees); +M = xgboost(X = X, y = y, R = R, smlType = sml_type, numTrees = num_trees); write(M, $6); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml index 9e3aa93d93d..3f665de1836 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml @@ -27,10 +27,10 @@ full_obj = ifdef($OBJ, FALSE) fmt = ifdef($fmt, "text") [W1, b1, W2, b2, W3, b3, W4, b4, hidden] = autoencoder_2layer( - X = read($X), W1_rand = read($W1_rand), W2_rand = read($W2_rand), - W3_rand = read($W3_rand), W4_rand = read($W4_rand), order_rand = read($order_rand), - num_hidden1 = $H1, num_hidden2 = $H2, max_epochs = $EPOCH, full_obj = full_obj, - batch_size = batch_size, step = step, decay = decay, mu = mu) + X = read($X), W1 = read($W1_rand), W2 = read($W2_rand), + W3 = read($W3_rand), W4 = read($W4_rand), Order = read($order_rand), + numHidden1 = $H1, numHidden2 = $H2, epochs = $EPOCH, fullObj = full_obj, + batchSize = batch_size, step = step, decay = decay, mu = mu) write(W1, $W1_out, format=fmt) write(b1, $b1_out, format=fmt) diff --git a/src/test/scripts/functions/codegenalg/Algorithm_KMeans.dml b/src/test/scripts/functions/codegenalg/Algorithm_KMeans.dml index fce8b084bd0..7932a0792b4 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_KMeans.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_KMeans.dml @@ -20,6 +20,6 @@ #------------------------------------------------------------- X = read($X); -C = kmeans(X=X, k=$k, runs=$runs, eps=$tol, max_iter=$maxi); +C = kmeans(X=X, k=$k, runs=$runs, tol=$tol, maxIter=$maxi); write(C, $C); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_L2SVM.dml b/src/test/scripts/functions/codegenalg/Algorithm_L2SVM.dml index 62b2d925cbb..dfa0658590d 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_L2SVM.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_L2SVM.dml @@ -21,6 +21,6 @@ X = read($X); Y = read($Y); -w = l2svm(X=X, Y=Y, intercept=$icpt, epsilon=$tol, reg=$reg, maxIterations=$maxiter); +w = l2svm(X=X, Y=Y, intercept=$icpt, tol=$tol, reg=$reg, 
maxIter=$maxiter); write(w, $model); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_LinregCG.dml b/src/test/scripts/functions/codegenalg/Algorithm_LinregCG.dml index 1c67849b2df..6a6e455dba7 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_LinregCG.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_LinregCG.dml @@ -21,6 +21,6 @@ X = read($X); Y = read($Y); -w = lmCG(X=X, y=Y, icpt=$icpt, tol=$tol, reg=$reg, maxi=$maxi); +w = lmCG(X=X, y=Y, intercept=$icpt, tol=$tol, reg=$reg, maxIter=$maxi); write(w, $B); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_MLogreg.dml b/src/test/scripts/functions/codegenalg/Algorithm_MLogreg.dml index ca0ab5f8395..af79c0a36b3 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_MLogreg.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_MLogreg.dml @@ -21,6 +21,6 @@ X = read($X); Y = read($Y); -w = multiLogReg(X=X, Y=Y, icpt=$icpt, tol=$tol, reg=$reg, maxi=$moi); +w = multiLogReg(X=X, Y=Y, intercept=$icpt, tol=$tol, reg=$reg, maxIter=$moi); write(w, $B); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_MSVM.dml b/src/test/scripts/functions/codegenalg/Algorithm_MSVM.dml index 49fa1ff8d58..88e5c31e587 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_MSVM.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_MSVM.dml @@ -21,6 +21,6 @@ X = read($X); Y = read($Y); -w = msvm(X=X, Y=Y, intercept=$icpt, epsilon=$tol, reg=$reg, maxIterations=$maxiter); +w = msvm(X=X, Y=Y, intercept=$icpt, tol=$tol, reg=$reg, maxIter=$maxiter); write(w, $model); diff --git a/src/test/scripts/functions/codegenalg/Algorithm_StepLM.dml b/src/test/scripts/functions/codegenalg/Algorithm_StepLM.dml index f5e606a5914..9e9e64c92ad 100644 --- a/src/test/scripts/functions/codegenalg/Algorithm_StepLM.dml +++ b/src/test/scripts/functions/codegenalg/Algorithm_StepLM.dml @@ -31,7 +31,7 @@ thr = ifdef ($thr, 0.001); X_orig = read (fileX); y = read (fileY); -[beta_out, Selected] = steplm(X=X_orig, y=y, icpt=intercept, verbose=FALSE); +[beta_out, Selected] = steplm(X=X_orig, y=y, intercept=intercept, verbose=FALSE); write(Selected, fileS, format=fmt); write(beta_out, fileB, format=fmt); diff --git a/src/test/scripts/functions/compress/workload/WorkloadAnalysisKmeans.dml b/src/test/scripts/functions/compress/workload/WorkloadAnalysisKmeans.dml index 7382436bf61..1ccfca2849f 100644 --- a/src/test/scripts/functions/compress/workload/WorkloadAnalysisKmeans.dml +++ b/src/test/scripts/functions/compress/workload/WorkloadAnalysisKmeans.dml @@ -27,5 +27,5 @@ print("kmeans") [data, Centering, ScaleFactor] = scale(X, TRUE, TRUE) # terminates with result -[Y_n, C_n] = kmeans(X=data, k=16, runs= 1, max_iter=as.integer($4), eps= 1e-17, seed= 13, is_verbose=TRUE) +[Y_n, C_n] = kmeans(X=data, k=16, runs= 1, maxIter=as.integer($4), tol= 1e-17, seed= 13, verbose=TRUE) print(sum(Y_n)) diff --git a/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmCG.dml b/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmCG.dml index c0834259fc8..11057e7bb39 100644 --- a/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmCG.dml +++ b/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmCG.dml @@ -30,7 +30,7 @@ print("LMCG") X = scale(X=X, scale=TRUE, center=TRUE); B = lmCG(X=X, y=y, verbose=TRUE); -p = lmPredict(X=X, B=B, ytest=y, verbose=TRUE); +p = lmPredict(X=X, B=B, Ytest=y, verbose=TRUE); classifications = (p > 0.1) diff --git a/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmDS.dml 
b/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmDS.dml index 9ed910d5cbf..63d67ff2305 100644 --- a/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmDS.dml +++ b/src/test/scripts/functions/compress/workload/WorkloadAnalysisLmDS.dml @@ -30,7 +30,7 @@ print("LMDS") X = scale(X=X, scale=TRUE, center=TRUE); B = lmDS(X=X, y=y, verbose=TRUE); -p = lmPredict(X=X, B=B, ytest=y, verbose=TRUE); +p = lmPredict(X=X, B=B, Ytest=y, verbose=TRUE); classifications = (p > 0.1) diff --git a/src/test/scripts/functions/compress/workload/WorkloadAnalysisMLogReg.dml b/src/test/scripts/functions/compress/workload/WorkloadAnalysisMLogReg.dml index d42750683a5..2ec904a908d 100644 --- a/src/test/scripts/functions/compress/workload/WorkloadAnalysisMLogReg.dml +++ b/src/test/scripts/functions/compress/workload/WorkloadAnalysisMLogReg.dml @@ -26,7 +26,7 @@ print("") print("MLogReg") X = scale(X=X, scale=TRUE, center=TRUE); -B = multiLogReg(X=X, Y=Y, verbose=FALSE, maxi=3, maxii=2, icpt=0); +B = multiLogReg(X=X, Y=Y, verbose=FALSE, maxIter=3, maxInnerIter=2, intercept=0); [nn, P, acc] = multiLogRegPredict(X=X, B=B, Y=Y) [nn, C] = confusionMatrix(P, Y) diff --git a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml index 1b386034683..40cbdb0b1eb 100644 --- a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml +++ b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml @@ -28,7 +28,7 @@ reg = $in_lambda; maxi = $in_maxi; thr = $in_thr; -[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr); +[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxIter = maxi, check = TRUE, threshold = thr); Z = U %*% V; diff --git a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml index 64ef02469e3..5ff2492cd43 100644 --- a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml @@ -27,7 +27,7 @@ reg = $in_lambda; maxi = $in_maxi; thr = $in_thr; -[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr); +[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxIter = maxi, check = TRUE, threshold = thr); Z = U %*% V; diff --git a/src/test/scripts/functions/federated/FederatedGLMTest.dml b/src/test/scripts/functions/federated/FederatedGLMTest.dml index 6c349bd8731..c73b9e67349 100644 --- a/src/test/scripts/functions/federated/FederatedGLMTest.dml +++ b/src/test/scripts/functions/federated/FederatedGLMTest.dml @@ -27,5 +27,5 @@ if ($rP) { } Y = read($in_Y) -model = glm(X=X, Y=Y, icpt = FALSE, tol = 1e-6, reg = 0.01) +model = glm(X=X, Y=Y, intercept = FALSE, tol = 1e-6, reg = 0.01) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedGLMTestReference.dml b/src/test/scripts/functions/federated/FederatedGLMTestReference.dml index fe815a4b766..d7dd9b4aa3d 100644 --- a/src/test/scripts/functions/federated/FederatedGLMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedGLMTestReference.dml @@ -25,5 +25,5 @@ if ($4) { X = cbind(read($1), read($2)) } Y = read($3) -model = glm(X=X, Y=Y, icpt = FALSE, tol = 1e-6, reg = 0.01) +model = glm(X=X, Y=Y, intercept = FALSE, tol = 1e-6, reg = 0.01) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedKmeansTest.dml 
b/src/test/scripts/functions/federated/FederatedKmeansTest.dml index 017ac518696..778082e5f4a 100644 --- a/src/test/scripts/functions/federated/FederatedKmeansTest.dml +++ b/src/test/scripts/functions/federated/FederatedKmeansTest.dml @@ -25,6 +25,6 @@ else X = federated(addresses=list($in_X1, $in_X2), ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) -[C,Y] = kmeans(X=X, k=4, runs=$runs, max_iter=150) +[C,Y] = kmeans(X=X, k=4, runs=$runs, maxIter=150) write(C, $out) diff --git a/src/test/scripts/functions/federated/FederatedKmeansTestReference.dml b/src/test/scripts/functions/federated/FederatedKmeansTestReference.dml index 3046eaedc42..c4d3b5509bf 100644 --- a/src/test/scripts/functions/federated/FederatedKmeansTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedKmeansTestReference.dml @@ -24,6 +24,6 @@ if( $3 ) else X = rbind(read($1), read($2)) -[C,Y] = kmeans(X=X, k=4, runs=$4, max_iter=150) +[C,Y] = kmeans(X=X, k=4, runs=$4, maxIter=150) write(C, $5) diff --git a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml index e31a08d29fe..05f076fde9e 100644 --- a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml @@ -30,6 +30,6 @@ else { ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) } -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml index e5e428abe68..561314f379e 100644 --- a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml @@ -27,6 +27,6 @@ if( $4 ) { else X = rbind(read($1), read($2)) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedLmPipeline.dml b/src/test/scripts/functions/federated/FederatedLmPipeline.dml index 957e630f6bb..1d0933e2935 100644 --- a/src/test/scripts/functions/federated/FederatedLmPipeline.dml +++ b/src/test/scripts/functions/federated/FederatedLmPipeline.dml @@ -44,10 +44,10 @@ X = scale(X=X, center=TRUE, scale=TRUE); [Xtrain , Xtest, ytrain, ytest] = split(X=X, Y=y, cont=$cont, seed=7) # train regression model -B = lm(X=Xtrain, y=ytrain, icpt=1, reg=1e-3, tol=1e-9, verbose=TRUE) +B = lm(X=Xtrain, y=ytrain, intercept=1, reg=1e-3, tol=1e-9, verbose=TRUE) # model evaluation on test split -yhat = lmPredict(X=Xtest, B=B, icpt=1, ytest=ytest, verbose=TRUE); +yhat = lmPredict(X=Xtest, B=B, intercept=1, Ytest=ytest, verbose=TRUE); # write trained model and meta data write(B, $out) diff --git a/src/test/scripts/functions/federated/FederatedLmPipeline4Workers.dml b/src/test/scripts/functions/federated/FederatedLmPipeline4Workers.dml index 0dfdefbe8ff..7e78a005724 100644 --- a/src/test/scripts/functions/federated/FederatedLmPipeline4Workers.dml +++ b/src/test/scripts/functions/federated/FederatedLmPipeline4Workers.dml @@ -46,10 +46,10 @@ X = scale(X=X, center=TRUE, scale=TRUE); [Xtrain , Xtest, ytrain, ytest] = split(X=X, Y=y, cont=$cont, seed=7) # train 
regression model -B = lm(X=Xtrain, y=ytrain, icpt=1, reg=1e-3, tol=1e-9, verbose=TRUE) +B = lm(X=Xtrain, y=ytrain, intercept=1, reg=1e-3, tol=1e-9, verbose=TRUE) # model evaluation on test split -yhat = lmPredict(X=Xtest, B=B, icpt=1, ytest=ytest, verbose=TRUE); +yhat = lmPredict(X=Xtest, B=B, intercept=1, Ytest=ytest, verbose=TRUE); # write trained model and meta data write(B, $out) diff --git a/src/test/scripts/functions/federated/FederatedLmPipeline4WorkersReference.dml b/src/test/scripts/functions/federated/FederatedLmPipeline4WorkersReference.dml index 62e1642a19d..5a1bfcad7e9 100644 --- a/src/test/scripts/functions/federated/FederatedLmPipeline4WorkersReference.dml +++ b/src/test/scripts/functions/federated/FederatedLmPipeline4WorkersReference.dml @@ -44,10 +44,10 @@ X = scale(X=X, center=TRUE, scale=TRUE); [Xtrain , Xtest, ytrain, ytest] = split(X=X, Y=y, cont=$6, seed=7) # train regression model -B = lm(X=Xtrain, y=ytrain, icpt=1, reg=1e-3, tol=1e-9, verbose=TRUE) +B = lm(X=Xtrain, y=ytrain, intercept=1, reg=1e-3, tol=1e-9, verbose=TRUE) # model evaluation on test split -yhat = lmPredict(X=Xtest, B=B, icpt=1, ytest=ytest, verbose=TRUE); +yhat = lmPredict(X=Xtest, B=B, intercept=1, Ytest=ytest, verbose=TRUE); # write trained model and meta data write(B, $7) diff --git a/src/test/scripts/functions/federated/FederatedLmPipelineReference.dml b/src/test/scripts/functions/federated/FederatedLmPipelineReference.dml index 21ee4634886..e6abddd21d5 100644 --- a/src/test/scripts/functions/federated/FederatedLmPipelineReference.dml +++ b/src/test/scripts/functions/federated/FederatedLmPipelineReference.dml @@ -44,10 +44,10 @@ X = scale(X=X, center=TRUE, scale=TRUE); [Xtrain , Xtest, ytrain, ytest] = split(X=X, Y=y, cont=$6, seed=7) # train regression model -B = lm(X=Xtrain, y=ytrain, icpt=1, reg=1e-3, tol=1e-9, verbose=TRUE) +B = lm(X=Xtrain, y=ytrain, intercept=1, reg=1e-3, tol=1e-9, verbose=TRUE) # model evaluation on test split -yhat = lmPredict(X=Xtest, B=B, icpt=1, ytest=ytest, verbose=TRUE); +yhat = lmPredict(X=Xtest, B=B, intercept=1, Ytest=ytest, verbose=TRUE); # write trained model and meta data write(B, $7) diff --git a/src/test/scripts/functions/federated/FederatedLogRegTest.dml b/src/test/scripts/functions/federated/FederatedLogRegTest.dml index a3d1f8951e0..fd26fe017b0 100644 --- a/src/test/scripts/functions/federated/FederatedLogRegTest.dml +++ b/src/test/scripts/functions/federated/FederatedLogRegTest.dml @@ -22,5 +22,5 @@ X = federated(addresses=list($in_X1, $in_X2), ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) Y = read($in_Y) -model = multiLogReg(X=X, Y=Y, tol=1e-5, maxi=30, icpt = 0) +model = multiLogReg(X=X, Y=Y, tol=1e-5, maxIter=30, intercept = 0) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedLogRegTestReference.dml b/src/test/scripts/functions/federated/FederatedLogRegTestReference.dml index 5e9d1329437..2b9136d40fa 100644 --- a/src/test/scripts/functions/federated/FederatedLogRegTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedLogRegTestReference.dml @@ -21,5 +21,5 @@ X = rbind(read($1), read($2)) Y = read($3) -model = multiLogReg(X=X, Y=Y, tol=1e-5, maxi=30, icpt = 0) +model = multiLogReg(X=X, Y=Y, tol=1e-5, maxIter=30, intercept = 0) write(model, $4) diff --git a/src/test/scripts/functions/federated/FederatedMSVMTest.dml b/src/test/scripts/functions/federated/FederatedMSVMTest.dml index b44dd727e15..b5bf9166ab4 100644 --- 
a/src/test/scripts/functions/federated/FederatedMSVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedMSVMTest.dml @@ -30,6 +30,6 @@ else { ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) } -model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE) +model = msvm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100, verbose = FALSE) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml index e4da0f346a6..506dc8bb607 100644 --- a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml @@ -27,6 +27,6 @@ if( $4 ) { else X = rbind(read($1), read($2)) -model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE) +model = msvm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100, verbose = FALSE) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedPNMFTest.dml b/src/test/scripts/functions/federated/FederatedPNMFTest.dml index e8b01c93ac1..9b3a13dadfc 100644 --- a/src/test/scripts/functions/federated/FederatedPNMFTest.dml +++ b/src/test/scripts/functions/federated/FederatedPNMFTest.dml @@ -25,7 +25,7 @@ X = federated(addresses=list($in_X1, $in_X2), rank = $in_rank; max_iter = $in_max_iter; -[W, H] = pnmf(X = X, rnk = rank, maxi = max_iter); +[W, H] = pnmf(X = X, rank = rank, maxIter = max_iter); Z = W %*% H; diff --git a/src/test/scripts/functions/federated/FederatedPNMFTestReference.dml b/src/test/scripts/functions/federated/FederatedPNMFTestReference.dml index b501cf924c4..6b385697c02 100644 --- a/src/test/scripts/functions/federated/FederatedPNMFTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedPNMFTestReference.dml @@ -24,7 +24,7 @@ X = rbind(read($in_X1), read($in_X2)); rank = $in_rank; max_iter = $in_max_iter; -[W, H] = pnmf(X = X, rnk = rank, maxi = max_iter); +[W, H] = pnmf(X = X, rank = rank, maxIter = max_iter); Z = W %*% H; diff --git a/src/test/scripts/functions/federated/FederatedUnivarTest.dml b/src/test/scripts/functions/federated/FederatedUnivarTest.dml index 443c0c157e2..589668aa550 100644 --- a/src/test/scripts/functions/federated/FederatedUnivarTest.dml +++ b/src/test/scripts/functions/federated/FederatedUnivarTest.dml @@ -26,6 +26,6 @@ X = federated(addresses=list($in_X1, $in_X2, $in_X3, $in_X4), ranges=list(list(0, 0), list($rows, $cols/4), list(0,$cols/4), list($rows, $cols/2), list(0,$cols/2), list($rows, 3*($cols/4)), list(0, 3*($cols/4)), list($rows, $cols))); Y = read($in_Y); -B = univar(X=X, types=Y); +B = univar(X=X, ctypes=Y); write(B, $out); diff --git a/src/test/scripts/functions/federated/FederatedUnivarTestReference.dml b/src/test/scripts/functions/federated/FederatedUnivarTestReference.dml index 46072e7e6a8..34c0ce383a2 100644 --- a/src/test/scripts/functions/federated/FederatedUnivarTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedUnivarTestReference.dml @@ -22,5 +22,5 @@ #X = rbind(read($1), read($2), read($3), read($4)); X = cbind(read($1), read($2), read($3), read($4)); types = read($5); -B = univar(X=X, types=types); +B = univar(X=X, ctypes=types); write(B, $6); diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml index a56619f1aae..c7789108956 
100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml @@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1, $in_X2), ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) Y = federated(addresses=list($in_Y1, $in_Y2), ranges=list(list(0, 0), list($rows / 2, 1), list($rows / 2, 0), list($rows, 1))) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml index aa73e3a463c..e0b7d134c1b 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml @@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1), ranges=list(list(0, 0), list($rows / 2, $cols))) Y = federated(addresses=list($in_Y1), ranges=list(list(0, 0), list($rows / 2, 1))) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml index 03c2b5f0bfe..d26b7350c1d 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml @@ -21,5 +21,5 @@ X = read($1) Y = read($3) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml index 26233a3ab4c..6fc890bed34 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml @@ -21,5 +21,5 @@ X = rbind(read($1), read($2)) Y = rbind(read($3), read($4)) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, tol = 1e-12, reg = 1, maxIter = 100) write(model, $5) diff --git a/src/test/scripts/functions/lineage/FedLmPipelineReuse.dml b/src/test/scripts/functions/lineage/FedLmPipelineReuse.dml index 3db669be9f3..1a2ddf7246f 100644 --- a/src/test/scripts/functions/lineage/FedLmPipelineReuse.dml +++ b/src/test/scripts/functions/lineage/FedLmPipelineReuse.dml @@ -46,12 +46,12 @@ X = scale(X=X, center=TRUE, scale=TRUE); # train regression model with different hyperparameters for (i in 1:10) { reg = 1e-3 + (0 * 0.001); - B = lm(X=Xtrain, y=ytrain, icpt=1, reg=reg, tol=1e-9, verbose=TRUE); + B = lm(X=Xtrain, y=ytrain, intercept=1, reg=reg, tol=1e-9, verbose=TRUE); # TODO: find the best beta } # model evaluation on test split -yhat = lmPredict(X=Xtest, B=B, icpt=1, ytest=ytest, verbose=TRUE); +yhat = lmPredict(X=Xtest, B=B, intercept=1, Ytest=ytest, verbose=TRUE); # write trained model and meta data write(B, $out) diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg1.dml b/src/test/scripts/functions/lineage/LineageReuseAlg1.dml index f87ac4071ac..33cdd3e595f 100644 --- 
a/src/test/scripts/functions/lineage/LineageReuseAlg1.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg1.dml @@ -23,7 +23,7 @@ X = rand(rows=100, cols=10, sparsity=1.0, seed=1); y = X %*% rand(rows=10, cols=1, sparsity=1.0, seed=1); R = matrix(0, 101, 2); -[C, S] = steplm(X=X, y=y, icpt=2); +[C, S] = steplm(X=X, y=y, intercept=2); S = cbind(S, matrix(1, 1, 1)); R[1:nrow(C) ,1] = C; R[1:ncol(S) ,2] = t(S); diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg2.dml b/src/test/scripts/functions/lineage/LineageReuseAlg2.dml index d838e5b63bf..950919c9684 100644 --- a/src/test/scripts/functions/lineage/LineageReuseAlg2.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg2.dml @@ -49,7 +49,7 @@ for (i in 1:5) reg = 10^h1; icpt = h2; tol = 10^h3; - beta = lm(X=Xi, y=y, icpt=icpt, reg=reg, tol=tol, maxi=0, verbose=FALSE); + beta = lm(X=Xi, y=y, intercept=icpt, reg=reg, tol=tol, maxIter=0, verbose=FALSE); Rbeta[k, 1:nrow(beta)] = t(beta); Rloss[k,] = l2norm(Xi, y, beta, icpt); k = k + 1; diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg3.dml b/src/test/scripts/functions/lineage/LineageReuseAlg3.dml index 17e6b6d8dfd..f78bdddf0a3 100644 --- a/src/test/scripts/functions/lineage/LineageReuseAlg3.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg3.dml @@ -24,9 +24,9 @@ findBetas = function(Matrix[double] X, Matrix[double] y) { R = matrix(0, rows=10*(ncol(X)+1), cols=5); for (lamda in 20:39) { - #betas = multiLogReg(X=X, Y=y, maxii=0, verbose=FALSE); - betas = multiLogReg(X=X, Y=y, icpt=2, tol=0.000001, - reg=lamda, maxi=100, maxii=0, verbose=FALSE); + #betas = multiLogReg(X=X, Y=y, maxInnerIter=0, verbose=FALSE); + betas = multiLogReg(X=X, Y=y, intercept=2, tol=0.000001, + reg=lamda, maxIter=100, maxInnerIter=0, verbose=FALSE); R[1:ncol(X)+1,] = betas; } all_betas = R; @@ -39,9 +39,9 @@ findIcpt = function(Matrix[double] X, Matrix[double] y) for (lamda in 20:29) { for (icpt in 1:2) { #Function level reuse of 3 out of 6 calls. 
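
# ---------------------------------------------------------------------------
# Illustration only, not a hunk of this patch: the multiLogReg rename that
# recurs throughout these scripts, shown end-to-end on synthetic data. The
# old keywords icpt, maxi, and maxii map one-to-one onto intercept, maxIter,
# and maxInnerIter; all other arguments keep their names. Data and values
# here are made up for the sketch.
X = rand(rows=100, cols=5, seed=42);
Y = round(rand(rows=100, cols=1, min=1, max=2, seed=43)); # labels in {1, 2}
# before: B = multiLogReg(X=X, Y=Y, icpt=2, reg=1e-3, tol=1e-6, maxi=100, maxii=0, verbose=FALSE);
B = multiLogReg(X=X, Y=Y, intercept=2, reg=1e-3, tol=1e-6, maxIter=100, maxInnerIter=0, verbose=FALSE);
[prob, yhat, acc] = multiLogRegPredict(X=X, B=B, Y=Y, verbose=FALSE);
# ---------------------------------------------------------------------------
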
- betas = multiLogReg(X=X, Y=y, icpt=icpt, tol=0.000001, - reg=lamda, maxi=100, maxii=0, verbose=FALSE); - #betas = multiLogReg(X=X, Y=y, icpt=icpt, maxii=0, verbose=FALSE); + betas = multiLogReg(X=X, Y=y, intercept=icpt, tol=0.000001, + reg=lamda, maxIter=100, maxInnerIter=0, verbose=FALSE); + #betas = multiLogReg(X=X, Y=y, intercept=icpt, maxInnerIter=0, verbose=FALSE); R[1:ncol(X)+1,] = betas; } } diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml index 660c705f28b..13194335f0f 100644 --- a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml @@ -42,13 +42,13 @@ y = ceil(y); for (l in 1:no_lamda) { - beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12, + beta = l2svm(X=X, Y=y, intercept=FALSE, tol=1e-12, reg = reg, verbose=FALSE); Rbeta[1:nrow(beta),i] = beta; Rloss[i,] = l2norm(X, y, beta, FALSE); i = i + 1; - beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12, + beta = l2svm(X=X, Y=y, intercept=TRUE, tol=1e-12, reg = reg, verbose=FALSE); Rbeta[1:nrow(beta),i] = beta; Rloss[i,] = l2norm(X, y, beta, TRUE); diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg6.dml b/src/test/scripts/functions/lineage/LineageReuseAlg6.dml index 5dc64b03b0f..cb38423078b 100644 --- a/src/test/scripts/functions/lineage/LineageReuseAlg6.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg6.dml @@ -88,8 +88,8 @@ Kc = floor(ncol(A) * 0.8); for (i in 1:10) { newA1 = PCA(A=A, K=Kc+i); - beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE); - y_predict1 = lmPredict(X=newA1, B=beta1, ytest=matrix(0,1,1), icpt=1); + beta1 = lm(X=newA1, y=y, intercept=1, reg=0.0001, verbose=FALSE); + y_predict1 = lmPredict(X=newA1, B=beta1, Ytest=matrix(0,1,1), intercept=1); R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1); R[,i] = R2_ad1; } diff --git a/src/test/scripts/functions/lineage/LineageReuseEval1.dml b/src/test/scripts/functions/lineage/LineageReuseEval1.dml index 00a1b390ad1..cfd69b22758 100644 --- a/src/test/scripts/functions/lineage/LineageReuseEval1.dml +++ b/src/test/scripts/functions/lineage/LineageReuseEval1.dml @@ -22,7 +22,7 @@ l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B) return (Matrix[Double] loss) { - yhat = lmPredict(X=X, B=B, ytest=y) + yhat = lmPredict(X=X, B=B, Ytest=y) loss = as.matrix(sum((y - yhat)^2)); } @@ -35,7 +35,7 @@ ytrain = y[1:N,]; Xtest = X[(N+1):nrow(X),]; ytest = y[(N+1):nrow(X),]; -params = list("icpt","reg", "tol"); #numValues = 3, 5, 7 +params = list("intercept","reg", "tol"); #numValues = 3, 5, 7 paramRanges = list(seq(0,2), 10^seq(0,-4), 10^seq(-6,-12)); #3*5*7 = 105 [B1, opt] = gridSearch(X=Xtrain, y=ytrain, train="lm", predict="l2norm", numB=ncol(X)+1, params=params, paramValues=paramRanges, verbose=FALSE); diff --git a/src/test/scripts/functions/lineage/LineageReuseEval2.dml b/src/test/scripts/functions/lineage/LineageReuseEval2.dml index 2263d77944f..96177fb19bc 100644 --- a/src/test/scripts/functions/lineage/LineageReuseEval2.dml +++ b/src/test/scripts/functions/lineage/LineageReuseEval2.dml @@ -34,9 +34,9 @@ ytrain = y[1:N,]; Xtest = X[(N+1):nrow(X),]; ytest = y[(N+1):nrow(X),]; -params = list("icpt", "reg", "maxii"); +params = list("intercept", "reg", "maxInnerIter"); paramRanges = list(seq(0,2),10^seq(1,-6), 10^seq(1,3)); -trainArgs = list(X=Xtrain, Y=ytrain, icpt=-1, reg=-1, tol=1e-9, maxi=100, maxii=-1, verbose=FALSE); +trainArgs = list(X=Xtrain, Y=ytrain, intercept=-1, reg=-1, 
tol=1e-9, maxIter=100, maxInnerIter=-1, verbose=FALSE); [B1,opt] = gridSearch(X=Xtrain, y=ytrain, train="multiLogReg", predict="accuracy", numB=ncol(X)+1, params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=FALSE); diff --git a/src/test/scripts/functions/lineage/LineageReuseGPU4.dml b/src/test/scripts/functions/lineage/LineageReuseGPU4.dml index 0f4de81b34d..35028b52b28 100644 --- a/src/test/scripts/functions/lineage/LineageReuseGPU4.dml +++ b/src/test/scripts/functions/lineage/LineageReuseGPU4.dml @@ -48,7 +48,7 @@ for (i in 1:5) reg = 10^h1; icpt = h2; tol = 10^h3; - beta = lm(X=Xi, y=y, icpt=icpt, reg=reg, tol=tol, maxi=0, verbose=FALSE); + beta = lm(X=Xi, y=y, intercept=icpt, reg=reg, tol=tol, maxIter=0, verbose=FALSE); Rbeta[k, 1:nrow(beta)] = t(beta); Rloss[k,] = l2norm(Xi, y, beta, icpt); k = k + 1; diff --git a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml index ebd360b7316..269918d483e 100644 --- a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml +++ b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml @@ -43,7 +43,7 @@ msvm2 = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, parfor(class in 1:max(Y), opt=CONSTRAINED, par=4, mode=REMOTE_SPARK) { Y_local = 2 * (Y == class) - 1 w[,class] = l2svm(X=X, Y=Y_local, intercept=intercept, - epsilon=epsilon, reg=reg, maxIterations=maxIterations, + tol=epsilon, reg=reg, maxIter=maxIterations, verbose= verbose, columnId=class) } diff --git a/src/test/scripts/functions/misc/FunPotpourriParforEvalBuiltin.dml b/src/test/scripts/functions/misc/FunPotpourriParforEvalBuiltin.dml index ab8991ea66b..89ccfe375f3 100644 --- a/src/test/scripts/functions/misc/FunPotpourriParforEvalBuiltin.dml +++ b/src/test/scripts/functions/misc/FunPotpourriParforEvalBuiltin.dml @@ -57,10 +57,10 @@ crossV = function(Matrix[double] X, Matrix[double] y, Integer k, Matrix[Double] trainy = trainset[, 1] testX = testset[, 2:ncol(testset)] testy = testset[, 1] - beta = multiLogReg(X=trainX, Y=trainy, icpt=as.scalar(MLhp[1,1]), reg=as.scalar(MLhp[1,2]), tol=as.scalar(MLhp[1,3]), - maxi=as.scalar(MLhp[1,4]), maxii=50, verbose=FALSE); + beta = multiLogReg(X=trainX, Y=trainy, intercept=as.scalar(MLhp[1,1]), reg=as.scalar(MLhp[1,2]), tol=as.scalar(MLhp[1,3]), + maxIter=as.scalar(MLhp[1,4]), maxInnerIter=50, verbose=FALSE); [prob, yhat, acc] = multiLogRegPredict(testX, beta, testy, FALSE) - accuracy = getAccuracy(testy, yhat, isWeighted) + accuracy = getAccuracy(yhat, testy, isWeighted) accuracyMatrix[i] = accuracy } } diff --git a/src/test/scripts/functions/parfor/parfor_eval_local.dml b/src/test/scripts/functions/parfor/parfor_eval_local.dml index bda33a76798..17812a8ea8b 100644 --- a/src/test/scripts/functions/parfor/parfor_eval_local.dml +++ b/src/test/scripts/functions/parfor/parfor_eval_local.dml @@ -25,5 +25,5 @@ parfor(i in 1:10, opt=CONSTRAINED, mode=LOCAL) { if( i>5 ) s = "outlierByIQR"; print(toString(sum(eval(s, - list(X=X, k=1.5, repairMethod=1, max_iterations=0, verbose=FALSE))))); + list(X=X, k=1.5, repairMethod=1, maxIter=0, verbose=FALSE))))); } diff --git a/src/test/scripts/functions/parfor/parfor_eval_remote.dml b/src/test/scripts/functions/parfor/parfor_eval_remote.dml index c6bf05e835e..cb59c96eaf6 100644 --- a/src/test/scripts/functions/parfor/parfor_eval_remote.dml +++ b/src/test/scripts/functions/parfor/parfor_eval_remote.dml @@ -25,5 +25,5 @@ parfor(i in 1:10, opt=CONSTRAINED, mode=REMOTE_SPARK) { if( i>5 ) s = 
"outlierByIQR"; print(toString(sum(eval(s, - list(X=X, k=1.5, repairMethod=1, max_iterations=0, verbose=FALSE))))); + list(X=X, k=1.5, repairMethod=1, maxIter=0, verbose=FALSE))))); } diff --git a/src/test/scripts/functions/parfor/parfor_eval_remote2.dml b/src/test/scripts/functions/parfor/parfor_eval_remote2.dml index cb0c60fce31..a6cb4ea03fd 100644 --- a/src/test/scripts/functions/parfor/parfor_eval_remote2.dml +++ b/src/test/scripts/functions/parfor/parfor_eval_remote2.dml @@ -26,7 +26,7 @@ parfor(i in 1:10, opt=CONSTRAINED, mode=REMOTE_SPARK) { if( i>5 ) s = "outlierByIQR"; print(toString(sum(eval(s, - list(X=X, k=1.5, repairMethod=1, max_iterations=0, verbose=FALSE))))); + list(X=X, k=1.5, repairMethod=1, maxIter=0, verbose=FALSE))))); } parfor(i2 in 1:10, opt=CONSTRAINED, mode=REMOTE_SPARK) { @@ -34,5 +34,5 @@ parfor(i2 in 1:10, opt=CONSTRAINED, mode=REMOTE_SPARK) { if( i2>5 ) s2 = "outlierByIQR"; print(toString(sum(eval(s2, - list(X=X, k=1.5, repairMethod=1, max_iterations=0, verbose=FALSE))))); + list(X=X, k=1.5, repairMethod=1, maxIter=0, verbose=FALSE))))); } diff --git a/src/test/scripts/functions/pipelines/fit_pipelineTest.dml b/src/test/scripts/functions/pipelines/fit_pipelineTest.dml index 34ae24bbe25..2d0b58b5382 100644 --- a/src/test/scripts/functions/pipelines/fit_pipelineTest.dml +++ b/src/test/scripts/functions/pipelines/fit_pipelineTest.dml @@ -92,7 +92,7 @@ return(Matrix[Double] output, Matrix[Double] error) nc = max(Y); params = list("icpt", "reg", "tol", "maxi") paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3)); - trainArgs = list(X=X, y=Y, icpt=-1, reg=-1, tol=1e-9, maxi=100, maxii=-1, verbose=FALSE); + trainArgs = list(X=X, y=Y, intercept=-1, reg=-1, tol=1e-9, maxIter=100, maxInnerIter=-1, verbose=FALSE); [B1,opt] = gridSearch(X=X, y=Y, train="multiLogReg", predict="accuracy", numB=(ncol(X)+1)*(nc-1), params=params, paramValues=paramRanges, trainArgs=trainArgs, cv=TRUE, cv=3, verbose=FALSE); evalFunHp = as.matrix(opt) @@ -103,11 +103,11 @@ return(Matrix[Double] output, Matrix[Double] error) a = 0 } else { - beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), - maxi=1000, maxii=0, verbose=FALSE); + beta = multiLogReg(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), + maxIter=1000, maxInnerIter=0, verbose=FALSE); [prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE) error = yhat != Ytest - a = getAccuracy(Ytest, yhat, TRUE) + a = getAccuracy(yhat, Ytest, TRUE) accuracy = as.matrix(accuracy) print("accuracy: "+toString(accuracy)) } diff --git a/src/test/scripts/functions/pipelines/topkLogicalTest.dml b/src/test/scripts/functions/pipelines/topkLogicalTest.dml index 3c6e70cd7bb..13b5dba166e 100644 --- a/src/test/scripts/functions/pipelines/topkLogicalTest.dml +++ b/src/test/scripts/functions/pipelines/topkLogicalTest.dml @@ -111,10 +111,10 @@ evalML = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matr return(Matrix[Double] accuracy) { - beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), - maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE); + beta = multiLogReg(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), + maxIter=as.scalar(evalFunHp[1,4]), maxInnerIter=50, verbose=FALSE); [prob, yhat, a] = multiLogRegPredict(Xtest, beta, Ytest, FALSE) - # 
accuracy = getAccuracy(Ytest, yhat, FALSE) + # accuracy = getAccuracy(yhat, Ytest, FALSE) print("accuracy weighted: "+a) accuracy = as.matrix(a) } diff --git a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml index 84549f199d9..b15b1ec3c33 100644 --- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml +++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml @@ -58,8 +58,8 @@ metaInfo = metaInfo[, 2:ncol(metaInfo)] [topKPipelines, topKHyperParams, topKScores, baseLineScore, evalFunHp, applyFunc] = topk_cleaning(dataTrain=trainData, dataTest=testData, metaData=metaInfo, primitives=primitives, parameters=param, refSol = frame(["imputeByMean", "scale", "dummycoding"], rows=1, cols=3), - evaluationFunc=evalFunc, evalFunHp=as.matrix(NaN),topK=topK, resource_val=resources, enablePruning=TRUE, - expectedIncrease=expectedIncrease, seed = 23, max_iter=max_iter, cv=testCV, cvk=cvk, sample=sample, isLastLabel=TRUE, correctTypos=FALSE) + evaluationFunc=evalFunc, evalFunHp=as.matrix(NaN),topK=topK, resourceVal=resources, enablePruning=TRUE, + expectedIncrease=expectedIncrease, seed = 23, maxIter=max_iter, cv=testCV, cvk=cvk, sample=sample, isLastLabel=TRUE, correctTypos=FALSE) write(topKPipelines, output+"/pip.csv", format="csv") write(topKHyperParams, output+"/hp.csv", format="csv") @@ -82,7 +82,7 @@ return(Matrix[Double] output, Matrix[Double] error) nc = max(Y); - params = list("icpt", "reg", "tol") + params = list("intercept", "reg", "tol") paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5)); - trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=1e-9, maxi=1000, maxii=-1, verbose=FALSE); + trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, tol=1e-9, maxIter=1000, maxInnerIter=-1, verbose=FALSE); dataArgs = list("X", "Y"); # [B1,opt] = gridSearch(X=X, y=Y, train="multiLogReg", predict="accuracy", numB=(ncol(X)+1)*(nc-1), # params=params, paramValues=paramRanges, dataArgs=dataArgs, trainArgs=trainArgs, cv=TRUE, cvk=3, verbose=TRUE); @@ -96,8 +96,8 @@ return(Matrix[Double] output, Matrix[Double] error) a = 0 } else { - beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), - maxi=1000, maxii=0, verbose=FALSE); + beta = multiLogReg(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), + maxIter=1000, maxInnerIter=0, verbose=FALSE); [prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE) error = yhat != Ytest accuracy = as.matrix(accuracy) @@ -118,9 +118,9 @@ return(Matrix[Double] output, Matrix[Double] error) if(is.na(as.scalar(evalFunHp[1,1]))) { nc = max(Y); - params = list("intercept", "reg", "epsilon") + params = list("intercept", "reg", "tol") paramRanges = list(seq(0, 1), 10^seq(1,-3), 10^seq(1,-5)); - trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, epsilon=-1, maxIterations=1000, verbose=FALSE); + trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, tol=-1, maxIter=1000, verbose=FALSE); dataArgs = list("X", "Y"); [B1,opt] = gridSearch(X=X, y=Y, train="msvm", predict="accuracyMSVM", numB=(ncol(X)+1)*(nc), params=params, paramValues=paramRanges, dataArgs=dataArgs, trainArgs=trainArgs, cv=TRUE, cvk=3, verbose=FALSE); @@ -134,8 +134,8 @@ return(Matrix[Double] output, Matrix[Double] error) a = 0 } else { - beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), epsilon=as.scalar(evalFunHp[1,3]), - maxIterations=1000, verbose=FALSE); 
+ beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), + maxIter=1000, verbose=FALSE); yhat = msvmPredict(X=Xtest, W=beta); yhat = rowIndexMax(yhat) accuracy = mean(yhat == Ytest) * 100 diff --git a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml index 6a13253e08a..f3f782529ac 100644 --- a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml +++ b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml @@ -50,7 +50,7 @@ else { #matrix("1 1e-6 1e-9 1000", rows=1, cols=4) [topKPipelines, topKHyperParams, topKScores, baseLineScore, evalFunHp, applyFunc] = topk_cleaning(dataTrain=trainData, dataTest=testData, primitives=primitives, parameters=param, evaluationFunc=evalFunc, evalFunHp=as.matrix(NaN), - topK=topK, resource_val=resources, cv=testCV, cvk=cvk, sample=sample, isLastLabel=TRUE, correctTypos=FALSE) + topK=topK, resourceVal=resources, cv=testCV, cvk=cvk, sample=sample, isLastLabel=TRUE, correctTypos=FALSE) write(topKPipelines, output+"/pip.csv", format="csv") write(topKHyperParams, output+"/hp.csv", format="csv") @@ -71,15 +71,15 @@ return(Matrix[Double] output) if(is.na(as.scalar(evalFunHp[1,1]))) { # do the gridsearch for hyper-parameters - lArgs=list(X=X, y=Y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE); - params = list("icpt","reg", "tol"); + lArgs=list(X=X, y=Y, intercept=0, reg=-1, tol=-1, maxIter=-1, verbose=FALSE); + params = list("intercept","reg", "tol"); paramRanges = list(seq(0,2,1), 10^seq(0,-4), 10^seq(-6,-12)); [B1, opt] = gridSearch(X=X, y=Y, train="lm", predict="wmape", trainArgs=lArgs, numB=ncol(X)+1, params=params, paramValues=paramRanges, cv=TRUE, cvk=3, verbose=FALSE); evalFunHp = as.matrix(opt) } - beta = lm(X=X, y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), - maxi=1000, verbose=FALSE); + beta = lm(X=X, y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), + tol=as.scalar(evalFunHp[1,3]), maxIter=1000, verbose=FALSE); acc = wmape(Xtest, Ytest, beta) accuracy = (1 - acc) @@ -88,14 +88,14 @@ return(Matrix[Double] output) # wmape = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B) return (Matrix[Double] loss) { # # loss = as.matrix(sum((y - X%*%B)^2)); - # pred = lmPredict(X=X, B=B, ytest=y); + # pred = lmPredict(X=X, B=B, Ytest=y); # WMAPE = sum(abs(y - pred))/sum(abs(y)) #this will give the lose into range of [0,1] # loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE)) # } wmape = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B) return (Matrix[Double] loss) { # loss = as.matrix(sum((y - X%*%B)^2)); - pred = lmPredict(X=X, B=B, ytest=y, verbose=FALSE); + pred = lmPredict(X=X, B=B, Ytest=y, verbose=FALSE); # print("WMAPO: "+(1 - (sum(abs((pred - y)/(pred + y)))/nrow(y)))) WMAPE = 1 - (sum(abs((pred - y)/(pred + y)))/nrow(y)) #this will give the lose into range of [0,1] loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE)) diff --git a/src/test/scripts/functions/privacy/FederatedLmCG2.dml b/src/test/scripts/functions/privacy/FederatedLmCG2.dml index c8ff3b7bc74..820b4d4a3a7 100644 --- a/src/test/scripts/functions/privacy/FederatedLmCG2.dml +++ b/src/test/scripts/functions/privacy/FederatedLmCG2.dml @@ -23,6 +23,6 @@ X = federated(addresses=list($X1, $X2), ranges=list(list(0, 0), list($r / 2, $c), list($r / 2, 0), list($r, $c))) y = 
federated(addresses=list($y1, $y2), ranges=list(list(0, 0), list($r / 2, 1), list($r / 2, 0), list($r, 1))) -C = lmCG(X = X, y = y, reg = 1e-12, maxi = 2, verbose=FALSE) +C = lmCG(X = X, y = y, reg = 1e-12, maxIter = 2, verbose=FALSE) write(C, $C) diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTest.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTest.dml index 8b259dbb99d..63514e0124b 100644 --- a/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTest.dml +++ b/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTest.dml @@ -22,5 +22,5 @@ X = federated(addresses=list($X1, $X2), ranges=list(list(0, 0), list($r / 2, $c), list($r / 2, 0), list($r, $c))) -[C, Y] = kmeans(X=X,k=4, runs=1, max_iter=120, seed=93) +[C, Y] = kmeans(X=X,k=4, runs=1, maxIter=120, seed=93) write(C, $Z); diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTestReference.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTestReference.dml index b2510c560e6..a1700e5d224 100644 --- a/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTestReference.dml +++ b/src/test/scripts/functions/privacy/fedplanning/FederatedKMeansPlanningTestReference.dml @@ -20,6 +20,6 @@ #------------------------------------------------------------- - X = rbind(read($X1), read($X2)) - [C, Y] = kmeans(X=X,k=4, runs=1, max_iter=120, seed=93) - write(C, $Z); +X = rbind(read($X1), read($X2)) +[C, Y] = kmeans(X=X,k=4, runs=1, maxIter=120, seed=93) +write(C, $Z); diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTest.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTest.dml index 134d1b35c27..4285ba0c97f 100644 --- a/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTest.dml +++ b/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTest.dml @@ -31,6 +31,6 @@ reg = 1 maxIterations = 100 - model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = epsilon, reg = reg, maxIterations = maxIterations) + model = l2svm(X=X, Y=Y, intercept = FALSE, tol = epsilon, reg = reg, maxIter = maxIterations) write(model, $Z) diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTestReference.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTestReference.dml index 7fec5d2a202..1dd660a4884 100644 --- a/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTestReference.dml +++ b/src/test/scripts/functions/privacy/fedplanning/FederatedL2SVMFunctionPlanningTestReference.dml @@ -30,6 +30,6 @@ reg = 1 maxIterations = 100 - model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = epsilon, reg = reg, maxIterations = maxIterations) + model = l2svm(X=X, Y=Y, intercept = FALSE, tol = epsilon, reg = reg, maxIter = maxIterations) write(model, $Z) diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml index 057069312f8..308f3ac9c1c 100644 --- a/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml +++ b/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml @@ -91,16 +91,16 @@ Kc = floor(ncol(A) * 0.8); for (i in 1:10) { newA1 = PCA(A=A, K=Kc+i); - beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE); - y_predict1 = lmPredict(X=newA1, B=beta1, ytest=matrix(0,1,1), icpt=1); + beta1 = lm(X=newA1, y=y, intercept=1, 
reg=0.0001, verbose=FALSE); + y_predict1 = lmPredict(X=newA1, B=beta1, Ytest=matrix(0,1,1), intercept=1); R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1); R[,i] = R2_ad1; } for (i in 1:10) { newA3 = PCA(A=A, K=Kc+5); - beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE); - y_predict3 = lmPredict(X=newA3, B=beta3, ytest=matrix(0,1,1), icpt=1); + beta3 = lm(X=newA3, y=y, intercept=1, reg=0.001*i, verbose=FALSE); + y_predict3 = lmPredict(X=newA3, B=beta3, Ytest=matrix(0,1,1), intercept=1); R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1); R[,10+i] = R2_ad3; } diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml index 795f50760e3..4f1a5c01cec 100644 --- a/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml +++ b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml @@ -91,16 +91,16 @@ Kc = floor(ncol(A) * 0.8); for (i in 1:10) { newA1 = PCA(A=A, K=Kc+i); - beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE); - y_predict1 = lmPredict(X=newA1, B=beta1, ytest=matrix(0,1,1), icpt=1); + beta1 = lm(X=newA1, y=y, intercept=1, reg=0.0001, verbose=FALSE); + y_predict1 = lmPredict(X=newA1, B=beta1, Ytest=matrix(0,1,1), intercept=1); R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1); R[,i] = R2_ad1; } parfor (i in 1:10) { newA3 = PCA(A=A, K=Kc+5); - beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE); - y_predict3 = lmPredict(X=newA3, B=beta3, ytest=matrix(0,1,1), icpt=1); + beta3 = lm(X=newA3, y=y, intercept=1, reg=0.001*i, verbose=FALSE); + y_predict3 = lmPredict(X=newA3, B=beta3, Ytest=matrix(0,1,1), intercept=1); R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1); R[,10+i] = R2_ad3; }
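
# ---------------------------------------------------------------------------
# Illustration only, not a hunk of this patch: a condensed before/after
# summary of the renames applied above, sketched on synthetic data. All
# mappings are taken from the hunks in this patch: lm, lmCG, lmDS, and steplm
# rename icpt to intercept (and lm/lmCG rename maxi to maxIter); lmPredict
# renames ytest to Ytest and icpt to intercept; l2svm and msvm rename epsilon
# to tol and maxIterations to maxIter; kmeans renames eps to tol, max_iter to
# maxIter, and is_verbose to verbose; getAccuracy now takes predictions
# before labels.
X = rand(rows=200, cols=8, seed=7);
y = X %*% rand(rows=8, cols=1, seed=8);

# before: B = lm(X=X, y=y, icpt=1, reg=1e-3, tol=1e-9, maxi=100);
B = lm(X=X, y=y, intercept=1, reg=1e-3, tol=1e-9, maxIter=100);
# before: yhat = lmPredict(X=X, B=B, ytest=y, icpt=1);
yhat = lmPredict(X=X, B=B, Ytest=y, intercept=1);

# before: [C, cl] = kmeans(X=X, k=4, runs=1, max_iter=150, eps=1e-6, is_verbose=FALSE);
[C, cl] = kmeans(X=X, k=4, runs=1, maxIter=150, tol=1e-6, verbose=FALSE);

yb = (y > mean(y)) + 1; # synthetic two-class labels in {1, 2}
# before: w = l2svm(X=X, Y=yb, intercept=FALSE, epsilon=1e-12, reg=1, maxIterations=100);
w = l2svm(X=X, Y=yb, intercept=FALSE, tol=1e-12, reg=1, maxIter=100);
# ---------------------------------------------------------------------------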