From d9933fd93826b740bf2430729bee42b7cb2b33e7 Mon Sep 17 00:00:00 2001
From: Sander Valentin <sanderbboisen@gmail.com>
Date: Wed, 23 Feb 2022 10:45:52 +0100
Subject: [PATCH 1/5] Change AbstractMatrix to AbstractVecOrMat

---
 src/classification/main.jl | 22 +++++++++++-----------
 src/classification/tree.jl |  6 +++---
 src/measures.jl            | 14 +++++++-------
 src/regression/main.jl     |  6 +++---
 src/regression/tree.jl     |  6 +++---
 src/scikitlearnAPI.jl      |  6 +++---
 src/util.jl                |  2 +-
 7 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/classification/main.jl b/src/classification/main.jl
index 52e7bdb7..2f909a78 100644
--- a/src/classification/main.jl
+++ b/src/classification/main.jl
@@ -23,7 +23,7 @@ end
 
 # Applies `row_fun(X_row)::AbstractVector` to each row in X
 # and returns a matrix containing the resulting vectors, stacked vertically
-function stack_function_results(row_fun::Function, X::AbstractMatrix)
+function stack_function_results(row_fun::Function, X::AbstractVecOrMat)
     N = size(X, 1)
     N_cols = length(row_fun(X[1, :])) # gets the number of columns
     out = Array{Float64}(undef, N, N_cols)
@@ -52,7 +52,7 @@ end
 
 function build_stump(
         labels      :: AbstractVector{T},
-        features    :: AbstractMatrix{S},
+        features    :: AbstractVecOrMat{S},
         weights      = nothing;
         rng          = Random.GLOBAL_RNG) where {S, T}
 
@@ -73,7 +73,7 @@ end
 
 function build_tree(
         labels              :: AbstractVector{T},
-        features            :: AbstractMatrix{S},
+        features            :: AbstractVecOrMat{S},
         n_subfeatures        = 0,
         max_depth            = -1,
         min_samples_leaf     = 1,
@@ -150,7 +150,7 @@ function apply_tree(tree::Node{S, T}, features::AbstractVector{S}) where {S, T}
     end
 end
 
-function apply_tree(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}) where {S, T}
+function apply_tree(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}) where {S, T}
     N = size(features,1)
     predictions = Array{T}(undef, N)
     for i in 1:N
@@ -184,12 +184,12 @@ function apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels)
     end
 end
 
-apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
+apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
     stack_function_results(row->apply_tree_proba(tree, row, labels), features)
 
 function build_forest(
         labels              :: AbstractVector{T},
-        features            :: AbstractMatrix{S},
+        features            :: AbstractVecOrMat{S},
         n_subfeatures       = -1,
         n_trees             = 10,
         partial_sampling    = 0.7,
@@ -268,7 +268,7 @@ function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where
     end
 end
 
-function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S}) where {S, T}
+function apply_forest(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}) where {S, T}
     N = size(features,1)
     predictions = Array{T}(undef, N)
     for i in 1:N
@@ -290,13 +290,13 @@ function apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVector{S},
     return compute_probabilities(labels, votes)
 end
 
-apply_forest_proba(forest::Ensemble{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
+apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
     stack_function_results(row->apply_forest_proba(forest, row, labels),
                            features)
 
 function build_adaboost_stumps(
         labels       :: AbstractVector{T},
-        features     :: AbstractMatrix{S},
+        features     :: AbstractVecOrMat{S},
         n_iterations :: Integer;
         rng           = Random.GLOBAL_RNG) where {S, T}
     N = length(labels)
@@ -339,7 +339,7 @@ function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Fl
     return top_prediction
 end
 
-function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractMatrix{S}) where {S, T}
+function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVecOrMat{S}) where {S, T}
     n_samples = size(features, 1)
     predictions = Array{T}(undef, n_samples)
     for i in 1:n_samples
@@ -363,6 +363,6 @@ function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVec
 end
 
 function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
-                                    features::AbstractMatrix{S}, labels::AbstractVector{T}) where {S, T}
+                                    features::AbstractVecOrMat{S}, labels::AbstractVector{T}) where {S, T}
     stack_function_results(row->apply_adaboost_stumps_proba(stumps, coeffs, row, labels), features)
 end
diff --git a/src/classification/tree.jl b/src/classification/tree.jl
index ce22a9b0..3dccfca7 100644
--- a/src/classification/tree.jl
+++ b/src/classification/tree.jl
@@ -43,7 +43,7 @@ module treeclassifier
     # find an optimal split that satisfy the given constraints
     # (max_depth, min_samples_split, min_purity_increase)
     function _split!(
-            X                   :: AbstractMatrix{S},   # the feature array
+            X                   :: AbstractVecOrMat{S},   # the feature array
             Y                   :: AbstractVector{Int}, # the label array
             W                   :: AbstractVector{U},   # the weight vector
             purity_function     :: Function,
@@ -226,7 +226,7 @@ module treeclassifier
     end
 
     function _fit(
-            X                     :: AbstractMatrix{S},
+            X                     :: AbstractVecOrMat{S},
             Y                     :: AbstractVector{Int},
             W                     :: AbstractVector{U},
             loss                  :: Function,
@@ -273,7 +273,7 @@ module treeclassifier
     end
 
     function fit(;
-            X                     :: AbstractMatrix{S},
+            X                     :: AbstractVecOrMat{S},
             Y                     :: AbstractVector{T},
             W                     :: Union{Nothing, AbstractVector{U}},
             loss=util.entropy     :: Function,
diff --git a/src/measures.jl b/src/measures.jl
index 06de1e18..35a62c3f 100644
--- a/src/measures.jl
+++ b/src/measures.jl
@@ -72,7 +72,7 @@ function confusion_matrix(actual::AbstractVector, predicted::AbstractVector)
     return ConfusionMatrix(classes, CM, accuracy, kappa)
 end
 
-function _nfoldCV(classifier::Symbol, labels::AbstractVector{T}, features::AbstractMatrix{S}, args...; verbose, rng) where {S, T}
+function _nfoldCV(classifier::Symbol, labels::AbstractVector{T}, features::AbstractVecOrMat{S}, args...; verbose, rng) where {S, T}
     _rng = mk_rng(rng)::Random.AbstractRNG
     nfolds = args[1]
     if nfolds < 2
@@ -151,7 +151,7 @@ end
 
 function nfoldCV_tree(
         labels              :: AbstractVector{T},
-        features            :: AbstractMatrix{S},
+        features            :: AbstractVecOrMat{S},
         n_folds             :: Integer,
         pruning_purity      :: Float64 = 1.0,
         max_depth           :: Integer = -1,
@@ -165,7 +165,7 @@ function nfoldCV_tree(
 end
 function nfoldCV_forest(
         labels              :: AbstractVector{T},
-        features            :: AbstractMatrix{S},
+        features            :: AbstractVecOrMat{S},
         n_folds             :: Integer,
         n_subfeatures       :: Integer = -1,
         n_trees             :: Integer = 10,
@@ -181,7 +181,7 @@ function nfoldCV_forest(
 end
 function nfoldCV_stumps(
         labels       ::AbstractVector{T},
-        features     ::AbstractMatrix{S},
+        features     ::AbstractVecOrMat{S},
         n_folds      ::Integer,
         n_iterations ::Integer = 10;
         verbose             :: Bool = true,
@@ -203,7 +203,7 @@ function R2(actual, predicted)
     return 1.0 - ss_residual/ss_total
 end
 
-function _nfoldCV(regressor::Symbol, labels::AbstractVector{T}, features::AbstractMatrix, args...; verbose, rng) where T <: Float64
+function _nfoldCV(regressor::Symbol, labels::AbstractVector{T}, features::AbstractVecOrMat, args...; verbose, rng) where T <: Float64
     _rng = mk_rng(rng)::Random.AbstractRNG
     nfolds = args[1]
     if nfolds < 2
@@ -279,7 +279,7 @@ end
 
 function nfoldCV_tree(
     labels              :: AbstractVector{T},
-    features            :: AbstractMatrix{S},
+    features            :: AbstractVecOrMat{S},
     n_folds             :: Integer,
     pruning_purity      :: Float64 = 1.0,
     max_depth           :: Integer = -1,
@@ -293,7 +293,7 @@ _nfoldCV(:tree, labels, features, n_folds, pruning_purity, max_depth,
 end
 function nfoldCV_forest(
     labels              :: AbstractVector{T},
-    features            :: AbstractMatrix{S},
+    features            :: AbstractVecOrMat{S},
     n_folds             :: Integer,
     n_subfeatures       :: Integer = -1,
     n_trees             :: Integer = 10,
diff --git a/src/regression/main.jl b/src/regression/main.jl
index 2d012aa0..af234e4c 100644
--- a/src/regression/main.jl
+++ b/src/regression/main.jl
@@ -10,13 +10,13 @@ function _convert(node::treeregressor.NodeMeta{S}, labels::Array{T}) where {S, T
     end
 end
 
-function build_stump(labels::AbstractVector{T}, features::AbstractMatrix{S}; rng = Random.GLOBAL_RNG) where {S, T <: Float64}
+function build_stump(labels::AbstractVector{T}, features::AbstractVecOrMat{S}; rng = Random.GLOBAL_RNG) where {S, T <: Float64}
     return build_tree(labels, features, 0, 1)
 end
 
 function build_tree(
         labels             :: AbstractVector{T},
-        features           :: AbstractMatrix{S},
+        features           :: AbstractVecOrMat{S},
         n_subfeatures       = 0,
         max_depth           = -1,
         min_samples_leaf    = 5,
@@ -48,7 +48,7 @@ end
 
 function build_forest(
         labels              :: AbstractVector{T},
-        features            :: AbstractMatrix{S},
+        features            :: AbstractVecOrMat{S},
         n_subfeatures       = -1,
         n_trees             = 10,
         partial_sampling    = 0.7,
diff --git a/src/regression/tree.jl b/src/regression/tree.jl
index 06ade6e2..ad3eedd6 100644
--- a/src/regression/tree.jl
+++ b/src/regression/tree.jl
@@ -42,7 +42,7 @@ module treeregressor
     # find an optimal split that satisfy the given constraints
     # (max_depth, min_samples_split, min_purity_increase)
     function _split!(
-            X                   :: AbstractMatrix{S}, # the feature array
+            X                   :: AbstractVecOrMat{S}, # the feature array
             Y                   :: AbstractVector{Float64}, # the label array
             W                   :: AbstractVector{U},
             node                :: NodeMeta{S}, # the node to split
@@ -229,7 +229,7 @@ module treeregressor
     end
 
     function _fit(
-            X                     :: AbstractMatrix{S},
+            X                     :: AbstractVecOrMat{S},
             Y                     :: AbstractVector{Float64},
             W                     :: AbstractVector{U},
             max_features          :: Int,
@@ -272,7 +272,7 @@ module treeregressor
     end
 
     function fit(;
-            X                     :: AbstractMatrix{S},
+            X                     :: AbstractVecOrMat{S},
             Y                     :: AbstractVector{Float64},
             W                     :: Union{Nothing, AbstractVector{U}},
             max_features          :: Int,
diff --git a/src/scikitlearnAPI.jl b/src/scikitlearnAPI.jl
index 249e531a..a510086c 100644
--- a/src/scikitlearnAPI.jl
+++ b/src/scikitlearnAPI.jl
@@ -136,7 +136,7 @@ end
                          [:pruning_purity_threshold, :min_samples_leaf, :n_subfeatures,
                           :max_depth, :min_samples_split, :min_purity_increase, :rng])
 
-function fit!(dt::DecisionTreeRegressor, X::AbstractMatrix, y::AbstractVector)
+function fit!(dt::DecisionTreeRegressor, X::AbstractVecOrMat, y::AbstractVector)
     n_samples, n_features = size(X)
     dt.root = build_tree(
         float.(y), X,
@@ -213,7 +213,7 @@ get_classes(rf::RandomForestClassifier) = rf.classes
                           :min_samples_leaf, :min_samples_split, :min_purity_increase,
                           :rng])
 
-function fit!(rf::RandomForestClassifier, X::AbstractMatrix, y::AbstractVector)
+function fit!(rf::RandomForestClassifier, X::AbstractVecOrMat, y::AbstractVector)
     n_samples, n_features = size(X)
     rf.ensemble = build_forest(
         y, X,
@@ -297,7 +297,7 @@ end
                           # since it'll change throughout fitting, but it works
                           :max_depth, :rng])
 
-function fit!(rf::RandomForestRegressor, X::AbstractMatrix, y::AbstractVector)
+function fit!(rf::RandomForestRegressor, X::AbstractVecOrMat, y::AbstractVector)
     n_samples, n_features = size(X)
     rf.ensemble = build_forest(
         float.(y), X,
diff --git a/src/util.jl b/src/util.jl
index fe1fe63d..e81a244b 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -298,7 +298,7 @@ module util
     end
 
     function check_input(
-            X                   :: AbstractMatrix{S},
+            X                   :: AbstractVecOrMat{S},
             Y                   :: AbstractVector{T},
             W                   :: AbstractVector{U},
             max_features        :: Int,

From 01aa8146ca3dc2f99f46fae248cea18269320e22 Mon Sep 17 00:00:00 2001
From: Sander Valentin <sanderbboisen@gmail.com>
Date: Wed, 23 Feb 2022 11:16:43 +0100
Subject: [PATCH 2/5] Fix n_samples and n_features

---
 src/regression/tree.jl | 16 ++++++++++++----
 src/util.jl            |  7 ++++++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/regression/tree.jl b/src/regression/tree.jl
index ad3eedd6..da000dee 100644
--- a/src/regression/tree.jl
+++ b/src/regression/tree.jl
@@ -239,8 +239,12 @@ module treeregressor
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
 
-        n_samples, n_features = size(X)
-
+        if isa(X, AbstractVector)
+            n_samples = length(X)
+            n_features = 1
+        elseif isa(X, AbstractMatrix)
+            n_samples, n_features = size(X)
+        end
         Yf  = Array{Float64}(undef, n_samples)
         Xf  = Array{S}(undef, n_samples)
         Wf  = Array{U}(undef, n_samples)
@@ -281,8 +285,12 @@ module treeregressor
             min_samples_split     :: Int,
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
-
-        n_samples, n_features = size(X)
+        if isa(X, AbstractVector)
+            n_samples = length(X)
+            n_features = 1
+        elseif isa(X, AbstractMatrix)
+            n_samples, n_features = size(X)
+        end
         if W == nothing
             W = fill(1.0, n_samples)
         end
diff --git a/src/util.jl b/src/util.jl
index e81a244b..0ef64139 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -306,7 +306,12 @@ module util
             min_samples_leaf    :: Int,
             min_samples_split   :: Int,
             min_purity_increase :: Float64) where {S, T, U}
-        n_samples, n_features = size(X)
+        if isa(X, AbstractVector)
+            n_samples = length(X)
+            n_features = 1
+        elseif isa(X, AbstractMatrix)
+            n_samples, n_features = size(X)
+        end
         if length(Y) != n_samples
             throw("dimension mismatch between X and Y ($(size(X)) vs $(size(Y))")
         elseif length(W) != n_samples

From 57684723827723ad2c7309e26f285d8db976d0c0 Mon Sep 17 00:00:00 2001
From: sanderbboisen <63041604+sanderbboisen@users.noreply.github.com>
Date: Thu, 3 Mar 2022 13:29:22 +0100
Subject: [PATCH 3/5] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7fd2fb74..2c6ff992 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 [![Codecov](https://codecov.io/gh/bensadeghi/DecisionTree.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/bensadeghi/DecisionTree.jl)
 [![Docs Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliahub.com/docs/DecisionTree/pEDeB/0.10.11/)
 
-### Disclaimer: this package is no longer maintained.
+Most of this work is not mine, but since the original repo is no longer maintained, I will try to adapt this fork to my own needs.
 
 Julia implementation of Decision Tree (CART) and Random Forest algorithms
 

From 63f012eeb42176654fd594fbed5ce26fb121e9d2 Mon Sep 17 00:00:00 2001
From: Sander Valentin <s.valentin@rn.dk>
Date: Wed, 18 May 2022 12:00:22 +0100
Subject: [PATCH 4/5] Make classification and regression trees work as intended
 with vector input

---
 src/classification/main.jl | 44 +++++++++++++++++++-------------------
 src/classification/tree.jl |  7 +++---
 src/regression/tree.jl     | 15 +++----------
 src/scikitlearnAPI.jl      | 22 ++++++++++++++-----
 src/util.jl                | 15 ++++++++++++-
 5 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/src/classification/main.jl b/src/classification/main.jl
index 2f909a78..d3b6f71d 100644
--- a/src/classification/main.jl
+++ b/src/classification/main.jl
@@ -138,15 +138,15 @@ function prune_tree(tree::LeafOrNode{S, T}, purity_thresh=1.0) where {S, T}
 end
 
 
-apply_tree(leaf::Leaf{T}, feature::AbstractVector{S}) where {S, T} = leaf.majority
+_apply_tree(leaf::Leaf{T}, feature::AbstractVector{S}) where {S, T} = leaf.majority
 
-function apply_tree(tree::Node{S, T}, features::AbstractVector{S}) where {S, T}
+function _apply_tree(tree::Node{S, T}, features::AbstractVector{S}) where {S, T}
     if tree.featid == 0
-        return apply_tree(tree.left, features)
+        return _apply_tree(tree.left, features)
     elseif features[tree.featid] < tree.featval
-        return apply_tree(tree.left, features)
+        return _apply_tree(tree.left, features)
     else
-        return apply_tree(tree.right, features)
+        return _apply_tree(tree.right, features)
     end
 end
 
@@ -154,7 +154,7 @@ function apply_tree(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}) where
     N = size(features,1)
     predictions = Array{T}(undef, N)
     for i in 1:N
-        predictions[i] = apply_tree(tree, features[i, :])
+        predictions[i] = _apply_tree(tree, features[i, :])
     end
     if T <: Float64
         return Float64.(predictions)
@@ -171,16 +171,16 @@ n_labels` matrix of probabilities, each row summing up to 1.
 `col_labels` is a vector containing the distinct labels
 (eg. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
 of the output matrix. """
-apply_tree_proba(leaf::Leaf{T}, features::AbstractVector{S}, labels) where {S, T} =
+_apply_tree_proba(leaf::Leaf{T}, features::AbstractVector{S}, labels) where {S, T} =
     compute_probabilities(labels, leaf.values)
 
-function apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels) where {S, T}
+function _apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels) where {S, T}
     if tree.featval === nothing
-        return apply_tree_proba(tree.left, features, labels)
+        return _apply_tree_proba(tree.left, features, labels)
     elseif features[tree.featid] < tree.featval
-        return apply_tree_proba(tree.left, features, labels)
+        return _apply_tree_proba(tree.left, features, labels)
     else
-        return apply_tree_proba(tree.right, features, labels)
+        return _apply_tree_proba(tree.right, features, labels)
     end
 end
 
@@ -254,11 +254,11 @@ function build_forest(
     return Ensemble{S, T}(forest)
 end
 
-function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where {S, T}
+function _apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where {S, T}
     n_trees = length(forest)
     votes = Array{T}(undef, n_trees)
     for i in 1:n_trees
-        votes[i] = apply_tree(forest.trees[i], features)
+        votes[i] = _apply_tree(forest.trees[i], features)
     end
 
     if T <: Float64
@@ -272,7 +272,7 @@ function apply_forest(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}) whe
     N = size(features,1)
     predictions = Array{T}(undef, N)
     for i in 1:N
-        predictions[i] = apply_forest(forest, features[i, :])
+        predictions[i] = _apply_forest(forest, features[i, :])
     end
     return predictions
 end
@@ -285,13 +285,13 @@ n_labels` matrix of probabilities, each row summing up to 1.
 `col_labels` is a vector containing the distinct labels
 (eg. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
 of the output matrix. """
-function apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVector{S}, labels) where {S, T}
-    votes = [apply_tree(tree, features) for tree in forest.trees]
+function _apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVector{S}, labels) where {S, T}
+    votes = [_apply_tree(tree, features) for tree in forest.trees]
     return compute_probabilities(labels, votes)
 end
 
 apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
-    stack_function_results(row->apply_forest_proba(forest, row, labels),
+    stack_function_results(row->_apply_forest_proba(forest, row, labels),
                            features)
 
 function build_adaboost_stumps(
@@ -321,11 +321,11 @@ function build_adaboost_stumps(
     return (Ensemble{S, T}(stumps), coeffs)
 end
 
-function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVector{S}) where {S, T}
+function _apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVector{S}) where {S, T}
     n_stumps = length(stumps)
     counts = Dict()
     for i in 1:n_stumps
-        prediction = apply_tree(stumps.trees[i], features)
+        prediction = _apply_tree(stumps.trees[i], features)
         counts[prediction] = get(counts, prediction, 0.0) + coeffs[i]
     end
     top_prediction = stumps.trees[1].left.majority
@@ -343,7 +343,7 @@ function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Fl
     n_samples = size(features, 1)
     predictions = Array{T}(undef, n_samples)
     for i in 1:n_samples
-        predictions[i] = apply_adaboost_stumps(stumps, coeffs, features[i,:])
+        predictions[i] = _apply_adaboost_stumps(stumps, coeffs, features[i,:])
     end
     return predictions
 end
@@ -356,7 +356,7 @@ n_labels` matrix of probabilities, each row summing up to 1.
 `col_labels` is a vector containing the distinct labels
 (eg. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
 of the output matrix. """
-function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
+function _apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
                                      features::AbstractVector{S}, labels::AbstractVector{T}) where {S, T}
     votes = [apply_tree(stump, features) for stump in stumps.trees]
     compute_probabilities(labels, votes, coeffs)
@@ -364,5 +364,5 @@ end
 
 function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
                                     features::AbstractVecOrMat{S}, labels::AbstractVector{T}) where {S, T}
-    stack_function_results(row->apply_adaboost_stumps_proba(stumps, coeffs, row, labels), features)
+    stack_function_results(row->_apply_adaboost_stumps_proba(stumps, coeffs, row, labels), features)
 end
diff --git a/src/classification/tree.jl b/src/classification/tree.jl
index 3dccfca7..00b567de 100644
--- a/src/classification/tree.jl
+++ b/src/classification/tree.jl
@@ -237,9 +237,8 @@ module treeclassifier
             min_samples_split     :: Int,
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
-
-        n_samples, n_features = size(X)
-
+        
+        n_samples, n_features = util.find_n_samples_and_n_features(X)
         nc  = Array{U}(undef, n_classes)
         ncl = Array{U}(undef, n_classes)
         ncr = Array{U}(undef, n_classes)
@@ -284,7 +283,7 @@ module treeclassifier
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, T, U}
 
-        n_samples, n_features = size(X)
+        n_samples, n_features = util.find_n_samples_and_n_features(X)
         list, Y_ = util.assign(Y)
         if W == nothing
             W = fill(1, n_samples)
diff --git a/src/regression/tree.jl b/src/regression/tree.jl
index da000dee..dc42c063 100644
--- a/src/regression/tree.jl
+++ b/src/regression/tree.jl
@@ -239,12 +239,7 @@ module treeregressor
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
 
-        if isa(X, AbstractVector)
-            n_samples = length(X)
-            n_features = 1
-        elseif isa(X, AbstractMatrix)
-            n_samples, n_features = size(X)
-        end
+        n_samples, n_features = util.find_n_samples_and_n_features(X)
         Yf  = Array{Float64}(undef, n_samples)
         Xf  = Array{S}(undef, n_samples)
         Wf  = Array{U}(undef, n_samples)
@@ -285,12 +280,8 @@ module treeregressor
             min_samples_split     :: Int,
             min_purity_increase   :: Float64,
             rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
-        if isa(X, AbstractVector)
-            n_samples = length(X)
-            n_features = 1
-        elseif isa(X, AbstractMatrix)
-            n_samples, n_features = size(X)
-        end
+
+        n_samples, n_features = util.find_n_samples_and_n_features(X)
         if W == nothing
             W = fill(1.0, n_samples)
         end
diff --git a/src/scikitlearnAPI.jl b/src/scikitlearnAPI.jl
index a510086c..03fb7df9 100644
--- a/src/scikitlearnAPI.jl
+++ b/src/scikitlearnAPI.jl
@@ -49,8 +49,8 @@ get_classes(dt::DecisionTreeClassifier) = dt.classes
                          [:pruning_purity_threshold, :max_depth, :min_samples_leaf,
                           :min_samples_split, :min_purity_increase, :rng])
 
-function fit!(dt::DecisionTreeClassifier, X, y)
-    n_samples, n_features = size(X)
+function fit!(dt::DecisionTreeClassifier, X::AbstractVecOrMat, y)
+    n_samples, n_features = find_n_samples_and_n_features(X)
     dt.root = build_tree(
         y, X,
         dt.n_subfeatures,
@@ -137,7 +137,7 @@ end
                           :max_depth, :min_samples_split, :min_purity_increase, :rng])
 
 function fit!(dt::DecisionTreeRegressor, X::AbstractVecOrMat, y::AbstractVector)
-    n_samples, n_features = size(X)
+    n_samples, n_features = find_n_samples_and_n_features(X)
     dt.root = build_tree(
         float.(y), X,
         dt.n_subfeatures,
@@ -214,7 +214,7 @@ get_classes(rf::RandomForestClassifier) = rf.classes
                           :rng])
 
 function fit!(rf::RandomForestClassifier, X::AbstractVecOrMat, y::AbstractVector)
-    n_samples, n_features = size(X)
+    n_samples, n_features = find_n_samples_and_n_features(X)
     rf.ensemble = build_forest(
         y, X,
         rf.n_subfeatures,
@@ -298,7 +298,7 @@ end
                           :max_depth, :rng])
 
 function fit!(rf::RandomForestRegressor, X::AbstractVecOrMat, y::AbstractVector)
-    n_samples, n_features = size(X)
+    n_samples, n_features = find_n_samples_and_n_features(X)
     rf.ensemble = build_forest(
         float.(y), X,
         rf.n_subfeatures,
@@ -388,3 +388,15 @@ length(dt::DecisionTreeRegressor)   = length(dt.root)
 print_tree(dt::DecisionTreeClassifier, depth=-1; kwargs...) = print_tree(dt.root, depth; kwargs...)
 print_tree(dt::DecisionTreeRegressor,  depth=-1; kwargs...) = print_tree(dt.root, depth; kwargs...)
 print_tree(n::Nothing, depth=-1; kwargs...)                 = show(n)
+
+# Duplicate of `util.find_n_samples_and_n_features`: with the current project structure it can't be loaded from util here
+function find_n_samples_and_n_features(X::AbstractVecOrMat)
+    n_samples, n_features = (0, 0)
+    if isa(X, AbstractVector)
+        n_samples = length(X)
+        n_features = 1
+    elseif isa(X, AbstractMatrix)
+        n_samples, n_features = size(X)
+    end
+    return (n_samples, n_features)
+end
\ No newline at end of file
diff --git a/src/util.jl b/src/util.jl
index 0ef64139..1b687c7f 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -3,7 +3,7 @@
 
 module util
 
-    export gini, entropy, zero_one, q_bi_sort!, hypergeometric, check_input
+    export gini, entropy, zero_one, q_bi_sort!, hypergeometric, check_input, find_n_samples_and_n_features
 
     function assign(Y :: AbstractVector{T}, list :: AbstractVector{T}) where T
         dict = Dict{T, Int}()
@@ -297,6 +297,19 @@ module util
         end
     end
 
+    # Return `(n_samples, n_features)` for X: a vector counts as `length(X)` samples
+    # of a single feature, a matrix as `size(X)`. Shared across multiple functions and files.
+    function find_n_samples_and_n_features(X::AbstractVecOrMat)
+        n_samples, n_features = (0, 0)
+        if isa(X, AbstractVector)
+            n_samples = length(X)
+            n_features = 1
+        elseif isa(X, AbstractMatrix)
+            n_samples, n_features = size(X)
+        end
+        return (n_samples, n_features)
+    end
+    
     function check_input(
             X                   :: AbstractVecOrMat{S},
             Y                   :: AbstractVector{T},

From f2684ecc989299a8837aa65fd6772c548b55c038 Mon Sep 17 00:00:00 2001
From: Sander Valentin <s.valentin@rn.dk>
Date: Wed, 18 May 2022 12:35:47 +0100
Subject: [PATCH 5/5] Add new tests to scikitlearnAPI and fix errors that
 occurred during testing

---
 src/classification/main.jl         |  4 ++--
 test/classification/scikitlearn.jl | 15 +++++++++++++++
 test/miscellaneous/convert.jl      |  7 +++----
 test/regression/scikitlearn.jl     | 12 ++++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/classification/main.jl b/src/classification/main.jl
index d3b6f71d..3ae84a4e 100644
--- a/src/classification/main.jl
+++ b/src/classification/main.jl
@@ -185,7 +185,7 @@ function _apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels
 end
 
 apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
-    stack_function_results(row->apply_tree_proba(tree, row, labels), features)
+    stack_function_results(row->_apply_tree_proba(tree, row, labels), features)
 
 function build_forest(
         labels              :: AbstractVector{T},
@@ -358,7 +358,7 @@ n_labels` matrix of probabilities, each row summing up to 1.
 of the output matrix. """
 function _apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
                                      features::AbstractVector{S}, labels::AbstractVector{T}) where {S, T}
-    votes = [apply_tree(stump, features) for stump in stumps.trees]
+    votes = [_apply_tree(stump, features) for stump in stumps.trees]
     compute_probabilities(labels, votes, coeffs)
 end
 
diff --git a/test/classification/scikitlearn.jl b/test/classification/scikitlearn.jl
index c4329439..efc287ab 100644
--- a/test/classification/scikitlearn.jl
+++ b/test/classification/scikitlearn.jl
@@ -18,6 +18,21 @@ model = fit!(AdaBoostStumpClassifier(), features, labels)
 # Adaboost isn't so hot on this task, disabled for now
 mean(predict(model, features) .== labels)
 
+# Repeat the tests above with a single-feature vector as input
+features = rand(n);
+labels = round.(Int, features);
+
+model = fit!(DecisionTreeClassifier(pruning_purity_threshold=0.9), features, labels)
+@test mean(predict(model, features) .== labels) > 0.8
+
+model = fit!(RandomForestClassifier(), features, labels)
+@test mean(predict(model, features) .== labels) > 0.8
+
+model = fit!(AdaBoostStumpClassifier(), features, labels)
+# Adaboost isn't so hot on this task, disabled for now
+mean(predict(model, features) .== labels)
+# End Vector input tests
+
 Random.seed!(2)
 N = 3000
 X = randn(N, 10)
diff --git a/test/miscellaneous/convert.jl b/test/miscellaneous/convert.jl
index c131dbd4..a2660cc8 100644
--- a/test/miscellaneous/convert.jl
+++ b/test/miscellaneous/convert.jl
@@ -1,20 +1,19 @@
 # Test conversion of Leaf to Node
-
 @testset "convert.jl" begin
 
 lf = Leaf(1, [1])
 nv = Node{Int, Int}[]
 push!(nv, lf)
-@test apply_tree(nv[1], [0]) == 1
+@test DecisionTree._apply_tree(nv[1], [0]) == 1
 
 lf = Leaf(1.0, [0.0, 1.0])
 nv = Node{Int, Float64}[]
 push!(nv, lf)
-@test apply_tree(nv[1], [0]) == 1.0
+@test DecisionTree._apply_tree(nv[1], [0]) == 1.0
 
 lf = Leaf("A", ["B", "A"])
 nv = Node{Int, String}[]
 push!(nv, lf)
-@test apply_tree(nv[1], [0]) == "A"
+@test DecisionTree._apply_tree(nv[1], [0]) == "A"
 
 end # @testset
diff --git a/test/regression/scikitlearn.jl b/test/regression/scikitlearn.jl
index 13e78742..2b1cbccb 100644
--- a/test/regression/scikitlearn.jl
+++ b/test/regression/scikitlearn.jl
@@ -15,6 +15,18 @@ model = fit!(DecisionTreeRegressor(min_samples_split=5), features, labels)
 model = fit!(RandomForestRegressor(n_trees=10, min_samples_leaf=5, n_subfeatures=2), features, labels)
 @test R2(labels, predict(model, features)) > 0.8
 
+# Repeat the above with a single-feature vector as input
+features = rand(n)
+labels = features .* 2
+model = fit!(DecisionTreeRegressor(min_samples_leaf=5, pruning_purity_threshold=0.1), features, labels)
+@test R2(labels, predict(model, features)) > 0.8
+
+model = fit!(DecisionTreeRegressor(min_samples_split=5), features, labels)
+@test R2(labels, predict(model, features)) > 0.8
+
+model = fit!(RandomForestRegressor(n_trees=10, min_samples_leaf=5, n_subfeatures=1), features, labels)
+@test R2(labels, predict(model, features)) > 0.8
+
 Random.seed!(2)
 N = 3000
 X = randn(N, 10)