Skip to content

Commit fc3c5aa

Browse files
authored
Fixed pairwise for non-symmetric kernels (#148)
* use StatsBase.pairwise (fixes #143) * bump Julia version compat
1 parent 09300a0 commit fc3c5aa

File tree

8 files changed

+28
-61
lines changed

8 files changed

+28
-61
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
version:
16-
- '1.0'
16+
- '1.1'
1717
- '1' # automatically expands to the latest stable 1.x release of Julia
1818
- 'nightly'
1919
os:

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1414
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1515

1616
[compat]
17-
Arpack = "0.3, 0.4"
17+
Arpack = "0.3, 0.4, 0.5"
1818
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
19-
julia = "1"
19+
julia = "1.1"
2020

2121
[extras]
2222
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

src/MultivariateStats.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
module MultivariateStats
22
using LinearAlgebra
3-
using StatsBase: SimpleCovariance, CovarianceEstimator
3+
using StatsBase: SimpleCovariance, CovarianceEstimator, pairwise, pairwise!
44
import Statistics: mean, var, cov, covm
55
import Base: length, size, show, dump
6-
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights
6+
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights, pairwise
77
import SparseArrays
88
import LinearAlgebra: eigvals
99

src/cmds.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ function transform(M::MDS{T}, x::AbstractVector{<:Real}; distances=false) where
9595
end
9696

9797
# get distance matrix
98-
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X)
98+
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true)
9999
d = d.^2
100100

101101
# b = 0.5*(ones(n,n)*d./n - d + D*ones(n,1)./n - ones(n,n)*D*ones(n,1)./n^2)
@@ -142,7 +142,7 @@ function fit(::Type{MDS}, X::AbstractMatrix{T};
142142

143143
# get distance matrix and space dimension
144144
D, d = if !distances
145-
pairwise((x,y)->norm(x-y), X), size(X,1)
145+
pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true), size(X,1)
146146
else
147147
X, NaN
148148
end
@@ -203,8 +203,8 @@ end
203203

204204
function stress(M::MDS)
205205
# calculate distances if original data was stored
206-
DX = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X)
207-
DY = pairwise((x,y)->norm(x-y), transform(M))
206+
DX = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true)
207+
DY = pairwise((x,y)->norm(x-y), eachcol(transform(M)), symmetric=true)
208208
n = size(DX,1)
209209
return sqrt(2*sum((DX - DY).^2)/sum(DX.^2));
210210
end

src/common.jl

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -121,30 +121,5 @@ function calcscattermat(Z::DenseMatrix)
121121
end
122122

123123
# calculate pairwise kernel
124-
function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function,
125-
X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real})
126-
n = size(X, 2)
127-
m = size(Y, 2)
128-
for j = 1:m
129-
aj = view(Y, :, j)
130-
for i in j:n
131-
@inbounds K[i, j] = kernel(view(X, :, i), aj)[]
132-
end
133-
j <= n && for i in 1:(j - 1)
134-
@inbounds K[i, j] = K[j, i] # leveraging the symmetry
135-
end
136-
end
137-
K
138-
end
139-
140-
pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, X::AbstractVecOrMat{<:Real}) =
141-
pairwise!(K, kernel, X, X)
142-
143-
function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real})
144-
n = size(X, 2)
145-
m = size(Y, 2)
146-
K = similar(X, n, m)
147-
pairwise!(K, kernel, X, Y)
148-
end
149-
150-
pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}) = pairwise(kernel, X, X)
124+
pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...) =
125+
[kernel(x,y) for y in eachcol(X)]

src/kpca.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ principalvars(M::KernelPCA) = M.λ
5050

5151
"""Calculate transformation to kernel space"""
5252
function transform(M::KernelPCA, x::AbstractVecOrMat{<:Real})
53-
k = pairwise(M.ker, M.X, x)
53+
k = pairwise(M.ker, eachcol(M.X), eachcol(x))
5454
transform!(M.center, k)
5555
return projection(M)'*k
5656
end
@@ -63,7 +63,7 @@ function reconstruct(M::KernelPCA, y::AbstractVecOrMat{<:Real})
6363
throw(ArgumentError("Inverse transformation coefficients are not available, set `inverse` parameter when fitting data"))
6464
end
6565
Pᵗ = M.α' .* sqrt.(M.λ)
66-
k = pairwise(M.ker, Pᵗ, y)
66+
k = pairwise(M.ker, eachcol(Pᵗ), eachcol(y))
6767
return M.inv*k
6868
end
6969

@@ -88,7 +88,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};
8888

8989
# set kernel function if available
9090
K = if isa(kernel, Function)
91-
pairwise(kernel, X)
91+
pairwise(kernel, eachcol(X), symmetric=true)
9292
elseif kernel === nothing
9393
@assert issymmetric(X) "Precomputed kernel matrix must be symmetric."
9494
inverse = false
@@ -126,7 +126,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};
126126
Q = zeros(T, 0, 0)
127127
if inverse
128128
Pᵗ = α' .* sqrt.(λ)
129-
KT = pairwise(kernel, Pᵗ)
129+
KT = pairwise(kernel, eachcol(Pᵗ), symmetric=true)
130130
Q = (KT + diagm(0 => fill(β, size(KT,1)))) \ X'
131131
end
132132

test/cmds.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ using Test
99
n = 10
1010
X0 = randn(d, n)
1111
G0 = X0'X0
12-
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), X0)
12+
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true)
1313

1414
## conversion between dmat and gram
1515

@@ -35,7 +35,7 @@ using Test
3535

3636
X = transform(M)
3737
@test size(X) == (3,n)
38-
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) D0
38+
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true) D0
3939

4040
@test_throws DimensionMismatch transform(M, rand(d+1))
4141
y = transform(M, X0[:, 1])
@@ -49,11 +49,11 @@ using Test
4949

5050
X = transform(M)
5151
@test size(X) == (3,n)
52-
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) D0
52+
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true) D0
5353

5454
@test_throws AssertionError transform(M, X0[:, 1])
5555
@test_throws DimensionMismatch transform(M, rand(d+1); distances = true)
56-
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) |> vec
56+
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec
5757
y = transform(M, d, distances=true)
5858
@test X[:, 2] y
5959

@@ -95,18 +95,18 @@ using Test
9595

9696
M = fit(MDS, sqrt.(D), maxoutdim=2, distances=true)
9797
X = transform(M)
98-
@test D MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X)
98+
@test D MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X), symmetric=true)
9999
@test eltype(X) == Float32
100100

101101
a = Float32[0.5, 0.5, 0.5, 0.5]
102102
A = vcat(hcat(D, a), hcat(a', zeros(Float32, 1, 1)))
103103
M⁺ = fit(MDS, sqrt.(A), maxoutdim=2, distances=true)
104104
X⁺ = transform(M⁺)
105-
@test A MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X⁺)
105+
@test A MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X⁺), symmetric=true)
106106

107107
y = transform(M, a, distances=true)
108108
Y = [X y]
109-
@test A MultivariateStats.pairwise((x,y)->sum(abs2, x-y), Y)
109+
@test A MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(Y), symmetric=true)
110110
@test eltype(Y) == Float32
111111

112112
# different input types

test/kpca.jl

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,10 @@ import Random
3434
end
3535

3636
# kernel calculations
37-
K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1:2])
38-
@test size(K) == (n, 2)
39-
@test K[1,1] == 0
40-
@test K[3,2] == norm(X[:,3] - X[:,2])
41-
42-
K = MultivariateStats.pairwise((x,y)->norm(x-y), X[:,1:3], X)
43-
@test size(K) == (3, n)
44-
@test K[1,1] == 0
45-
@test K[3,2] == norm(X[:,2] - X[:,3])
37+
ker = (x,y)->norm(x-y)
4638

4739
K = similar(X, n, n)
48-
MultivariateStats.pairwise!(K, (x,y)->norm(x-y), X)
40+
MultivariateStats.pairwise!(ker, K, eachcol(X))
4941
@test size(K) == (n, n)
5042
@test K[1,1] == 0
5143
@test K[2,1] == norm(X[:,2] - X[:,1])
@@ -54,7 +46,7 @@ import Random
5446
Iₙ = ones(n,n)/n
5547
@test MultivariateStats.transform!(KC, copy(K)) K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ
5648

57-
K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1])
49+
K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1]
5850
@test size(K) == (n, 1)
5951
@test K[1,1] == 0
6052
@test K[2,1] == norm(X[:,2] - X[:,1])
@@ -88,15 +80,15 @@ import Random
8880
M = fit(KernelPCA, X, inverse=true)
8981
@test all(isapprox.(reconstruct(M, transform(M)), X, atol=0.75))
9082

91-
# use rbf kernel
83+
# use RBF kernel
9284
γ = 10.
9385
rbf=(x,y)->exp(-γ*norm(x-y)^2.0)
9486
M = fit(KernelPCA, X, kernel=rbf)
9587
@test indim(M) == d
9688
@test outdim(M) == d
9789

9890
# use precomputed kernel
99-
K = MultivariateStats.pairwise((x,y)->x'*y, X)
91+
K = MultivariateStats.pairwise((x,y)->x'*y, eachcol(X), symmetric=true)
10092
@test_throws AssertionError fit(KernelPCA, rand(1,10), kernel=nothing) # symmetric kernel
10193
M = fit(KernelPCA, K, maxoutdim = 5, kernel=nothing, inverse=true) # use precomputed kernel
10294
M2 = fit(PCA, X, method=:cov, pratio=1.0)
@@ -117,7 +109,7 @@ import Random
117109

118110
@test indim(MM) == d
119111
@test outdim(MM) == d
120-
@test eltype(transform(MM, X[:,1])) == Float32
112+
@test eltype(transform(MM, XX[:,1])) == Float32
121113

122114
for func in (projection, principalvars)
123115
@test eltype(func(M)) == Float64

0 commit comments

Comments
 (0)