From 128262543f7163b6abc537da818bb3fa44723528 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 5 Sep 2018 10:26:52 +0200 Subject: [PATCH] Port to Julia 0.7/1.0 Use the new Name wrapper from NamedArrays 0.9.0 when indexing to avoid ambiguity with Integer names. --- REQUIRE | 4 ++-- src/freqtable.jl | 28 +++++++++++++--------------- test/freqtable.jl | 37 +++++++++++++++++++------------------ 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/REQUIRE b/REQUIRE index e53ce83..3f78e5d 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.6 -NamedArrays +julia 0.7 +NamedArrays 0.9.0 CategoricalArrays 0.3.0 DataFrames 0.11.0 \ No newline at end of file diff --git a/src/freqtable.jl b/src/freqtable.jl index ca91a57..3bee168 100644 --- a/src/freqtable.jl +++ b/src/freqtable.jl @@ -1,8 +1,7 @@ import Base.ht_keyindex # Cf. https://github.com/JuliaStats/StatsBase.jl/issues/135 -immutable UnitWeights <: AbstractVector{Int} -end +struct UnitWeights <: AbstractVector{Int} end Base.getindex(w::UnitWeights, ::Integer...) = 1 Base.getindex(w::UnitWeights, ::AbstractVector) = w @@ -15,11 +14,11 @@ Base.@pure vectypes(T) = Tuple{map(U -> Vector{U}, T.parameters)...} function _freqtable(x::Tuple, skipmissing::Bool = false, weights::AbstractVector{<:Real} = UnitWeights(), - subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) + subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) n = length(x) n == 0 && throw(ArgumentError("at least one argument must be provided")) - if !isa(subset, Void) + if !isa(subset, Nothing) x = map(y -> y[subset], x) weights = weights[subset] end @@ -50,12 +49,12 @@ function _freqtable(x::Tuple, end if skipmissing - filter!((k, v) -> !any(ismissing, k), d) + filter!(p -> !any(ismissing, p[1]), d) end keyvec = collect(keys(d)) - dimnames = Vector{Vector}(n) + dimnames = Vector{Vector}(undef, n) for i in 1:n s = Set{vtypes.parameters[i]}() for j in 1:length(keyvec) @@ -76,7 +75,7 @@ function _freqtable(x::Tuple, na = NamedArray(a, tuple(dimnames...)::vectypes(vtypes), ntuple(i -> "Dim$i", n)) for (k, v) in d - na[k...] = v + na[Name.(k)...] = v end na @@ -85,16 +84,16 @@ end freqtable(x::AbstractVector...; skipmissing::Bool = false, weights::AbstractVector{<:Real} = UnitWeights(), - subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) = + subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) = _freqtable(x, skipmissing, weights, subset) # Internal function needed for now so that n is inferred function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = false, weights::AbstractVector{<:Real} = UnitWeights(), - subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n + subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n n == 0 && throw(ArgumentError("at least one argument must be provided")) - if !isa(subset, Void) + if !isa(subset, Nothing) x = map(y -> y[subset], x) weights = weights[subset] end @@ -121,7 +120,7 @@ function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = missingpossible = any(miss) @inbounds for i in 1:len[1] - ref = x[1].refs[i] + ref = x[1].refs[i] el = ord[1][ref + 1] anymiss = missingpossible & (ref <= 0) @@ -141,7 +140,7 @@ end freqtable(x::AbstractCategoricalVector...; skipmissing::Bool = false, weights::AbstractVector{<:Real} = UnitWeights(), - subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) = + subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) = _freqtable(x, skipmissing, weights, subset) function freqtable(d::AbstractDataFrame, x::Symbol...; args...) @@ -214,14 +213,13 @@ julia> sum(pt, (1, 2)) ``` """ - prop(tbl::AbstractArray{<:Number}) = tbl / sum(tbl) function prop(tbl::AbstractArray{<:Number,N}, margin::Integer...) where N lo, hi = extrema(margin) (lo < 1 || hi > N) && throw(ArgumentError("margin must be a valid dimension")) - tbl ./ sum(tbl, tuple(setdiff(1:N, margin)...)) + tbl ./ sum(tbl, dims=tuple(setdiff(1:N, margin)...)) end prop(tbl::NamedArray{<:Number}, margin::Integer...) = - NamedArray(prop(array(tbl), margin...), tbl.dicts, tbl.dimnames) + NamedArray(prop(convert(Array, tbl), margin...), tbl.dicts, tbl.dimnames) diff --git a/test/freqtable.jl b/test/freqtable.jl index 2b9968d..f6a47e3 100644 --- a/test/freqtable.jl +++ b/test/freqtable.jl @@ -1,5 +1,5 @@ using FreqTables -using Base.Test +using Test x = repeat(["a", "b", "c", "d"], outer=[100]); # Values not in order to test discrepancy between index and levels with CategoricalArray @@ -24,24 +24,24 @@ pt = @inferred prop(tab) 0.075 0.05 0.05 0.075; 0.05 0.075 0.075 0.05; 0.05 0.075 0.075 0.05] -pt = @inferred prop(tab, 2) +pt = prop(tab, 2) @test pt == [0.3 0.2 0.2 0.3; 0.3 0.2 0.2 0.3; 0.2 0.3 0.3 0.2; 0.2 0.3 0.3 0.2] -pt = @inferred prop(tab, 1) +pt = prop(tab, 1) @test pt == [0.3 0.2 0.2 0.3; 0.3 0.2 0.2 0.3; 0.2 0.3 0.3 0.2; 0.2 0.3 0.3 0.2] -pt = @inferred prop(tab, 1, 2) +pt = prop(tab, 1, 2) @test pt == [1.0 1.0 1.0 1.0; 1.0 1.0 1.0 1.0; 1.0 1.0 1.0 1.0; 1.0 1.0 1.0 1.0] -tbl = @inferred prop(rand(5, 5, 5, 5), 1, 2) -sumtbl = sum(tbl, (3,4)) +tbl = prop(rand(5, 5, 5, 5), 1, 2) +sumtbl = sum(tbl, dims=(3,4)) @test all(x -> x ≈ 1.0, sumtbl) @test_throws MethodError prop() @@ -51,21 +51,21 @@ sumtbl = sum(tbl, (3,4)) @test_throws ArgumentError prop([1,2,3], 2) @test_throws ArgumentError prop([1,2,3], 0) -tab =freqtable(x, y, - subset=1:20, - weights=repeat([1, .5], outer=[10])) +tab = freqtable(x, y, + subset=1:20, + weights=repeat([1, .5], outer=[10])) @test tab == [2.0 3.0 1.0 1.5 3.0 2.0 1.5 1.0] @test names(tab) == [["a", "b", "c", "d"], ["C", "D"]] -pt = @inferred prop(tab) +pt = prop(tab) @test pt == [4 6; 2 3; 6 4; 3 2] / 30.0 -pt = @inferred prop(tab, 2) +pt = prop(tab, 2) @test pt == [8 12; 4 6; 12 8; 6 4] / 30.0 -pt = @inferred prop(tab, 1) +pt = prop(tab, 1) @test pt == [6 9; 6 9; 9 6; 9 6] / 15.0 -pt = @inferred prop(tab, 1, 2) +pt = prop(tab, 1, 2) @test pt == [1.0 1.0; 1.0 1.0; 1.0 1.0; 1.0 1.0] using CategoricalArrays @@ -85,9 +85,9 @@ tab = @inferred freqtable(cx, cy) 20 30 30 20] @test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]] -tab =freqtable(cx, cy, - subset=1:20, - weights=repeat([1, .5], outer=[10])) +tab = freqtable(cx, cy, + subset=1:20, + weights=repeat([1, .5], outer=[10])) @test tab == [0.0 0.0 2.0 3.0 0.0 0.0 1.0 1.5 0.0 0.0 3.0 2.0 @@ -100,7 +100,7 @@ const ≅ = isequal mx = Array{Union{String, Missing}}(x) my = Array{Union{String, Missing}}(y) mx[1] = missing -my[[1, 10, 20, 400]] = missing +my[[1, 10, 20, 400]] .= missing mcx = categorical(mx) mcy = categorical(my) @@ -143,7 +143,8 @@ tabc = freqtable(mcx, mcy, skipmissing=true) using DataFrames, CSV for docat in [false, true] - iris = CSV.read(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv"), categorical=docat); + iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv"), + categorical=docat); if docat iris[:LongSepal] = categorical(iris[:SepalLength] .> 5.0) else