Skip to content

Commit

Permalink
Port to Julia 0.7/1.0
Browse files Browse the repository at this point in the history
Use the new Name wrapper from NamedArrays 0.9.0 when indexing to avoid ambiguity
with Integer names.
  • Loading branch information
nalimilan committed Sep 5, 2018
1 parent 45067fc commit 1282625
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 35 deletions.
4 changes: 2 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
julia 0.6
NamedArrays
julia 0.7
NamedArrays 0.9.0
CategoricalArrays 0.3.0
DataFrames 0.11.0
28 changes: 13 additions & 15 deletions src/freqtable.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import Base.ht_keyindex

# Cf. https://github.com/JuliaStats/StatsBase.jl/issues/135
immutable UnitWeights <: AbstractVector{Int}
end
# Field-less stand-in for a weights vector in which every observation has weight 1.
# It is the default value of the `weights` argument of the frequency-table functions.
struct UnitWeights <: AbstractVector{Int} end
# Indexing with any number of integer indices always yields the unit weight 1.
Base.getindex(w::UnitWeights, ::Integer...) = 1
# Indexing with a vector of indices (as done when applying `subset`) returns `w` itself.
Base.getindex(w::UnitWeights, ::AbstractVector) = w

Expand All @@ -15,11 +14,11 @@ Base.@pure vectypes(T) = Tuple{map(U -> Vector{U}, T.parameters)...}
function _freqtable(x::Tuple,
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
n = length(x)
n == 0 && throw(ArgumentError("at least one argument must be provided"))

if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end
Expand Down Expand Up @@ -50,12 +49,12 @@ function _freqtable(x::Tuple,
end

if skipmissing
filter!((k, v) -> !any(ismissing, k), d)
filter!(p -> !any(ismissing, p[1]), d)
end

keyvec = collect(keys(d))

dimnames = Vector{Vector}(n)
dimnames = Vector{Vector}(undef, n)
for i in 1:n
s = Set{vtypes.parameters[i]}()
for j in 1:length(keyvec)
Expand All @@ -76,7 +75,7 @@ function _freqtable(x::Tuple,
na = NamedArray(a, tuple(dimnames...)::vectypes(vtypes), ntuple(i -> "Dim$i", n))

for (k, v) in d
na[k...] = v
na[Name.(k)...] = v
end

na
Expand All @@ -85,16 +84,16 @@ end
freqtable(x::AbstractVector...;
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)

# Internal function needed for now so that n is inferred
function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
n == 0 && throw(ArgumentError("at least one argument must be provided"))

if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end
Expand All @@ -121,7 +120,7 @@ function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool =
missingpossible = any(miss)

@inbounds for i in 1:len[1]
ref = x[1].refs[i]
ref = x[1].refs[i]
el = ord[1][ref + 1]
anymiss = missingpossible & (ref <= 0)

Expand All @@ -141,7 +140,7 @@ end

freqtable(x::AbstractCategoricalVector...; skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)

function freqtable(d::AbstractDataFrame, x::Symbol...; args...)
Expand Down Expand Up @@ -214,14 +213,13 @@ julia> sum(pt, (1, 2))
```
"""

prop(tbl::AbstractArray{<:Number}) = tbl / sum(tbl)

function prop(tbl::AbstractArray{<:Number,N}, margin::Integer...) where N
lo, hi = extrema(margin)
(lo < 1 || hi > N) && throw(ArgumentError("margin must be a valid dimension"))
tbl ./ sum(tbl, tuple(setdiff(1:N, margin)...))
tbl ./ sum(tbl, dims=tuple(setdiff(1:N, margin)...))
end

prop(tbl::NamedArray{<:Number}, margin::Integer...) =
NamedArray(prop(array(tbl), margin...), tbl.dicts, tbl.dimnames)
NamedArray(prop(convert(Array, tbl), margin...), tbl.dicts, tbl.dimnames)
37 changes: 19 additions & 18 deletions test/freqtable.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using FreqTables
using Base.Test
using Test

x = repeat(["a", "b", "c", "d"], outer=[100]);
# Values not in order to test discrepancy between index and levels with CategoricalArray
Expand All @@ -24,24 +24,24 @@ pt = @inferred prop(tab)
0.075 0.05 0.05 0.075;
0.05 0.075 0.075 0.05;
0.05 0.075 0.075 0.05]
pt = @inferred prop(tab, 2)
pt = prop(tab, 2)
@test pt == [0.3 0.2 0.2 0.3;
0.3 0.2 0.2 0.3;
0.2 0.3 0.3 0.2;
0.2 0.3 0.3 0.2]
pt = @inferred prop(tab, 1)
pt = prop(tab, 1)
@test pt == [0.3 0.2 0.2 0.3;
0.3 0.2 0.2 0.3;
0.2 0.3 0.3 0.2;
0.2 0.3 0.3 0.2]
pt = @inferred prop(tab, 1, 2)
pt = prop(tab, 1, 2)
@test pt == [1.0 1.0 1.0 1.0;
1.0 1.0 1.0 1.0;
1.0 1.0 1.0 1.0;
1.0 1.0 1.0 1.0]

tbl = @inferred prop(rand(5, 5, 5, 5), 1, 2)
sumtbl = sum(tbl, (3,4))
tbl = prop(rand(5, 5, 5, 5), 1, 2)
sumtbl = sum(tbl, dims=(3,4))
@test all(x -> x ≈ 1.0, sumtbl)

@test_throws MethodError prop()
Expand All @@ -51,21 +51,21 @@ sumtbl = sum(tbl, (3,4))
@test_throws ArgumentError prop([1,2,3], 2)
@test_throws ArgumentError prop([1,2,3], 0)

tab =freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [2.0 3.0
1.0 1.5
3.0 2.0
1.5 1.0]
@test names(tab) == [["a", "b", "c", "d"], ["C", "D"]]
pt = @inferred prop(tab)
pt = prop(tab)
@test pt == [4 6; 2 3; 6 4; 3 2] / 30.0
pt = @inferred prop(tab, 2)
pt = prop(tab, 2)
@test pt == [8 12; 4 6; 12 8; 6 4] / 30.0
pt = @inferred prop(tab, 1)
pt = prop(tab, 1)
@test pt == [6 9; 6 9; 9 6; 9 6] / 15.0
pt = @inferred prop(tab, 1, 2)
pt = prop(tab, 1, 2)
@test pt == [1.0 1.0; 1.0 1.0; 1.0 1.0; 1.0 1.0]

using CategoricalArrays
Expand All @@ -85,9 +85,9 @@ tab = @inferred freqtable(cx, cy)
20 30 30 20]
@test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]

tab =freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [0.0 0.0 2.0 3.0
0.0 0.0 1.0 1.5
0.0 0.0 3.0 2.0
Expand All @@ -100,7 +100,7 @@ const ≅ = isequal
mx = Array{Union{String, Missing}}(x)
my = Array{Union{String, Missing}}(y)
mx[1] = missing
my[[1, 10, 20, 400]] = missing
my[[1, 10, 20, 400]] .= missing

mcx = categorical(mx)
mcy = categorical(my)
Expand Down Expand Up @@ -143,7 +143,8 @@ tabc = freqtable(mcx, mcy, skipmissing=true)
using DataFrames, CSV

for docat in [false, true]
iris = CSV.read(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv"), categorical=docat);
iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv"),
categorical=docat);
if docat
iris[:LongSepal] = categorical(iris[:SepalLength] .> 5.0)
else
Expand Down

0 comments on commit 1282625

Please sign in to comment.