Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial work on handling missing values #196

Merged
merged 8 commits into from
Sep 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/linearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ LinearMixedModel(f::FormulaTerm, tbl;
function LinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable;
contrasts = Dict{Symbol,Any}(),
wts = [])
# TODO: perform missing_omit() after apply_schema() when improved
# missing support is in a StatsModels release
tbl, _ = StatsModels.missing_omit(tbl, f)
form = apply_schema(f, schema(f, tbl, contrasts), LinearMixedModel)
# tbl, _ = StatsModels.missing_omit(tbl, form)

y, Xs = modelcols(form, tbl)

y = reshape(float(y), (:, 1)) # y as a floating-point matrix
Expand Down
11 changes: 11 additions & 0 deletions src/randomeffectsterm.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
"""
    RandomEffectsTerm(lhs, rhs)

Term holding the two sides of a `(lhs | rhs)` expression.

The inner constructor compares `StatsModels.termvars(lhs)` with
`StatsModels.termvars(rhs)` and throws an `ArgumentError` when the two sides
have any variable in common.
"""
struct RandomEffectsTerm <: AbstractTerm
    lhs::StatsModels.TermOrTerms
    rhs::StatsModels.TermOrTerms
    function RandomEffectsTerm(lhs, rhs)
        # variables that occur on both sides of the `|`
        shared = intersect(StatsModels.termvars(lhs), StatsModels.termvars(rhs))
        isempty(shared) ||
            throw(ArgumentError("Same variable appears on both sides of |"))
        return new(lhs, rhs)
    end
end

# Print the term to `io` in the form "(lhs | rhs)".
function Base.show(io::IO, t::RandomEffectsTerm)
    return print(io, "($(t.lhs) | $(t.rhs))")
end

# Tell StatsModels that a RandomEffectsTerm is not a matrix term.
function StatsModels.is_matrix_term(::Type{RandomEffectsTerm})
    return false
end

function StatsModels.termvars(t::RandomEffectsTerm)
vcat(StatsModels.termvars(t.lhs), StatsModels.termvars(t.rhs))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to say you might want to use union (like StatsModels does) but there shouldn't be any duplication between the lhs and rhs so there's really no need.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, duplication on the lhs and rhs would be very bad.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may be worth checking that in the constructor actually...

end

function StatsModels.apply_schema(t::FunctionTerm{typeof(|)}, schema::StatsModels.FullRank,
Mod::Type{<:MixedModel})
lhs, rhs = apply_schema.(t.args_parsed, Ref(schema), Mod)
Expand Down
18 changes: 18 additions & 0 deletions test/FactorReTerm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,24 @@ const LMM = LinearMixedModel
end
end

@testset "RandomEffectsTerm" begin
    sleepstudy = dat[:sleepstudy]
    nocontrasts = Dict{Symbol,Any}()

    @testset "Detect same variable as blocking and experimental" begin
        # G appears on both sides of `|`, so applying the schema must fail
        # with an ArgumentError
        frm = @formula(Y ~ 1 + (1 + G|G))
        @test_throws ArgumentError apply_schema(
            frm, schema(frm, sleepstudy, nocontrasts), LinearMixedModel
        )
    end

    @testset "Detect both blocking and experimental variables" begin
        # U is deliberately left out of the fixed effects because we want to make
        # sure that every variable in the random effects is detected
        frm = @formula(Y ~ 1 + (1 + U|G))
        applied = apply_schema(frm, schema(frm, sleepstudy, nocontrasts), LinearMixedModel)
        @test StatsModels.termvars(applied.rhs) == [:U, :G]
    end
end

#=
@testset "vectorRe" begin
slp = dat[:sleepstudy]
Expand Down
37 changes: 37 additions & 0 deletions test/missing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using MixedModels, RData, Test

# Load the test datasets from test/dat.rda unless `dat` is already defined as a
# Dict{Symbol, DataFrame}.
# NOTE(review): `DataFrame` is not brought into scope by this file's `using` line;
# presumably it is available from an earlier include — confirm.
if !(@isdefined(dat) && dat isa Dict{Symbol, DataFrame})
    const dat = Dict(
        Symbol(name) => tbl
        for (name, tbl) in load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda"))
    )
end

# work on a deep copy because the table is modified below
slp = deepcopy(dat[:sleepstudy])
# widen the element type of U so it can hold `missing`, then blank out the first row
slp[!, :U] = Vector{Union{Missing, Float64}}(slp[!, :U])
slp[1, :U] = missing

# TODO: re-enable this test when better missing support has landed in StatsModels
# @testset "No impact from missing on schema" begin
# f = @formula(Y ~ 1 + U + (1|G))
# contrasts = Dict{Symbol,Any}()
# form = apply_schema(f, schema(f, dat[:sleepstudy], contrasts), LinearMixedModel)
# form_missing = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel)
#
# @test form.lhs == form_missing.lhs
# @test form.rhs == form_missing.rhs
# end

@testset "Missing Omit" begin
    complete = dat[:sleepstudy]

    @testset "Missing from unused variables" begin
        # U does not appear in the formula, so its missing entry should have
        # no impact on the fit
        frm = @formula(Y ~ 1 + (1|G))
        full_fit = fit(MixedModel, frm, complete)
        missing_fit = fit(MixedModel, frm, slp)
        @test full_fit.θ ≈ missing_fit.θ rtol=1.0e-12
    end

    @testset "Missing from used variables" begin
        # U is used here, so the single missing U entry in `slp` should cost
        # exactly one observation
        frm = @formula(Y ~ 1 + U + (1|G))
        full_fit = fit(MixedModel, frm, complete)
        missing_fit = fit(MixedModel, frm, slp)
        @test nobs(full_fit) - nobs(missing_fit) == 1
    end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ include("pls.jl")
include("pirls.jl")
include("gausshermite.jl")
include("fit.jl")
include("missing.jl")

using MixedModels