From 07be1b553c30cd02970cced07bdab42c363bd752 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 17 Feb 2020 11:37:47 +0100 Subject: [PATCH 01/12] Bringing predict_median back (#197) --- Project.toml | 2 +- src/operations.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index b33dab2b..fecf1bfc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJBase" uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" authors = ["Anthony D. Blaom "] -version = "0.11.5" +version = "0.11.6" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/operations.jl b/src/operations.jl index 06023dfc..3c7d7ac1 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -21,7 +21,7 @@ ## TODO: need to add checks on the arguments of ## predict(::AbstractMachine, ) and transform(::AbstractMachine, ) -for operation in (:predict, :predict_mean, :predict_mode, +for operation in (:predict, :predict_mean, :predict_mode, :predict_median, :transform, :inverse_transform) ex = quote function $(operation)(machine::AbstractMachine, args...) From 69a1dde57eff212b4f860d1ab424f6d95708d5e3 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 17 Feb 2020 22:16:47 +0100 Subject: [PATCH 02/12] synch dev --- Project.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Project.toml b/Project.toml index b976ef78..45467b3d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,7 @@ name = "MLJBase" uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" authors = ["Anthony D. Blaom "] -<<<<<<< HEAD -version = "0.11.6" -======= version = "0.11.7" ->>>>>>> master [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 16764da9eda84913e1d8e9e805adee38333e298a Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Tue, 18 Feb 2020 13:59:12 +1300 Subject: [PATCH 03/12] add confusion matrix tests --- test/measures/confusion_matrix.jl | 76 +++++++++++++++++++++++++++++++ test/measures/measures.jl | 1 + 2 files changed, 77 insertions(+) create mode 100644 test/measures/confusion_matrix.jl diff --git a/test/measures/confusion_matrix.jl b/test/measures/confusion_matrix.jl new file mode 100644 index 00000000..9c2e8e22 --- /dev/null +++ b/test/measures/confusion_matrix.jl @@ -0,0 +1,76 @@ +using Test +using MLJBase +include(joinpath("..", "..", "test", "_models", "models.jl")) +using .Models + +@testset "basics" begin + y = categorical(['m', 'f', 'n', 'f', 'm', 'n', 'n', 'm', 'f']) + ŷ = categorical(['f', 'f', 'm', 'f', 'n', 'm', 'n', 'm', 'f']) + l = levels(y) # f, m, n + cm = confmat(ŷ, y; warn=false) + e(l,i,j) = sum((ŷ .== l[i]) .& (y .== l[j])) + for i in 1:3, j in 1:3 + @test cm[i,j] == e(l,i,j) + end + perm = [3, 1, 2] + l2 = l[perm] + cm2 = confmat(ŷ, y; perm=perm) # no warning because permutation is given + for i in 1:3, j in 1:3 + @test cm2[i,j] == e(l2,i,j) + end + @test_logs (:warn, "The classes are un-ordered,\nusing order: ['f', 'm', 'n'].\nTo suppress this warning, consider coercing to OrderedFactor.") confmat(ŷ, y) + ŷc = coerce(ŷ, OrderedFactor) + yc = coerce(y, OrderedFactor) + @test confmat(ŷc, yc).mat == cm.mat + + y = categorical(['a','b','a','b']) + ŷ = categorical(['b','b','a','a']) + @test_logs (:warn, "The classes are un-ordered,\nusing: negative='a' and positive='b'.\nTo suppress this warning, consider coercing to OrderedFactor.") confmat(ŷ, y) + + # more tests for coverage + y = categorical([1,2,3,1,2,3,1,2,3]) + ŷ = categorical([1,2,3,1,2,3,1,2,3]) + @test_throws ArgumentError confmat(ŷ, y, rev=true) + + # silly test for display + ŷ = coerce(y, OrderedFactor) + y = coerce(y, OrderedFactor) + iob = IOBuffer() + Base.show(iob, MIME("text/plain"), confmat(ŷ, y)) + siob = String(take!(iob)) + @test strip(siob) == strip(""" + 
┌─────────────────────────────────────────┐ + │ Ground Truth │ + ┌─────────────┼─────────────┬─────────────┬─────────────┤ + │ Predicted │ 1 │ 2 │ 3 │ + ├─────────────┼─────────────┼─────────────┼─────────────┤ + │ 1 │ 3 │ 0 │ 0 │ + ├─────────────┼─────────────┼─────────────┼─────────────┤ + │ 2 │ 0 │ 3 │ 0 │ + ├─────────────┼─────────────┼─────────────┼─────────────┤ + │ 3 │ 0 │ 0 │ 3 │ + └─────────────┴─────────────┴─────────────┴─────────────┘""") + +end + +@testset "confmat as measure" begin + + @test info(confmat).orientation == :other + model = DeterministicConstantClassifier() + + X = (x=rand(10),) + long = categorical(collect("abbaacaabbbbababcbac"), ordered=true) + y = long[1:10] + yhat =long[11:20] + + confmat(yhat, y).mat == [1 2 0; 3 1 1; 1 1 0] + + MLJBase.value(confmat, yhat, X, y, nothing) + + e = evaluate(model, X, y, + measures=[misclassification_rate, confmat], + resampling=Holdout(fraction_train=0.5)) + cm = e.measurement[2] + @test cm.labels == ["a", "b", "c"] + @test cm.mat == [2 2 1; 0 0 0; 0 0 0] +end diff --git a/test/measures/measures.jl b/test/measures/measures.jl index 217130ea..4088095e 100644 --- a/test/measures/measures.jl +++ b/test/measures/measures.jl @@ -69,6 +69,7 @@ end include("continuous.jl") include("finite.jl") include("loss_functions_interface.jl") +include("confusion_matrix.jl") end true From 397f8f4ffe5ca5af3208c73b0e3a7e588c8402f8 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 19 Feb 2020 10:15:52 +1300 Subject: [PATCH 04/12] disable all but resampling tests; rm thread tests; put back others --- src/resampling.jl | 2 +- test/resampling.jl | 50 +++++++++++++++------------------ test/runtests.jl | 70 +++++++++++++++++++++++----------------------- 3 files changed, 58 insertions(+), 64 deletions(-) diff --git a/src/resampling.jl b/src/resampling.jl index b052156a..18fdc9f0 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -579,7 +579,7 @@ function evaluate!(mach::Machine, resampling, weights, return ret end - measurements_flat = if acceleration isa CPUProcesses + if acceleration isa CPUProcesses ## TODO: progress meter for distributed case if verbosity > 0 @info "Distributing cross-validation computation " * diff --git a/test/resampling.jl b/test/resampling.jl index 414bbaa9..a316bdb4 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -1,11 +1,11 @@ -module TestResampling +#module TestResampling using Distributed import ComputationalResources: CPU1, CPUProcesses, CPUThreads -using ..TestUtilities +using .TestUtilities @everywhere begin - using ..Models + using .Models import Random.seed! 
seed!(1234) end @@ -48,7 +48,7 @@ import StatsBase predict, override) end -@testset_accelerated "folds specified" accel (exclude=[CPUProcesses],) begin +@testset_accelerated "folds specified" accel (exclude=[CPUThreads],) begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) @@ -76,16 +76,14 @@ end v = [1/2, 3/4, 1/2, 3/4, 1/2] - # XXX Please fix these tests as they are currently non-deterministic - - # @test result.per_fold[1] ≈ v - # @test result.per_fold[2] ≈ v - # @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5)) - # @test ismissing(result.per_observation[1]) - # @test result.per_observation[2][1] ≈ [1/2, 1/2] - # @test result.per_observation[2][2] ≈ [3/4, 3/4] - # @test result.measurement[1] ≈ mean(v) - # @test result.measurement[2] ≈ mean(v) + @test result.per_fold[1] ≈ v + @test result.per_fold[2] ≈ v + @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5)) + @test ismissing(result.per_observation[1]) + @test result.per_observation[2][1] ≈ [1/2, 1/2] + @test result.per_observation[2][2] ≈ [3/4, 3/4] + @test result.measurement[1] ≈ mean(v) + @test result.measurement[2] ≈ mean(v) end @testset "repeated resampling" begin @@ -147,7 +145,7 @@ end acceleration=accel).measurement[1] end -@testset_accelerated "cv" accel begin +@testset_accelerated "cv" accel (exclude=[CPUThreads],) begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) @@ -160,8 +158,7 @@ end result = evaluate!(mach, resampling=cv, measure=[rms, rmslp1], acceleration=accel) - # XXX Please fix these tests as they are currently non-deterministic - # @test result.per_fold[1] ≈ [1/2, 3/4, 1/2, 3/4, 1/2] + @test result.per_fold[1] ≈ [1/2, 3/4, 1/2, 3/4, 1/2] shuffled = evaluate!(mach, resampling=CV(shuffle=true), acceleration=accel) # using rms default @@ -201,7 +198,7 @@ end @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) end -@testset_accelerated "sample weights in evaluation" accel begin +@testset_accelerated "sample weights in evaluation" accel 
(exclude=[CPUThreads],) begin # cv: x1 = ones(4) x2 = ones(4) @@ -214,11 +211,10 @@ end e = evaluate!(mach, resampling=cv, measure=l1, weights=w, verbosity=0, acceleration=accel).measurement[1] - # XXX Please fix this as currently non-deterministic - # @test e ≈ (1/3 + 13/14)/2 + @test e ≈ (1/3 + 13/14)/2 end -@testset_accelerated "resampler as machine" accel (exclude=[CPUProcesses],) begin +@testset_accelerated "resampler as machine" accel begin N = 50 X = (x1=rand(N), x2=rand(N), x3=rand(N)) y = X.x1 -2X.x2 + 0.05*rand(N) @@ -273,7 +269,7 @@ struct DummyResamplingStrategy <: MLJBase.ResamplingStrategy end @test e.measurement[1] ≈ 1.0 end -@testset_accelerated "sample weights in training and evaluation" accel begin +@testset_accelerated "sample weights in training and evaluation" accel (exclude=[CPUThreads],) begin yraw = ["Perry", "Antonia", "Perry", "Antonia", "Skater"] X = (x=rand(5),) y = categorical(yraw) @@ -334,8 +330,7 @@ end operation=predict_mode, rows=rows, acceleration=accel) - # XXX Please fix these tests as they are currently non-deterministic - # @test e1.per_fold ≈ e2.per_fold + @test e1.per_fold ≈ e2.per_fold # resampler as machine with evaluation weights not specified: resampler = Resampler(model=model, resampling=CV(); @@ -349,8 +344,7 @@ end measure=misclassification_rate, operation=predict_mode, acceleration=accel).measurement[1] - # XXX Please fix these tests as they are currently non-deterministic - # @test e1 ≈ e2 + @test e1 ≈ e2 # resampler as machine with evaluation weights specified: weval = rand(3N); @@ -368,8 +362,8 @@ end weights=weval, acceleration=accel).measurement[1] # XXX Please fix this test as currently non-deterministic - # @test e1 ≈ e2 + @test e1 ≈ e2 end -end +#end true diff --git a/test/runtests.jl b/test/runtests.jl index 87a431bd..792caa2c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using Distributed -addprocs(2) +addprocs(5) @everywhere begin using MLJModelInterface @@ -27,49 +27,49 @@ 
print("Loading some models for testing...") include_everywhere("_models/models.jl") print("\r \r") -@testset "misc" begin - @test include("utilities.jl") - @test include("distributions.jl") - @test include("parameter_inspection.jl") - @test include("equality.jl") - @test include("info_dict.jl") - @test include("static.jl") -end +# @testset "misc" begin +# @test include("utilities.jl") +# @test include("distributions.jl") +# @test include("parameter_inspection.jl") +# @test include("equality.jl") +# @test include("info_dict.jl") +# @test include("static.jl") +# end -@testset "interface" begin - @test include("interface/interface.jl") -end +# @testset "interface" begin +# @test include("interface/interface.jl") +# end -@testset "measures" begin - @test include("measures/measures.jl") -end +# @testset "measures" begin +# @test include("measures/measures.jl") +# end @testset "resampling" begin @test include("resampling.jl") end -@testset "data" begin - @test include("data/data.jl") - @test include("data/datasets.jl") - @test include("data/datasets_synthetic.jl") -end +# @testset "data" begin +# @test include("data/data.jl") +# @test include("data/datasets.jl") +# @test include("data/datasets_synthetic.jl") +# end -@testset "machines+composition" begin - @test include("machines.jl") - @test include("composition/composites.jl") - @test include("composition/pipelines.jl") - @test include("composition/pipeline_static.jl") - @test include("composition/networks.jl") +# @testset "machines+composition" begin +# @test include("machines.jl") +# @test include("composition/composites.jl") +# @test include("composition/pipelines.jl") +# @test include("composition/pipeline_static.jl") +# @test include("composition/networks.jl") - VERSION ≥ v"1.3.0-" && @test include("composition/arrows.jl") -end +# VERSION ≥ v"1.3.0-" && @test include("composition/arrows.jl") +# end -@testset "hyperparam" begin - @test include("hyperparam/one_dimensional_ranges.jl") - @test 
include("hyperparam/one_dimensional_range_methods.jl") -end +# @testset "hyperparam" begin +# @test include("hyperparam/one_dimensional_ranges.jl") +# @test include("hyperparam/one_dimensional_range_methods.jl") +# end -@testset "openml" begin - @test include("openml.jl") -end +# @testset "openml" begin +# @test include("openml.jl") +# end From 331ea2cbf1decde4b34afc21c2be671a86f62d92 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 19 Feb 2020 16:28:10 +1300 Subject: [PATCH 05/12] all resampling tests passing for CPU1 and CPUProcesses --- src/resampling.jl | 73 +++++++--- test/resampling.jl | 318 +++++++++++++++++++++++------------------ test/runtests.jl | 2 +- test/test_utilities.jl | 12 +- 4 files changed, 237 insertions(+), 168 deletions(-) diff --git a/src/resampling.jl b/src/resampling.jl index 18fdc9f0..2559eefa 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -521,26 +521,40 @@ const AbstractRow = Union{AbstractVector{<:Integer}, Colon} const TrainTestPair = Tuple{AbstractRow,AbstractRow} const TrainTestPairs = AbstractVector{<:TrainTestPair} -function _evaluate!(func::Function, res::CPU1, nfolds, verbosity) - p = Progress(nfolds + 1, dt=0, desc="Evaluating over $nfolds folds: ", - barglyphs=BarGlyphs("[=> ]"), barlen=25, color=:yellow) - verbosity > 0 && next!(p) - return reduce(vcat, (func(k, p, verbosity) for k in 1:nfolds)) +function _evaluate!(func, ::CPU1, nfolds, channel) + generator = (begin + r = func(k) + put!(channel, true) + r + end for k in 1:nfolds) + ret = reduce(vcat, generator) + put!(channel, false) + return ret end -function _evaluate!(func::Function, res::CPUProcesses, nfolds, verbosity) - # TODO: use pmap here ?: - return @distributed vcat for k in 1:nfolds - func(k) +function _evaluate!(func, ::CPUProcesses, nfolds, channel) + ret = @distributed vcat for k in 1:nfolds + r = func(k) + put!(channel, true) + r end + put!(channel, false) + return ret end @static if VERSION >= v"1.3.0-DEV.573" - function 
_evaluate!(func::Function, res::CPUThreads, nfolds, verbosity) - task_vec = [Threads.@spawn func(k) for k in 1:nfolds] - return reduce(vcat, fetch.(task_vec)) + function _evaluate!(func, ::CPUThreads, nfolds, channel) + task_vec = [Threads.@spawn begin + r=func(k) + put!(channel, true) + r + end + for k in 1:nfolds] + ret = reduce(vcat, fetch.(task_vec)) + put!(channel, false) + return ret end end -# Evaluation when resampling is a TrainTestPairs (core evaluator): +# Evaluation when resampling is a TrainTestPairs (CORE EVALUATOR): function evaluate!(mach::Machine, resampling, weights, rows, verbosity, repeats, measures, operation, acceleration, force) @@ -559,6 +573,15 @@ function evaluate!(mach::Machine, resampling, weights, nmeasures = length(measures) + # set up progress meter and a remote channel for communication + p = Progress(nfolds, + dt=0, + desc="Evaluating over a total of $nfolds folds: ", + barglyphs=BarGlyphs("[=> ]"), + barlen=25, + color=:yellow) + channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) + function get_measurements(k) train, test = resampling[k] fit!(mach; rows=train, verbosity=verbosity-1, force=force) @@ -572,22 +595,30 @@ function evaluate!(mach::Machine, resampling, weights, yhat = operation(mach, Xtest) return [value(m, yhat, Xtest, ytest, wtest) for m in measures] - end - function get_measurements(k, p, verbosity) # p = progress meter - ret = get_measurements(k) - verbosity > 0 && next!(p) - return ret + put!(channel, true) end if acceleration isa CPUProcesses - ## TODO: progress meter for distributed case if verbosity > 0 @info "Distributing cross-validation computation " * "among $(nworkers()) workers." 
end end - measurements_flat = - _evaluate!(get_measurements, acceleration, nfolds, verbosity) + + @sync begin + # printing the progress bar + @async while take!(channel) + verbosity < 1 || next!(p) + end + + @async global measurements_flat = + _evaluate!(get_measurements, + acceleration, + nfolds, + channel) + end + + close(channel) # in the following rows=folds, columns=measures: measurements_matrix = permutedims( diff --git a/test/resampling.jl b/test/resampling.jl index a316bdb4..9a977bec 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -3,11 +3,13 @@ using Distributed import ComputationalResources: CPU1, CPUProcesses, CPUThreads using .TestUtilities +using ProgressMeter @everywhere begin using .Models import Random.seed! seed!(1234) + const verb = 0 end using Test @@ -15,39 +17,71 @@ using MLJBase import Distributions import StatsBase -@test CV(nfolds=6) == CV(nfolds=6) -@test CV(nfolds=5) != CV(nfolds=6) -@test MLJBase.train_test_pairs(CV(), 1:10) != - MLJBase.train_test_pairs(CV(shuffle=true), 1:10) -@test MLJBase.train_test_pairs(Holdout(), 1:10) != - MLJBase.train_test_pairs(Holdout(shuffle=true), 1:10) - -@testset "checking measure/model compatibility" begin - model = ConstantRegressor() - y = rand(4) - override=false - @test MLJBase._check_measure(:junk, :junk, :junk, :junk, true) == nothing - @test_throws(ArgumentError, - MLJBase._check_measure(model, rms, y, predict, override)) - @test MLJBase._check_measure(model, rms, y, predict_mean, override) == - nothing - @test MLJBase._check_measure(model, rms, y, predict_median, override) == - nothing - y=categorical(collect("abc")) - @test_throws(ArgumentError, - MLJBase._check_measure(model, rms, y, - predict_median, override)) - model = ConstantClassifier() - @test_throws(ArgumentError, - MLJBase._check_measure(model, misclassification_rate, y, - predict, override)) - @test MLJBase._check_measure(model, misclassification_rate, y, - predict_mode, override) == nothing - model = 
Models.DeterministicConstantClassifier() - @test_throws ArgumentError MLJBase._check_measure(model, cross_entropy, y, - predict, override) +@testset_accelerated "dispatch of resources and progress meter" accel begin + + @everywhere begin + nfolds = 6 + nmeasures = 2 + func(k) = (sleep(0.01*rand()); fill(1:k, nmeasures)) + end + + channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) + p = Progress(nfolds, dt=0) + + @sync begin + + # printing the progress bar + @async while take!(channel) + next!(p) + end + + @async begin + global result = + MLJBase._evaluate!(func, accel, nfolds, channel) + end + end + + @test result == + [1:1, 1:1, 1:2, 1:2, 1:3, 1:3, 1:4, 1:4, 1:5, 1:5, 1:6, 1:6] + + close(channel) + end + +# @test CV(nfolds=6) == CV(nfolds=6) +# @test CV(nfolds=5) != CV(nfolds=6) +# @test MLJBase.train_test_pairs(CV(), 1:10) != +# MLJBase.train_test_pairs(CV(shuffle=true), 1:10) +# @test MLJBase.train_test_pairs(Holdout(), 1:10) != +# MLJBase.train_test_pairs(Holdout(shuffle=true), 1:10) + +# @testset "checking measure/model compatibility" begin +# model = ConstantRegressor() +# y = rand(4) +# override=false +# @test MLJBase._check_measure(:junk, :junk, :junk, :junk, true) == nothing +# @test_throws(ArgumentError, +# MLJBase._check_measure(model, rms, y, predict, override)) +# @test MLJBase._check_measure(model, rms, y, predict_mean, override) == +# nothing +# @test MLJBase._check_measure(model, rms, y, predict_median, override) == +# nothing +# y=categorical(collect("abc")) +# @test_throws(ArgumentError, +# MLJBase._check_measure(model, rms, y, +# predict_median, override)) +# model = ConstantClassifier() +# @test_throws(ArgumentError, +# MLJBase._check_measure(model, misclassification_rate, y, +# predict, override)) +# @test MLJBase._check_measure(model, misclassification_rate, y, +# predict_mode, override) == nothing +# model = Models.DeterministicConstantClassifier() +# @test_throws ArgumentError MLJBase._check_measure(model, cross_entropy, y, +# predict, 
override) +# end + @testset_accelerated "folds specified" accel (exclude=[CPUThreads],) begin x1 = ones(10) x2 = ones(10) @@ -70,8 +104,9 @@ end # check detection of incompatible measure (cross_entropy): @test_throws ArgumentError evaluate!(mach, resampling=resampling, measure=[cross_entropy, rmslp1], + verbosity=verb, acceleration=accel) - result = evaluate!(mach, resampling=resampling, + result = evaluate!(mach, resampling=resampling, verbosity=verb, measure=[my_rms, my_mav, rmslp1], acceleration=accel) v = [1/2, 3/4, 1/2, 3/4, 1/2] @@ -86,64 +121,64 @@ end @test result.measurement[2] ≈ mean(v) end -@testset "repeated resampling" begin - x1 = ones(20) - x2 = ones(20) - X = (x1=x1, x2=x2) - y = rand(20) - - holdout = Holdout(fraction_train=0.75, rng=123) - model = Models.DeterministicConstantRegressor() - mach = machine(model, X, y) - result = evaluate!(mach, resampling=holdout, - measure=[rms, rmslp1], repeats=6) - per_fold = result.per_fold[1] - @test unique(per_fold) |> length == 6 - @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check - - cv = CV(nfolds=3, rng=123) - model = Models.DeterministicConstantRegressor() - mach = machine(model, X, y) - result = evaluate!(mach, resampling=cv, - measure=[rms, rmslp1], repeats=6) - per_fold = result.per_fold[1] - @test unique(per_fold) |> length == 18 - @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check -end - -@testset_accelerated "holdout" accel begin - x1 = ones(4) - x2 = ones(4) - X = (x1=x1, x2=x2) - y = [1.0, 1.0, 2.0, 2.0] - - @test MLJBase.show_as_constructed(Holdout) - holdout = Holdout(fraction_train=0.75) - model = Models.DeterministicConstantRegressor() - mach = machine(model, X, y) - result = evaluate!(mach, resampling=holdout, - measure=[rms, rmslp1], acceleration=accel) - result = evaluate!(mach, verbosity=0, resampling=holdout, - acceleration=accel) - result.measurement[1] ≈ 2/3 - - # test direct evaluation of a model + data: - result = evaluate(model, X, y, verbosity=0, - 
resampling=holdout, measure=rms) - @test result.measurement[1] ≈ 2/3 - - X = (x=rand(100),) - y = rand(100) - mach = machine(model, X, y) - evaluate!(mach, verbosity=0, - resampling=Holdout(shuffle=true, rng=123), acceleration=accel) - e1 = evaluate!(mach, verbosity=0, - resampling=Holdout(shuffle=true), - acceleration=accel).measurement[1] - @test e1 != evaluate!(mach, verbosity=0, - resampling=Holdout(), - acceleration=accel).measurement[1] -end +# @testset "repeated resampling" begin +# x1 = ones(20) +# x2 = ones(20) +# X = (x1=x1, x2=x2) +# y = rand(20) + +# holdout = Holdout(fraction_train=0.75, rng=123) +# model = Models.DeterministicConstantRegressor() +# mach = machine(model, X, y) +# result = evaluate!(mach, resampling=holdout, verbosity=verb, +# measure=[rms, rmslp1], repeats=6) +# per_fold = result.per_fold[1] +# @test unique(per_fold) |> length == 6 +# @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check + +# cv = CV(nfolds=3, rng=123) +# model = Models.DeterministicConstantRegressor() +# mach = machine(model, X, y) +# result = evaluate!(mach, resampling=cv, verbosity=verb, +# measure=[rms, rmslp1], repeats=6) +# per_fold = result.per_fold[1] +# @test unique(per_fold) |> length == 18 +# @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check +# end + +# @testset_accelerated "holdout" accel begin +# x1 = ones(4) +# x2 = ones(4) +# X = (x1=x1, x2=x2) +# y = [1.0, 1.0, 2.0, 2.0] + +# @test MLJBase.show_as_constructed(Holdout) +# holdout = Holdout(fraction_train=0.75) +# model = Models.DeterministicConstantRegressor() +# mach = machine(model, X, y) +# result = evaluate!(mach, resampling=holdout, verbosity=verb, +# measure=[rms, rmslp1], acceleration=accel) +# result = evaluate!(mach, verbosity=1, resampling=holdout, verbosity=verb, +# acceleration=accel) +# result.measurement[1] ≈ 2/3 + +# # test direct evaluation of a model + data: +# result = evaluate(model, X, y, verbosity=1, +# resampling=holdout, measure=rms) +# @test result.measurement[1] 
≈ 2/3 + +# X = (x=rand(100),) +# y = rand(100) +# mach = machine(model, X, y) +# evaluate!(mach, verbosity=verb, +# resampling=Holdout(shuffle=true, rng=123), acceleration=accel) +# e1 = evaluate!(mach, verbosity=verb, +# resampling=Holdout(shuffle=true), +# acceleration=accel).measurement[1] +# @test e1 != evaluate!(mach, verbosity=verb, +# resampling=Holdout(), +# acceleration=accel).measurement[1] +# end @testset_accelerated "cv" accel (exclude=[CPUThreads],) begin x1 = ones(10) @@ -156,47 +191,47 @@ end model = Models.DeterministicConstantRegressor() mach = machine(model, X, y) result = evaluate!(mach, resampling=cv, measure=[rms, rmslp1], - acceleration=accel) + acceleration=accel, verbosity=verb) @test result.per_fold[1] ≈ [1/2, 3/4, 1/2, 3/4, 1/2] - shuffled = evaluate!(mach, resampling=CV(shuffle=true), + shuffled = evaluate!(mach, resampling=CV(shuffle=true), verbosity=verb, acceleration=accel) # using rms default @test shuffled.measurement[1] != result.measurement[1] end -@testset "stratified_cv" begin - - # check in explicit example: - y = categorical(['c', 'a', 'b', 'a', 'c', 'x', - 'c', 'a', 'a', 'b', 'b', 'b', 'b', 'b']) - rows = [14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 3, 2, 1] - @test y[rows] == collect("bbbbbaaccabac") - scv = StratifiedCV(nfolds=3) - pairs = MLJBase.train_test_pairs(scv, rows, nothing, y) - @test pairs == [([12, 11, 10, 8, 5, 4, 3, 2, 1], [14, 13, 9, 7]), - ([14, 13, 10, 9, 7, 4, 3, 2, 1], [12, 11, 8, 5]), - ([14, 13, 12, 11, 9, 8, 7, 5], [10, 4, 3, 2, 1])] - scv_random = StratifiedCV(nfolds=3, shuffle=true) - pairs_random = MLJBase.train_test_pairs(scv_random, rows, nothing, y) - @test pairs != pairs_random - - # wrong target type throws error: - @test_throws Exception MLJBase.train_test_pairs(scv, rows, nothing, get.(y)) - - # too many folds throws error: - @test_throws Exception MLJBase.train_test_pairs(StratifiedCV(nfolds=4), - rows, nothing, y) - - # check class distribution is preserved in a larger randomized example: - N = 30 - 
y = shuffle(vcat(fill(:a, N), fill(:b, 2N), - fill(:c, 3N), fill(:d, 4N))) |> categorical; - d = Distributions.fit(MLJBase.UnivariateFinite, y) - pairs = MLJBase.train_test_pairs(scv, 1:10N, nothing, y) - folds = vcat(first.(pairs), last.(pairs)) - @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) -end +# @testset "stratified_cv" begin + +# # check in explicit example: +# y = categorical(['c', 'a', 'b', 'a', 'c', 'x', +# 'c', 'a', 'a', 'b', 'b', 'b', 'b', 'b']) +# rows = [14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 3, 2, 1] +# @test y[rows] == collect("bbbbbaaccabac") +# scv = StratifiedCV(nfolds=3) +# pairs = MLJBase.train_test_pairs(scv, rows, nothing, y) +# @test pairs == [([12, 11, 10, 8, 5, 4, 3, 2, 1], [14, 13, 9, 7]), +# ([14, 13, 10, 9, 7, 4, 3, 2, 1], [12, 11, 8, 5]), +# ([14, 13, 12, 11, 9, 8, 7, 5], [10, 4, 3, 2, 1])] +# scv_random = StratifiedCV(nfolds=3, shuffle=true) +# pairs_random = MLJBase.train_test_pairs(scv_random, rows, nothing, y) +# @test pairs != pairs_random + +# # wrong target type throws error: +# @test_throws Exception MLJBase.train_test_pairs(scv, rows, nothing, get.(y)) + +# # too many folds throws error: +# @test_throws Exception MLJBase.train_test_pairs(StratifiedCV(nfolds=4), +# rows, nothing, y) + +# # check class distribution is preserved in a larger randomized example: +# N = 30 +# y = shuffle(vcat(fill(:a, N), fill(:b, 2N), +# fill(:c, 3N), fill(:d, 4N))) |> categorical; +# d = Distributions.fit(MLJBase.UnivariateFinite, y) +# pairs = MLJBase.train_test_pairs(scv, 1:10N, nothing, y) +# folds = vcat(first.(pairs), last.(pairs)) +# @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) +# end @testset_accelerated "sample weights in evaluation" accel (exclude=[CPUThreads],) begin # cv: @@ -208,8 +243,8 @@ end cv=CV(nfolds=2) model = Models.DeterministicConstantRegressor() mach = machine(model, X, y) - e = evaluate!(mach, resampling=cv, measure=l1, - weights=w, 
verbosity=0, acceleration=accel).measurement[1] + e = evaluate!(mach, resampling=cv, measure=l1, + weights=w, verbosity=verb, acceleration=accel).measurement[1] @test e ≈ (1/3 + 13/14)/2 end @@ -227,14 +262,14 @@ end e1=evaluate(resampling_machine).measurement[1] mach = machine(ridge_model, X, y) @test e1 ≈ evaluate!(mach, resampling=holdout, - measure=mav, verbosity=0, + measure=mav, verbosity=verb, acceleration=accel).measurement[1] ridge_model.lambda=1.0 fit!(resampling_machine, verbosity=2) e2=evaluate(resampling_machine).measurement[1] @test e1 != e2 resampler.weights = rand(N) - fit!(resampling_machine, verbosity=0) + fit!(resampling_machine, verbosity=verb) e3=evaluate(resampling_machine).measurement[1] @test e3 != e2 @@ -279,33 +314,35 @@ end mach = machine(ConstantClassifier(), X, y) e = evaluate!(mach, resampling=Holdout(fraction_train=0.6), operation=predict_mode, measure=misclassification_rate, - acceleration=accel) + acceleration=accel, verbosity=verb) @test e.measurement[1] ≈ 1.0 # with weights in training and evaluation: mach = machine(ConstantClassifier(), X, y, w) e = evaluate!(mach, resampling=Holdout(fraction_train=0.6), operation=predict_mode, measure=misclassification_rate, - acceleration=accel) + acceleration=accel, verbosity=verb) @test e.measurement[1] ≈ 1/3 # with weights in training but overriden in evaluation: e = evaluate!(mach, resampling=Holdout(fraction_train=0.6), operation=predict_mode, measure=misclassification_rate, - weights = fill(1, 5), acceleration=accel) + weights = fill(1, 5), acceleration=accel, verbosity=verb) @test e.measurement[1] ≈ 1/2 @test_throws(DimensionMismatch, evaluate!(mach, resampling=Holdout(fraction_train=0.6), operation=predict_mode, measure=misclassification_rate, - weights = fill(1, 100), acceleration=accel)) + weights = fill(1, 100), acceleration=accel, + verbosity=verb)) @test_throws(ArgumentError, evaluate!(mach, resampling=Holdout(fraction_train=0.6), operation=predict_mode, 
measure=misclassification_rate, - weights = fill('a', 5), acceleration=accel)) + weights = fill('a', 5), acceleration=accel, + verbosity=verb)) # resampling on a subset of all rows: model = @load KNNClassifier @@ -322,13 +359,13 @@ end mach1 = machine(model, Xsmall, ysmall, wsmall) e1 = evaluate!(mach1, resampling=CV(), measure=misclassification_rate, - operation=predict_mode, acceleration=accel) + operation=predict_mode, acceleration=accel, verbosity=verb) mach2 = machine(model, X, y, w) e2 = evaluate!(mach2, resampling=CV(), measure=misclassification_rate, operation=predict_mode, - rows=rows, acceleration=accel) + rows=rows, acceleration=accel, verbosity=verb) @test e1.per_fold ≈ e2.per_fold @@ -342,7 +379,8 @@ end mach = machine(model, X, y, w) e2 = evaluate!(mach, resampling=CV(); measure=misclassification_rate, - operation=predict_mode, acceleration=accel).measurement[1] + operation=predict_mode, + acceleration=accel, verbosity=verb).measurement[1] @test e1 ≈ e2 @@ -357,11 +395,11 @@ end e1 = evaluate(resampling_machine).measurement[1] mach = machine(model, X, y, w) e2 = evaluate!(mach, resampling=CV(); - measure=misclassification_rate, - operation=predict_mode, - weights=weval, acceleration=accel).measurement[1] + measure=misclassification_rate, + operation=predict_mode, + weights=weval, + acceleration=accel, verbosity=verb).measurement[1] - # XXX Please fix this test as currently non-deterministic @test e1 ≈ e2 end diff --git a/test/runtests.jl b/test/runtests.jl index 792caa2c..18f2e3eb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using Distributed -addprocs(5) +addprocs(4) @everywhere begin using MLJModelInterface diff --git a/test/test_utilities.jl b/test/test_utilities.jl index 4ce601c5..95b61ce3 100644 --- a/test/test_utilities.jl +++ b/test/test_utilities.jl @@ -25,15 +25,15 @@ function testset_accelerated(name::String, var, ex; exclude=[]) for res in resources if any(x->typeof(res)<:x, exclude) push!(final_ex.args, quote - $var 
= $res - @testset $(name*" (accelerated with $(typeof(res).name))") begin - @test_broken false - end + $var = $res + @testset $(name*" (accelerated with $(typeof(res).name))") begin + @test_broken false + end end) else push!(final_ex.args, quote - $var = $res - @testset $(name*" (accelerated with $(typeof(res).name))") $ex + $var = $res + @testset $(name*" (accelerated with $(typeof(res).name))") $ex end) end end From 6f8e6a9bcc154975a59fb103afb0897638918958 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 19 Feb 2020 18:50:09 +1300 Subject: [PATCH 06/12] all multithreading resampling tests now passing!! --- src/MLJBase.jl | 3 ++ src/resampling.jl | 92 ++++++++++++++++++++++++++++++++-------------- test/resampling.jl | 66 ++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 56 deletions(-) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index db147966..90b8317b 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -28,6 +28,9 @@ using Distributed using ComputationalResources using ComputationalResources: CPUProcesses using ProgressMeter +@static if VERSION >= v"1.3.0-DEV.573" + import .Threads +end # Operations & extensions import LossFunctions diff --git a/src/resampling.jl b/src/resampling.jl index 2559eefa..8b207e40 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -291,9 +291,20 @@ function Base.show(io::IO, e::PerformanceEvaluation) print(io, "PerformanceEvaluation$summary") end - +# =============================================================== ## EVALUATION METHODS +# --------------------------------------------------------------- +# Helpers + +function actual_rows(rows, N, verbosity) + unspecified_rows = (rows === nothing) + _rows = unspecified_rows ? (1:N) : rows + unspecified_rows || + @info "Creating subsamples from a subset of all rows. 
" + return _rows +end + function _check_measure(model, measure, y, operation, override) override && (return nothing) @@ -377,6 +388,9 @@ function _process_weights_measures(weights, measures, mach, end +# -------------------------------------------------------------- +# User interface points: `evaluate!` and `evaluate` + """ evaluate!(mach, resampling=CV(), @@ -517,13 +531,16 @@ See the machine version `evaluate!` for the complete list of options. evaluate(model::Supervised, args...; kwargs...) = evaluate!(machine(model, args...); kwargs...) -const AbstractRow = Union{AbstractVector{<:Integer}, Colon} -const TrainTestPair = Tuple{AbstractRow,AbstractRow} -const TrainTestPairs = AbstractVector{<:TrainTestPair} +# -------------------------------------------------------------- +# Resource-specific methods to distribute a function over +# processes/threads. -function _evaluate!(func, ::CPU1, nfolds, channel) +# Here `func` is always going to be `get_measurements`; see later + +# machines has only one element: +function _evaluate!(func, machines, ::CPU1, nfolds, channel) generator = (begin - r = func(k) + r = func(machines[1], k) put!(channel, true) r end for k in 1:nfolds) @@ -531,29 +548,47 @@ function _evaluate!(func, ::CPU1, nfolds, channel) put!(channel, false) return ret end -function _evaluate!(func, ::CPUProcesses, nfolds, channel) + +# machines has only one element: +function _evaluate!(func, machines, ::CPUProcesses, nfolds, channel) ret = @distributed vcat for k in 1:nfolds - r = func(k) + r = func(machines[1], k) put!(channel, true) r end put!(channel, false) return ret end + @static if VERSION >= v"1.3.0-DEV.573" - function _evaluate!(func, ::CPUThreads, nfolds, channel) - task_vec = [Threads.@spawn begin - r=func(k) - put!(channel, true) - r - end - for k in 1:nfolds] - ret = reduce(vcat, fetch.(task_vec)) - put!(channel, false) - return ret +# one machine for each thread; cycle through available threads: +function _evaluate!(func, machines, ::CPUThreads, 
nfolds, channel) + + nfolds + results = Vector{Any}(undef, nfolds) + nthreads = Threads.nthreads() + j = 0 + while j < nfolds + Δj = min(nthreads, nfolds - j) + Threads.@threads for k in (j + 1):(j + Δj) + id = mod(k - 1, nthreads) + 1 + results[k] = func(machines[id], k) + put!(channel, true) + end + j += Δj end + put!(channel, false) + return reduce(vcat, results) +end end +# ------------------------------------------------------------ +# Core `evaluation` method, operating on train-test pairs + +const AbstractRow = Union{AbstractVector{<:Integer}, Colon} +const TrainTestPair = Tuple{AbstractRow,AbstractRow} +const TrainTestPairs = AbstractVector{<:TrainTestPair} + # Evaluation when resampling is a TrainTestPairs (CORE EVALUATOR): function evaluate!(mach::Machine, resampling, weights, rows, verbosity, repeats, @@ -573,6 +608,13 @@ function evaluate!(mach::Machine, resampling, weights, nmeasures = length(measures) + machines = [mach,] + @static if VERSION >= v"1.3.0-DEV.573" + clones = [machine(mach.model, mach.args...) + for i in 1:(Threads.nthreads() - 1)] + append!(machines, clones) + end + # set up progress meter and a remote channel for communication p = Progress(nfolds, dt=0, @@ -582,7 +624,7 @@ function evaluate!(mach::Machine, resampling, weights, color=:yellow) channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) - function get_measurements(k) + function get_measurements(mach, k) train, test = resampling[k] fit!(mach; rows=train, verbosity=verbosity-1, force=force) Xtest = selectrows(X, test) @@ -613,6 +655,7 @@ function evaluate!(mach::Machine, resampling, weights, @async global measurements_flat = _evaluate!(get_measurements, + machines, acceleration, nfolds, channel) @@ -659,14 +702,9 @@ function evaluate!(mach::Machine, resampling, weights, end -function actual_rows(rows, N, verbosity) - unspecified_rows = (rows === nothing) - _rows = unspecified_rows ? (1:N) : rows - unspecified_rows || @info "Creating subsamples from a subset of all rows. 
" - return _rows -end +# ---------------------------------------------------------------- +# Evaluation when `resampling` is a ResamplingStrategy -# Evaluation when resampling is a ResamplingStrategy: function evaluate!(mach::Machine, resampling::ResamplingStrategy, weights, rows, verbosity, repeats, args...) @@ -683,7 +721,7 @@ function evaluate!(mach::Machine, resampling::ResamplingStrategy, end - +# ==================================================================== ## RESAMPLER - A MODEL WRAPPER WITH `evaluate` OPERATION """ diff --git a/test/resampling.jl b/test/resampling.jl index 9a977bec..e3f66c91 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -16,37 +16,45 @@ using Test using MLJBase import Distributions import StatsBase +@static if VERSION >= v"1.3.0-DEV.573" + using .Threads +end -@testset_accelerated "dispatch of resources and progress meter" accel begin - - @everywhere begin - nfolds = 6 - nmeasures = 2 - func(k) = (sleep(0.01*rand()); fill(1:k, nmeasures)) - end - - channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) - p = Progress(nfolds, dt=0) +# @testset_accelerated "dispatch of resources and progress meter" accel begin + +# @everywhere begin +# nfolds = 6 +# nmeasures = 2 +# func(mach, k) = (sleep(0.01*rand()); fill(1:(k - mach), nmeasures)) +# end + +# machines = [0,] +# @static if VERSION >= v"1.3.0-DEV.573" +# append!(machines, fill(0, nthreads() - 1)) +# end + +# channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) +# p = Progress(nfolds, dt=0) - @sync begin +# @sync begin - # printing the progress bar - @async while take!(channel) - next!(p) - end +# # printing the progress bar +# @async while take!(channel) +# next!(p) +# end - @async begin - global result = - MLJBase._evaluate!(func, accel, nfolds, channel) - end - end - - @test result == - [1:1, 1:1, 1:2, 1:2, 1:3, 1:3, 1:4, 1:4, 1:5, 1:5, 1:6, 1:6] +# @async begin +# global result = +# MLJBase._evaluate!(func, machines, accel, nfolds, channel) +# end +# end + +# @test 
result == +# [1:1, 1:1, 1:2, 1:2, 1:3, 1:3, 1:4, 1:4, 1:5, 1:5, 1:6, 1:6] - close(channel) +# close(channel) -end +# end # @test CV(nfolds=6) == CV(nfolds=6) @@ -82,7 +90,7 @@ end # predict, override) # end -@testset_accelerated "folds specified" accel (exclude=[CPUThreads],) begin +@testset_accelerated "folds specified" accel begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) @@ -180,7 +188,7 @@ end # acceleration=accel).measurement[1] # end -@testset_accelerated "cv" accel (exclude=[CPUThreads],) begin +@testset_accelerated "cv" accel begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) @@ -233,7 +241,7 @@ end # @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) # end -@testset_accelerated "sample weights in evaluation" accel (exclude=[CPUThreads],) begin +@testset_accelerated "sample weights in evaluation" accel begin # cv: x1 = ones(4) x2 = ones(4) @@ -304,7 +312,7 @@ struct DummyResamplingStrategy <: MLJBase.ResamplingStrategy end @test e.measurement[1] ≈ 1.0 end -@testset_accelerated "sample weights in training and evaluation" accel (exclude=[CPUThreads],) begin +@testset_accelerated "sample weights in training and evaluation" accel begin yraw = ["Perry", "Antonia", "Perry", "Antonia", "Skater"] X = (x=rand(5),) y = categorical(yraw) From a596d93f49e6e2965350f6241c6a66453ed07a1c Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 19 Feb 2020 18:55:54 +1300 Subject: [PATCH 07/12] re-instantate remaining resampling tests --- test/resampling.jl | 304 ++++++++++++++++++++++----------------------- 1 file changed, 152 insertions(+), 152 deletions(-) diff --git a/test/resampling.jl b/test/resampling.jl index e3f66c91..0d3ad8a8 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -20,75 +20,75 @@ import StatsBase using .Threads end -# @testset_accelerated "dispatch of resources and progress meter" accel begin +@testset_accelerated "dispatch of resources and progress meter" accel begin -# @everywhere begin -# nfolds = 6 -# nmeasures = 2 -# func(mach, k) = (sleep(0.01*rand()); fill(1:(k - mach), nmeasures)) -# end + @everywhere begin + nfolds = 6 + nmeasures = 2 + func(mach, k) = (sleep(0.01*rand()); fill(1:(k - mach), nmeasures)) + end -# machines = [0,] -# @static if VERSION >= v"1.3.0-DEV.573" -# append!(machines, fill(0, nthreads() - 1)) -# end + machines = [0,] + @static if VERSION >= v"1.3.0-DEV.573" + append!(machines, fill(0, nthreads() - 1)) + end -# channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) -# p = Progress(nfolds, dt=0) + channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) + p = Progress(nfolds, dt=0) -# @sync begin + @sync begin -# # printing the progress bar -# @async while take!(channel) -# next!(p) -# end + # printing the progress bar + @async while take!(channel) + next!(p) + end -# @async begin -# global result = -# MLJBase._evaluate!(func, machines, accel, nfolds, channel) -# end -# end - -# @test result == -# [1:1, 1:1, 1:2, 1:2, 1:3, 1:3, 1:4, 1:4, 1:5, 1:5, 1:6, 1:6] + @async begin + global result = + MLJBase._evaluate!(func, machines, accel, nfolds, channel) + end + end + + @test result == + [1:1, 1:1, 1:2, 1:2, 1:3, 1:3, 1:4, 1:4, 1:5, 1:5, 1:6, 1:6] -# close(channel) - -# end - - -# @test CV(nfolds=6) == CV(nfolds=6) -# @test CV(nfolds=5) != CV(nfolds=6) -# @test MLJBase.train_test_pairs(CV(), 1:10) != -# 
MLJBase.train_test_pairs(CV(shuffle=true), 1:10) -# @test MLJBase.train_test_pairs(Holdout(), 1:10) != -# MLJBase.train_test_pairs(Holdout(shuffle=true), 1:10) - -# @testset "checking measure/model compatibility" begin -# model = ConstantRegressor() -# y = rand(4) -# override=false -# @test MLJBase._check_measure(:junk, :junk, :junk, :junk, true) == nothing -# @test_throws(ArgumentError, -# MLJBase._check_measure(model, rms, y, predict, override)) -# @test MLJBase._check_measure(model, rms, y, predict_mean, override) == -# nothing -# @test MLJBase._check_measure(model, rms, y, predict_median, override) == -# nothing -# y=categorical(collect("abc")) -# @test_throws(ArgumentError, -# MLJBase._check_measure(model, rms, y, -# predict_median, override)) -# model = ConstantClassifier() -# @test_throws(ArgumentError, -# MLJBase._check_measure(model, misclassification_rate, y, -# predict, override)) -# @test MLJBase._check_measure(model, misclassification_rate, y, -# predict_mode, override) == nothing -# model = Models.DeterministicConstantClassifier() -# @test_throws ArgumentError MLJBase._check_measure(model, cross_entropy, y, -# predict, override) -# end + close(channel) + +end + + +@test CV(nfolds=6) == CV(nfolds=6) +@test CV(nfolds=5) != CV(nfolds=6) +@test MLJBase.train_test_pairs(CV(), 1:10) != + MLJBase.train_test_pairs(CV(shuffle=true), 1:10) +@test MLJBase.train_test_pairs(Holdout(), 1:10) != + MLJBase.train_test_pairs(Holdout(shuffle=true), 1:10) + +@testset "checking measure/model compatibility" begin + model = ConstantRegressor() + y = rand(4) + override=false + @test MLJBase._check_measure(:junk, :junk, :junk, :junk, true) == nothing + @test_throws(ArgumentError, + MLJBase._check_measure(model, rms, y, predict, override)) + @test MLJBase._check_measure(model, rms, y, predict_mean, override) == + nothing + @test MLJBase._check_measure(model, rms, y, predict_median, override) == + nothing + y=categorical(collect("abc")) + @test_throws(ArgumentError, + 
MLJBase._check_measure(model, rms, y, + predict_median, override)) + model = ConstantClassifier() + @test_throws(ArgumentError, + MLJBase._check_measure(model, misclassification_rate, y, + predict, override)) + @test MLJBase._check_measure(model, misclassification_rate, y, + predict_mode, override) == nothing + model = Models.DeterministicConstantClassifier() + @test_throws ArgumentError MLJBase._check_measure(model, cross_entropy, y, + predict, override) +end @testset_accelerated "folds specified" accel begin x1 = ones(10) @@ -129,64 +129,64 @@ end @test result.measurement[2] ≈ mean(v) end -# @testset "repeated resampling" begin -# x1 = ones(20) -# x2 = ones(20) -# X = (x1=x1, x2=x2) -# y = rand(20) - -# holdout = Holdout(fraction_train=0.75, rng=123) -# model = Models.DeterministicConstantRegressor() -# mach = machine(model, X, y) -# result = evaluate!(mach, resampling=holdout, verbosity=verb, -# measure=[rms, rmslp1], repeats=6) -# per_fold = result.per_fold[1] -# @test unique(per_fold) |> length == 6 -# @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check - -# cv = CV(nfolds=3, rng=123) -# model = Models.DeterministicConstantRegressor() -# mach = machine(model, X, y) -# result = evaluate!(mach, resampling=cv, verbosity=verb, -# measure=[rms, rmslp1], repeats=6) -# per_fold = result.per_fold[1] -# @test unique(per_fold) |> length == 18 -# @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check -# end - -# @testset_accelerated "holdout" accel begin -# x1 = ones(4) -# x2 = ones(4) -# X = (x1=x1, x2=x2) -# y = [1.0, 1.0, 2.0, 2.0] - -# @test MLJBase.show_as_constructed(Holdout) -# holdout = Holdout(fraction_train=0.75) -# model = Models.DeterministicConstantRegressor() -# mach = machine(model, X, y) -# result = evaluate!(mach, resampling=holdout, verbosity=verb, -# measure=[rms, rmslp1], acceleration=accel) -# result = evaluate!(mach, verbosity=1, resampling=holdout, verbosity=verb, -# acceleration=accel) -# result.measurement[1] ≈ 2/3 - -# # test 
direct evaluation of a model + data: -# result = evaluate(model, X, y, verbosity=1, -# resampling=holdout, measure=rms) -# @test result.measurement[1] ≈ 2/3 - -# X = (x=rand(100),) -# y = rand(100) -# mach = machine(model, X, y) -# evaluate!(mach, verbosity=verb, -# resampling=Holdout(shuffle=true, rng=123), acceleration=accel) -# e1 = evaluate!(mach, verbosity=verb, -# resampling=Holdout(shuffle=true), -# acceleration=accel).measurement[1] -# @test e1 != evaluate!(mach, verbosity=verb, -# resampling=Holdout(), -# acceleration=accel).measurement[1] -# end +@testset "repeated resampling" begin + x1 = ones(20) + x2 = ones(20) + X = (x1=x1, x2=x2) + y = rand(20) + + holdout = Holdout(fraction_train=0.75, rng=123) + model = Models.DeterministicConstantRegressor() + mach = machine(model, X, y) + result = evaluate!(mach, resampling=holdout, verbosity=verb, + measure=[rms, rmslp1], repeats=6) + per_fold = result.per_fold[1] + @test unique(per_fold) |> length == 6 + @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check + + cv = CV(nfolds=3, rng=123) + model = Models.DeterministicConstantRegressor() + mach = machine(model, X, y) + result = evaluate!(mach, resampling=cv, verbosity=verb, + measure=[rms, rmslp1], repeats=6) + per_fold = result.per_fold[1] + @test unique(per_fold) |> length == 18 + @test abs(mean(per_fold) - std(y)) < 0.06 # very rough check +end + +@testset_accelerated "holdout" accel begin + x1 = ones(4) + x2 = ones(4) + X = (x1=x1, x2=x2) + y = [1.0, 1.0, 2.0, 2.0] + + @test MLJBase.show_as_constructed(Holdout) + holdout = Holdout(fraction_train=0.75) + model = Models.DeterministicConstantRegressor() + mach = machine(model, X, y) + result = evaluate!(mach, resampling=holdout, verbosity=verb, + measure=[rms, rmslp1], acceleration=accel) + result = evaluate!(mach, resampling=holdout, verbosity=verb, + acceleration=accel) + result.measurement[1] ≈ 2/3 + + # test direct evaluation of a model + data: + result = evaluate(model, X, y, verbosity=1, + 
resampling=holdout, measure=rms) + @test result.measurement[1] ≈ 2/3 + + X = (x=rand(100),) + y = rand(100) + mach = machine(model, X, y) + evaluate!(mach, verbosity=verb, + resampling=Holdout(shuffle=true, rng=123), acceleration=accel) + e1 = evaluate!(mach, verbosity=verb, + resampling=Holdout(shuffle=true), + acceleration=accel).measurement[1] + @test e1 != evaluate!(mach, verbosity=verb, + resampling=Holdout(), + acceleration=accel).measurement[1] +end @testset_accelerated "cv" accel begin x1 = ones(10) @@ -208,38 +208,38 @@ end @test shuffled.measurement[1] != result.measurement[1] end -# @testset "stratified_cv" begin - -# # check in explicit example: -# y = categorical(['c', 'a', 'b', 'a', 'c', 'x', -# 'c', 'a', 'a', 'b', 'b', 'b', 'b', 'b']) -# rows = [14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 3, 2, 1] -# @test y[rows] == collect("bbbbbaaccabac") -# scv = StratifiedCV(nfolds=3) -# pairs = MLJBase.train_test_pairs(scv, rows, nothing, y) -# @test pairs == [([12, 11, 10, 8, 5, 4, 3, 2, 1], [14, 13, 9, 7]), -# ([14, 13, 10, 9, 7, 4, 3, 2, 1], [12, 11, 8, 5]), -# ([14, 13, 12, 11, 9, 8, 7, 5], [10, 4, 3, 2, 1])] -# scv_random = StratifiedCV(nfolds=3, shuffle=true) -# pairs_random = MLJBase.train_test_pairs(scv_random, rows, nothing, y) -# @test pairs != pairs_random - -# # wrong target type throws error: -# @test_throws Exception MLJBase.train_test_pairs(scv, rows, nothing, get.(y)) - -# # too many folds throws error: -# @test_throws Exception MLJBase.train_test_pairs(StratifiedCV(nfolds=4), -# rows, nothing, y) - -# # check class distribution is preserved in a larger randomized example: -# N = 30 -# y = shuffle(vcat(fill(:a, N), fill(:b, 2N), -# fill(:c, 3N), fill(:d, 4N))) |> categorical; -# d = Distributions.fit(MLJBase.UnivariateFinite, y) -# pairs = MLJBase.train_test_pairs(scv, 1:10N, nothing, y) -# folds = vcat(first.(pairs), last.(pairs)) -# @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) -# end +@testset "stratified_cv" 
begin + + # check in explicit example: + y = categorical(['c', 'a', 'b', 'a', 'c', 'x', + 'c', 'a', 'a', 'b', 'b', 'b', 'b', 'b']) + rows = [14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 3, 2, 1] + @test y[rows] == collect("bbbbbaaccabac") + scv = StratifiedCV(nfolds=3) + pairs = MLJBase.train_test_pairs(scv, rows, nothing, y) + @test pairs == [([12, 11, 10, 8, 5, 4, 3, 2, 1], [14, 13, 9, 7]), + ([14, 13, 10, 9, 7, 4, 3, 2, 1], [12, 11, 8, 5]), + ([14, 13, 12, 11, 9, 8, 7, 5], [10, 4, 3, 2, 1])] + scv_random = StratifiedCV(nfolds=3, shuffle=true) + pairs_random = MLJBase.train_test_pairs(scv_random, rows, nothing, y) + @test pairs != pairs_random + + # wrong target type throws error: + @test_throws Exception MLJBase.train_test_pairs(scv, rows, nothing, get.(y)) + + # too many folds throws error: + @test_throws Exception MLJBase.train_test_pairs(StratifiedCV(nfolds=4), + rows, nothing, y) + + # check class distribution is preserved in a larger randomized example: + N = 30 + y = shuffle(vcat(fill(:a, N), fill(:b, 2N), + fill(:c, 3N), fill(:d, 4N))) |> categorical; + d = Distributions.fit(MLJBase.UnivariateFinite, y) + pairs = MLJBase.train_test_pairs(scv, 1:10N, nothing, y) + folds = vcat(first.(pairs), last.(pairs)) + @test all([Distributions.fit(MLJBase.UnivariateFinite, y[fold]) ≈ d for fold in folds]) +end @testset_accelerated "sample weights in evaluation" accel begin # cv: From 9f61fad9dd7005c382b0583ba01ed09c41eebdb3 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 19 Feb 2020 18:59:26 +1300 Subject: [PATCH 08/12] addprocs(4) -> addprocs() oops --- test/runtests.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 18f2e3eb..da739437 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,7 @@ using Distributed -addprocs(4) +addprocs() + +@info "nprocs() = $(nprocs())" @everywhere begin using MLJModelInterface From 670dd9ac123d2a2005976cab117cf6ea23cc39af Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 19 Feb 2020 19:15:55 +1300 Subject: [PATCH 09/12] re-instate full tests suite --- test/runtests.jl | 74 ++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index da739437..6267197d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,12 @@ using Distributed addprocs() @info "nprocs() = $(nprocs())" +@static if VERSION >= v"1.3.0-DEV.573" + import .Threads + @info "nthreads() = $(Threads.nthreads())" +else + @info "Running julia $(VERSION). Multithreading tests excluded. 
" +end @everywhere begin using MLJModelInterface @@ -29,49 +35,49 @@ print("Loading some models for testing...") include_everywhere("_models/models.jl") print("\r \r") -# @testset "misc" begin -# @test include("utilities.jl") -# @test include("distributions.jl") -# @test include("parameter_inspection.jl") -# @test include("equality.jl") -# @test include("info_dict.jl") -# @test include("static.jl") -# end +@testset "misc" begin + @test include("utilities.jl") + @test include("distributions.jl") + @test include("parameter_inspection.jl") + @test include("equality.jl") + @test include("info_dict.jl") + @test include("static.jl") +end -# @testset "interface" begin -# @test include("interface/interface.jl") -# end +@testset "interface" begin + @test include("interface/interface.jl") +end -# @testset "measures" begin -# @test include("measures/measures.jl") -# end +@testset "measures" begin + @test include("measures/measures.jl") +end @testset "resampling" begin @test include("resampling.jl") end -# @testset "data" begin -# @test include("data/data.jl") -# @test include("data/datasets.jl") -# @test include("data/datasets_synthetic.jl") -# end +@testset "data" begin + @test include("data/data.jl") + @test include("data/datasets.jl") + @test include("data/datasets_synthetic.jl") +end -# @testset "machines+composition" begin -# @test include("machines.jl") -# @test include("composition/composites.jl") -# @test include("composition/pipelines.jl") -# @test include("composition/pipeline_static.jl") -# @test include("composition/networks.jl") +@testset "machines+composition" begin + @test include("machines.jl") + @test include("composition/composites.jl") + @test include("composition/pipelines.jl") + @test include("composition/pipeline_static.jl") + @test include("composition/networks.jl") -# VERSION ≥ v"1.3.0-" && @test include("composition/arrows.jl") -# end + VERSION ≥ v"1.3.0-" && @test include("composition/arrows.jl") +end -# @testset "hyperparam" begin -# @test 
include("hyperparam/one_dimensional_ranges.jl") -# @test include("hyperparam/one_dimensional_range_methods.jl") -# end +@testset "hyperparam" begin + @test include("hyperparam/one_dimensional_ranges.jl") + @test include("hyperparam/one_dimensional_range_methods.jl") +end -# @testset "openml" begin -# @test include("openml.jl") -# end +@testset "openml" begin + @test include("openml.jl") +end From cfd43f7a313c203cce9c16efe078dfc099431373 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 19 Feb 2020 20:45:38 +1300 Subject: [PATCH 10/12] try adding mulitple threads to travis runs --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 81441743..9f09fe69 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,8 @@ language: julia os: - linux +env: + - JULIA_NUM_THREADS=30 julia: - 1.1 - 1.2 From 92572b4f930c0185928f1b219085e11e4c4f6143 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 20 Feb 2020 10:33:20 +1300 Subject: [PATCH 11/12] replace use of Threads.@threads with @spawn for better composability --- src/MLJBase.jl | 4 +-- src/resampling.jl | 70 +++++++++++++++++++++++++----------------- test/resampling.jl | 14 ++++----- test/test_utilities.jl | 4 +-- 4 files changed, 51 insertions(+), 41 deletions(-) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index 90b8317b..2bf3ebd2 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -28,9 +28,7 @@ using Distributed using ComputationalResources using ComputationalResources: CPUProcesses using ProgressMeter -@static if VERSION >= v"1.3.0-DEV.573" - import .Threads -end +import .Threads # Operations & extensions import LossFunctions diff --git a/src/resampling.jl b/src/resampling.jl index 8b207e40..7dadf662 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -1,3 +1,4 @@ +# ================================================================== ## RESAMPLING STRATEGIES abstract type ResamplingStrategy <: MLJType end @@ -9,7 +10,7 @@ function ==(s1::S, s2::S) where 
S <: ResamplingStrategy return all(getfield(s1, fld) == getfield(s2, fld) for fld in fieldnames(S)) end -# fallbacks: +# fallbacks for method to be implemented by each new strategy: train_test_pairs(s::ResamplingStrategy, rows, X, y, w) = train_test_pairs(s, rows, X, y) train_test_pairs(s::ResamplingStrategy, rows, X, y) = @@ -17,7 +18,6 @@ train_test_pairs(s::ResamplingStrategy, rows, X, y) = train_test_pairs(s::ResamplingStrategy, rows, y) = train_test_pairs(s, rows) - # Helper to interpret rng, shuffle in case either is `nothing` or if # `rng` is an integer: function shuffle_and_rng(shuffle, rng) @@ -36,6 +36,9 @@ function shuffle_and_rng(shuffle, rng) return shuffle, rng end +# ---------------------------------------------------------------- +# Holdout + """ holdout = Holdout(; fraction_train=0.7, shuffle=nothing, @@ -83,6 +86,8 @@ function train_test_pairs(holdout::Holdout, rows) end +# ---------------------------------------------------------------- +# Cross-validation (vanilla) """ cv = CV(; nfolds=6, shuffle=nothing, rng=nothing) @@ -154,6 +159,9 @@ function train_test_pairs(cv::CV, rows) return ret end +# ---------------------------------------------------------------- +# Cross-validation (stratified; for `Finite` targets) + """ stratified_cv = StratifiedCV(; nfolds=6, shuffle=false, @@ -265,8 +273,8 @@ function train_test_pairs(stratified_cv::StratifiedCV, rows, X, y) end - -## EVALUATION TYPE +# ================================================================ +## EVALUATION RESULT TYPE const PerformanceEvaluation = NamedTuple{(:measure, :measurement, :per_fold, :per_observation)} @@ -531,9 +539,9 @@ See the machine version `evaluate!` for the complete list of options. evaluate(model::Supervised, args...; kwargs...) = evaluate!(machine(model, args...); kwargs...) -# -------------------------------------------------------------- -# Resource-specific methods to distribute a function over -# processes/threads. 
+# ------------------------------------------------------------------- +# Resource-specific methods to distribute a function parameterized by +# fold number `k` over processes/threads. # Here `func` is always going to be `get_measurements`; see later @@ -563,22 +571,22 @@ end @static if VERSION >= v"1.3.0-DEV.573" # one machine for each thread; cycle through available threads: function _evaluate!(func, machines, ::CPUThreads, nfolds, channel) - - nfolds - results = Vector{Any}(undef, nfolds) nthreads = Threads.nthreads() - j = 0 - while j < nfolds - Δj = min(nthreads, nfolds - j) - Threads.@threads for k in (j + 1):(j + Δj) - id = mod(k - 1, nthreads) + 1 - results[k] = func(machines[id], k) + tasks = map(1:nfolds) do k + Threads.@spawn begin + id = Threads.threadid() + if !haskey(machines, id) + machines[id] = + machine(machines[1].model, machines[1].args...) + end + r = func(machines[id], k) put!(channel, true) + r end - j += Δj end + ret = reduce(vcat, fetch.(tasks)) put!(channel, false) - return reduce(vcat, results) + return ret end end @@ -608,17 +616,15 @@ function evaluate!(mach::Machine, resampling, weights, nmeasures = length(measures) - machines = [mach,] - @static if VERSION >= v"1.3.0-DEV.573" - clones = [machine(mach.model, mach.args...) - for i in 1:(Threads.nthreads() - 1)] - append!(machines, clones) - end + # For multithreading we need a clone of `mach` for each thread + # doing work. These are instantiated as needed except for + # threadid=1. 
+ machines = Dict(1 => mach) # set up progress meter and a remote channel for communication p = Progress(nfolds, dt=0, - desc="Evaluating over a total of $nfolds folds: ", + desc="Evaluating over $nfolds folds: ", barglyphs=BarGlyphs("[=> ]"), barlen=25, color=:yellow) @@ -642,7 +648,7 @@ function evaluate!(mach::Machine, resampling, weights, if acceleration isa CPUProcesses if verbosity > 0 - @info "Distributing cross-validation computation " * + @info "Distributing evaluations " * "among $(nworkers()) workers." end end @@ -820,14 +826,20 @@ function MLJBase.fit(resampler::Resampler, verbosity::Int, args...) end -# in special case of holdout, we can reuse the underlying model's -# machine, provided the training_fraction has not changed: +# in special case of non-shuffled, non-repeated holdout, we can reuse +# the underlying model's machine, provided the training_fraction has +# not changed: function MLJBase.update(resampler::Resampler{Holdout}, verbosity::Int, fitresult, cache, args...) old_mach, old_resampling = cache - if old_resampling.fraction_train == resampler.resampling.fraction_train + reusable = !resampler.resampling.shuffle && + resampler.repeats == 1 && + old_resampling.fraction_train == + resampler.resampling.fraction_train + + if reusable mach = old_mach else mach = machine(resampler.model, args...) 
diff --git a/test/resampling.jl b/test/resampling.jl index 0d3ad8a8..6a4e766d 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -22,16 +22,16 @@ end @testset_accelerated "dispatch of resources and progress meter" accel begin + X = (x = [1, ],) + y = [2.0, ] + @everywhere begin nfolds = 6 nmeasures = 2 - func(mach, k) = (sleep(0.01*rand()); fill(1:(k - mach), nmeasures)) + func(mach, k) = (sleep(0.01*rand()); fill(1:k, nmeasures)) end - machines = [0,] - @static if VERSION >= v"1.3.0-DEV.573" - append!(machines, fill(0, nthreads() - 1)) - end + machines = Dict(1 => machine(ConstantRegressor(), X, y)) channel = RemoteChannel(()->Channel{Bool}(nfolds) , 1) p = Progress(nfolds, dt=0) @@ -39,11 +39,11 @@ end @sync begin # printing the progress bar - @async while take!(channel) + t1 = @async while take!(channel) next!(p) end - @async begin + t2 = @async begin global result = MLJBase._evaluate!(func, machines, accel, nfolds, channel) end diff --git a/test/test_utilities.jl b/test/test_utilities.jl index 95b61ce3..b5da9d8d 100644 --- a/test/test_utilities.jl +++ b/test/test_utilities.jl @@ -26,14 +26,14 @@ function testset_accelerated(name::String, var, ex; exclude=[]) if any(x->typeof(res)<:x, exclude) push!(final_ex.args, quote $var = $res - @testset $(name*" (accelerated with $(typeof(res).name))") begin + @testset $(name*" ($(typeof(res).name))") begin @test_broken false end end) else push!(final_ex.args, quote $var = $res - @testset $(name*" (accelerated with $(typeof(res).name))") $ex + @testset $(name*" ($(typeof(res).name))") $ex end) end end From 39c70583de44e58e72df626973b5c6fd10673526 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 20 Feb 2020 11:14:01 +1300 Subject: [PATCH 12/12] bump to 0.11.8 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 45467b3d..77735a2e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJBase" uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" authors = ["Anthony D. Blaom "] -version = "0.11.7" +version = "0.11.8" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"