-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adaptive proposals #39
base: master
Are you sure you want to change the base?
Changes from 24 commits
e9fd602
5f0ddfa
f42b784
a188780
d8989aa
565f12a
cc16195
802ec67
c7623c4
a33937e
4007bd0
71e010b
2d59ede
279aea7
93f17c5
16715e1
046c21b
b91fcc0
387eff4
8fb1048
4071675
a63262d
afd3ed1
fe8562c
f66f647
2988ff2
e5ad041
7aa8631
4999349
d4b3f6b
cb52c7f
5a2a175
b2be967
518aab1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
""" | ||
Adaptor(; tune=25, target=0.44, bound=10., δmax=0.2) | ||
|
||
A helper struct for univariate adaptive proposal kernels. This tracks the | ||
number of accepted proposals and the total number of attempted proposals. The | ||
proposal kernel is tuned every `tune` proposals, such that the scale (log(σ) in | ||
the case of a Normal kernel, log(b) for a Uniform kernel) of the proposal is | ||
increased (decreased) by `δ(n) = min(δmax, 1/√n)` at tuning step `n` if the | ||
estimated acceptance probability is higher (lower) than `target`. The target | ||
acceptance probability defaults to 0.44 which is supposedly optimal for 1D | ||
proposals. To ensure ergodicity, the scale of the proposal has to be bounded | ||
(by `bound`), although this is often not required in practice. | ||
""" | ||
mutable struct Adaptor | ||
accepted::Int | ||
total::Int | ||
tune::Int # tuning interval | ||
target::Float64 # target acceptance rate | ||
bound::Float64 # bound on logσ of Gaussian kernel | ||
δmax::Float64 # maximum adaptation step | ||
end | ||
|
||
function Adaptor(; tune=25, target=0.44, bound=10., δmax=0.2) | ||
return Adaptor(0, 0, tune, target, bound, δmax) | ||
end | ||
|
||
""" | ||
AdaptiveProposal{P} | ||
|
||
An adaptive Metropolis-Hastings proposal. In order for this to work, the | ||
proposal kernel should implement the `adapted(proposal, δ)` method, where `δ` | ||
is the increment/decrement applied to the scale of the proposal distribution | ||
during adaptation (e.g. for a Normal distribution the scale is `log(σ)`, so | ||
that after adaptation the proposal is `Normal(0, exp(log(σ) + δ))`). | ||
|
||
# Example | ||
```julia | ||
julia> p = AdaptiveProposal(Uniform(-0.2, 0.2)); | ||
|
||
julia> rand(p) | ||
0.07975590594518434 | ||
``` | ||
|
||
# References | ||
|
||
Roberts, Gareth O., and Jeffrey S. Rosenthal. "Examples of adaptive MCMC." | ||
Journal of Computational and Graphical Statistics 18.2 (2009): 349-367. | ||
""" | ||
mutable struct AdaptiveProposal{P} <: Proposal{P} | ||
proposal::P | ||
adaptor::Adaptor | ||
end | ||
|
||
function AdaptiveProposal(p; kwargs...) | ||
return AdaptiveProposal(p, Adaptor(; kwargs...)) | ||
end | ||
|
||
# Record an acceptance by bumping the adaptor's `accepted` counter; the
# vector and named-tuple methods fan out over each wrapped proposal.
function accepted!(prop::AdaptiveProposal)
    return prop.adaptor.accepted += 1
end
accepted!(props::Vector{<:AdaptiveProposal}) = map(accepted!, props)
function accepted!(props::NamedTuple{names}) where names
    return map(key -> accepted!(getfield(props, key)), names)
end
|
||
# this is defined because the first draw has no transition yet (I think) | ||
# Initial draw: the first sample has no previous transition to perturb, so
# draw directly from the kernel. No adaptation bookkeeping happens here.
function propose(rng::Random.AbstractRNG, p::AdaptiveProposal, m::DensityModel)
    return rand(rng, p.proposal)
end
|
||
# the actual proposal happens here | ||
# The actual proposal: possibly retune the kernel first (every `tune`
# attempts, see `consider_adaptation!`), then perturb the previous state `t`
# with a draw from the current proposal kernel (random-walk style).
function propose(
    rng::Random.AbstractRNG,
    proposal::AdaptiveProposal{<:Union{Distribution,Proposal}},
    model::DensityModel,
    t
)
    consider_adaptation!(proposal)
    return t + rand(rng, proposal.proposal)
end
|
||
# Log proposal density of moving from `t_cond` to `t`: the kernel density
# evaluated at the increment (random-walk form).
# NOTE(review): `logpdf` is called on the `AdaptiveProposal` wrapper itself,
# not on `proposal.proposal` — confirm a `logpdf(::AdaptiveProposal, x)`
# method exists elsewhere; otherwise this should probably read
# `logpdf(proposal.proposal, t - t_cond)`.
function q(proposal::AdaptiveProposal, t, t_cond)
    return logpdf(proposal, t - t_cond)
end
|
||
# Invoked on every proposal: retune the kernel whenever the number of
# attempts so far is a multiple of the tuning interval, then register the
# current attempt. (At `total == 0` the `adapt!` call is a no-op.)
function consider_adaptation!(p)
    a = p.adaptor
    iszero(a.total % a.tune) && adapt!(p)
    return a.total += 1
end
|
||
# Retune the proposal kernel based on the acceptance rate observed over the
# last tuning window (diminishing-adaptation scheme; Roberts & Rosenthal 2009).
function adapt!(p::AdaptiveProposal)
    a = p.adaptor
    a.total == 0 && return  # nothing observed yet (very first call)
    δ = min(a.δmax, sqrt(a.tune / a.total)) # diminishing adaptation: δ(n) = min(δmax, 1/√n)
    α = a.accepted / a.tune # acceptance ratio over the last `tune` attempts
    # widen the kernel when accepting more often than `target`, narrow otherwise
    p_ = adapted(p.proposal, α > a.target ? δ : -δ, a.bound)
    a.accepted = 0  # reset the window counter for the next tuning interval
    p.proposal = p_
end
|
||
# Return a `Normal` kernel whose log-scale is shifted by `δ`, with |log σ|
# restricted to `bound` (equivalent to clamping log σ into [-bound, bound]).
function adapted(d::Normal, δ, bound=Inf)
    logσ = clamp(log(d.σ) + δ, -bound, bound)
    return Normal(d.μ, exp(logσ))
end
|
||
# Return a symmetric `Uniform(-b, b)` kernel whose log half-width is shifted
# by `δ` and clamped into [-bound, bound].
# Assumes the incoming kernel is symmetric with d.b > 0 — log(d.b) would
# fail otherwise.
function adapted(d::Uniform, δ, bound=Inf)
    halfwidth = exp(clamp(log(d.b) + δ, -bound, bound))
    return Uniform(-halfwidth, halfwidth)
end
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,4 +103,4 @@ function q( | |
t_cond | ||
) | ||
return q(proposal(t_cond), t, t_cond) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ | |
Random.seed!(100) | ||
sampler = Ensemble(1_000, StretchProposal([InverseGamma(2, 3), Normal(0, 1)])) | ||
chain = sample(model, sampler, 1_000; | ||
param_names = ["s", "m"], chain_type = Chains) | ||
param_names = ["s", "m"], chain_type = Chains, progress = false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
@test mean(chain["s"]) ≈ 49/24 atol=0.1 | ||
@test mean(chain["m"]) ≈ 7/6 atol=0.1 | ||
|
@@ -43,7 +43,7 @@ | |
Random.seed!(100) | ||
sampler = Ensemble(1_000, StretchProposal(MvNormal(2, 1))) | ||
chain = sample(model, sampler, 1_000; | ||
param_names = ["logs", "m"], chain_type = Chains) | ||
param_names = ["logs", "m"], chain_type = Chains, progress = false) | ||
|
||
@test mean(exp, chain["logs"]) ≈ 49/24 atol=0.1 | ||
@test mean(chain["m"]) ≈ 7/6 atol=0.1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,11 @@ using Test | |
# Define the components of a basic model. | ||
insupport(θ) = θ[2] >= 0 | ||
dist(θ) = Normal(θ[1], θ[2]) | ||
density(θ) = insupport(θ) ? sum(logpdf.(dist(θ), data)) : -Inf | ||
|
||
# using `let` prevents surprises when data is redefined in some testset | ||
density = let data = data | ||
θ -> insupport(θ) ? sum(logpdf.(dist(θ), data)) : -Inf | ||
end | ||
|
||
# Construct a DensityModel. | ||
model = DensityModel(density) | ||
|
@@ -27,8 +31,9 @@ using Test | |
spl2 = StaticMH(MvNormal([0.0, 0.0], 1)) | ||
|
||
# Sample from the posterior. | ||
chain1 = sample(model, spl1, 100000; chain_type=StructArray, param_names=["μ", "σ"]) | ||
chain2 = sample(model, spl2, 100000; chain_type=StructArray, param_names=["μ", "σ"]) | ||
kwargs = (progress=false, chain_type=StructArray, param_names=["μ", "σ"]) | ||
chain1 = sample(model, spl1, 100000; kwargs...) | ||
chain2 = sample(model, spl2, 100000; kwargs...) | ||
Comment on lines
+38
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we maybe just define _sample(args...; kwargs...) = sample(args...; progress = false, kwargs...) at the top of the test suite and just replace all occurrences of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, I often define a named tuple for kwargs that are repeatedly used, but could be an idiosyncratic thing :) |
||
|
||
# chn_mean ≈ dist_mean atol=atol_v | ||
@test mean(chain1.μ) ≈ 0.0 atol=0.1 | ||
|
@@ -43,8 +48,28 @@ using Test | |
spl2 = RWMH(MvNormal([0.0, 0.0], 1)) | ||
|
||
# Sample from the posterior. | ||
chain1 = sample(model, spl1, 100000; chain_type=StructArray, param_names=["μ", "σ"]) | ||
chain2 = sample(model, spl2, 100000; chain_type=StructArray, param_names=["μ", "σ"]) | ||
kwargs = (progress=false, chain_type=StructArray, param_names=["μ", "σ"]) | ||
chain1 = sample(model, spl1, 100000; kwargs...) | ||
chain2 = sample(model, spl2, 100000; kwargs...) | ||
|
||
# chn_mean ≈ dist_mean atol=atol_v | ||
@test mean(chain1.μ) ≈ 0.0 atol=0.1 | ||
@test mean(chain1.σ) ≈ 1.0 atol=0.1 | ||
@test mean(chain2.μ) ≈ 0.0 atol=0.1 | ||
@test mean(chain2.σ) ≈ 1.0 atol=0.1 | ||
end | ||
|
||
@testset "Adaptive random walk" begin | ||
# Set up our sampler with initial parameters. | ||
p1 = [AdaptiveProposal(Normal(0,.4)), AdaptiveProposal(Normal(0,1.2))] | ||
p2 = (μ=AdaptiveProposal(Normal(0,1.4)), σ=AdaptiveProposal(Normal(0,0.2))) | ||
spl1 = MetropolisHastings(p1) | ||
spl2 = MetropolisHastings(p2) | ||
|
||
# Sample from the posterior. | ||
kwargs = (progress=false, chain_type=StructArray, param_names=["μ", "σ"]) | ||
chain1 = sample(model, spl1, 100000; kwargs...) | ||
chain2 = sample(model, spl2, 100000; kwargs...) | ||
|
||
# chn_mean ≈ dist_mean atol=atol_v | ||
@test mean(chain1.μ) ≈ 0.0 atol=0.1 | ||
|
@@ -53,17 +78,27 @@ using Test | |
@test mean(chain2.σ) ≈ 1.0 atol=0.1 | ||
end | ||
|
||
@testset "Compare adaptive to simple random walk" begin | ||
data = rand(Normal(2., 1.), 500) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

@arzwa This might be the problem - you redefine `data`. You could just rename the variable here, but actually I think the better approach might be to "fix" the data in the model to avoid any such surprises in the future. I guess this can be achieved by defining

    density = let data = data
        θ -> insupport(θ) ? sum(logpdf.(dist(θ), data)) : -Inf
    end

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

But I haven't tested it, so make sure it actually fixes the problem 😄

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Yes, I just saw this too, thanks. I'll check and push an updated test suite. (Actually, we could just as well test against the same data defined above in the test suite, but I find testing against a mean different from 0 a bit more reassuring since the sampler actually has to 'move' to somewhere from where it starts.)
||
m1 = DensityModel(x -> loglikelihood(Normal(x,1), data)) | ||
p1 = RandomWalkProposal(Normal()) | ||
p2 = AdaptiveProposal(Normal()) | ||
kwargs = (progress=false, chain_type=Chains) | ||
c1 = sample(m1, MetropolisHastings(p1), 10000; kwargs...) | ||
c2 = sample(m1, MetropolisHastings(p2), 10000; kwargs...) | ||
@test ess(c2).nt.ess > ess(c1).nt.ess | ||
end | ||
|
||
@testset "parallel sampling" begin | ||
spl1 = StaticMH([Normal(0,1), Normal(0, 1)]) | ||
|
||
chain1 = sample(model, spl1, MCMCDistributed(), 10000, 4; | ||
param_names=["μ", "σ"], chain_type=Chains) | ||
kwargs = (progress=false, chain_type=Chains, param_names=["μ", "σ"]) | ||
chain1 = sample(model, spl1, MCMCDistributed(), 10000, 4; kwargs...) | ||
@test mean(chain1["μ"]) ≈ 0.0 atol=0.1 | ||
@test mean(chain1["σ"]) ≈ 1.0 atol=0.1 | ||
|
||
if VERSION >= v"1.3" | ||
chain2 = sample(model, spl1, MCMCThreads(), 10000, 4; | ||
param_names=["μ", "σ"], chain_type=Chains) | ||
chain2 = sample(model, spl1, MCMCThreads(), 10000, 4; kwargs...) | ||
@test mean(chain2["μ"]) ≈ 0.0 atol=0.1 | ||
@test mean(chain2["σ"]) ≈ 1.0 atol=0.1 | ||
end | ||
|
@@ -80,10 +115,11 @@ using Test | |
p3 = (a=StaticProposal(Normal(0,1)), b=StaticProposal(InverseGamma(2,3))) | ||
p4 = StaticProposal((x=1.0) -> Normal(x, 1)) | ||
|
||
c1 = sample(m1, MetropolisHastings(p1), 100; chain_type=Vector{NamedTuple}) | ||
c2 = sample(m2, MetropolisHastings(p2), 100; chain_type=Vector{NamedTuple}) | ||
c3 = sample(m3, MetropolisHastings(p3), 100; chain_type=Vector{NamedTuple}) | ||
c4 = sample(m4, MetropolisHastings(p4), 100; chain_type=Vector{NamedTuple}) | ||
kwargs = (chain_type=Vector{NamedTuple}, progress=false) | ||
c1 = sample(m1, MetropolisHastings(p1), 100; kwargs...) | ||
c2 = sample(m2, MetropolisHastings(p2), 100; kwargs...) | ||
c3 = sample(m3, MetropolisHastings(p3), 100; kwargs...) | ||
c4 = sample(m4, MetropolisHastings(p4), 100; kwargs...) | ||
|
||
@test keys(c1[1]) == (:param_1, :lp) | ||
@test keys(c2[1]) == (:param_1, :param_2, :lp) | ||
|
@@ -98,11 +134,10 @@ using Test | |
val = [0.4, 1.2] | ||
|
||
# Sample from the posterior. | ||
chain1 = sample(model, spl1, 10, init_params = val) | ||
chain1 = sample(model, spl1, 10, init_params = val, progress=false) | ||
|
||
@test chain1[1].params == val | ||
end | ||
|
||
@testset "EMCEE" begin include("emcee.jl") end | ||
end | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this actually needed? It seems like the default for
Proposal
should cover this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently there seems to be no default
q
forProposal
. However it's also unnecessary because these adaptive proposals are always symmetric.