From aa613cba161b839bd57d35071d39c464d114d13a Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 27 Jun 2024 13:35:50 +1200
Subject: [PATCH 1/4] bump [compat] MLJModelInterface = "1.11",
 StatisticalTraits = "3.4"

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 792ffb6a..d39569e1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -47,7 +47,7 @@ DelimitedFiles = "1"
 Distributions = "0.25.3"
 InvertedIndices = "1"
 LearnAPI = "0.1"
-MLJModelInterface = "1.10"
+MLJModelInterface = "1.11"
 Missings = "0.4, 1"
 OrderedCollections = "1.1"
 Parameters = "0.12"
@@ -58,7 +58,7 @@ Reexport = "1.2"
 ScientificTypes = "3"
 StatisticalMeasures = "0.1.1"
 StatisticalMeasuresBase = "0.1.1"
-StatisticalTraits = "3.3"
+StatisticalTraits = "3.4"
 Statistics = "1"
 StatsBase = "0.32, 0.33, 0.34"
 Tables = "0.2, 1.0"

From cb152085d66fbd8847130af091657254e9dff2c1 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 27 Jun 2024 13:37:07 +1200
Subject: [PATCH 2/4] bump 1.6

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index d39569e1..80203bc0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJBase"
 uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "1.5.0"
+version = "1.6"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

From 4d37eedd5dce1d3f43409880181ae25b2394eb4e Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 27 Jun 2024 15:15:25 +1200
Subject: [PATCH 3/4] make pipelines support `Unsupervised` with target in fit

---
 src/composition/models/pipelines.jl  | 18 ++++++++++++--
 test/composition/models/pipelines.jl | 35 ++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl
index 0ea85297..fb7682a9 100644
--- a/src/composition/models/pipelines.jl
+++ b/src/composition/models/pipelines.jl
@@ -225,6 +225,15 @@ implements it (some clustering models). Similarly, calling `transform`
 on a supervised pipeline calls `transform` on the supervised
 component.
 
+### Transformers that need a target in training
+
+Some transformers that have type `Unsupervised` (so that the output of `transform` is
+propagated in pipelines) also see a target variable in training. An example are so-called
+target encoders (which transform categorical input features, based on some target
+observations). Provided they appear before any `Supervised` component in the pipelines,
+such models are supported. Of course a target must be provided whenever training such a
+pipeline, whether or not it contains a `Supervised` component.
+
 ### Optional key-word arguments
 
 - `prediction_type`  -
@@ -444,9 +453,13 @@ function extend(front::Front{Pred}, ::Static, name, cache, args...)
     Front(transform(mach, active(front)), front.transform, Pred())
 end
 
-function extend(front::Front{Trans}, component::Unsupervised, name, cache, args...)
+function extend(front::Front{Trans}, component::Unsupervised, name, cache, ::Any, sources...)
     a = active(front)
-    mach = machine(name, a; cache=cache)
+    if target_in_fit(component)
+        mach = machine(name, a, first(sources); cache=cache)
+    else
+        mach = machine(name, a; cache=cache)
+    end
     Front(predict(mach, a), transform(mach, a), Trans())
 end
 
@@ -598,6 +611,7 @@ function MMI.iteration_parameter(pipe::SupervisedPipeline)
 end
 
 MMI.target_scitype(p::SupervisedPipeline) = target_scitype(supervised_component(p))
+MMI.target_in_fit(p::SomePipeline) = any(target_in_fit, components(p))
 
 MMI.package_name(::Type{<:SomePipeline}) = "MLJBase"
 MMI.load_path(::Type{<:SomePipeline}) = "MLJBase.Pipeline"
diff --git a/test/composition/models/pipelines.jl b/test/composition/models/pipelines.jl
index e213cdc2..c90143a7 100644
--- a/test/composition/models/pipelines.jl
+++ b/test/composition/models/pipelines.jl
@@ -544,6 +544,7 @@ end
     # inverse transform:
     p = Pipeline(UnivariateBoxCoxTransformer,
                  UnivariateStandardizer)
+    @test !target_in_fit(p)
     xtrain = rand(rng, 10)
     mach = machine(p, xtrain)
     fit!(mach, verbosity=0)
@@ -702,6 +703,40 @@ end
     @test Set(features) == Set(keys(X))
 end
 
+struct SupervisedTransformer <: Unsupervised end
+
+MLJBase.fit(::SupervisedTransformer, verbosity, X, y) = (mean(y), nothing, nothing)
+MLJBase.transform(::SupervisedTransformer, fitresult, X) =
+   fitresult*MLJBase.matrix(X) |> MLJBase.table
+MLJBase.target_in_fit(::Type{<:SupervisedTransformer}) = true
+
+struct DummyTransformer <: Unsupervised end
+MLJBase.fit(::DummyTransformer, verbosity, X) = (nothing, nothing, nothing)
+MLJBase.transform(::DummyTransformer, fitresult, X) = X
+
+@testset "supervised transformers in a pipeline" begin
+    X = MLJBase.table((a=fill(10.0, 3),))
+    y = fill(2, 3)
+    pipe = SupervisedTransformer() |> DeterministicConstantRegressor()
+    @test target_in_fit(pipe)
+    mach = machine(pipe, X, y)
+    fit!(mach, verbosity=0)
+    @test predict(mach, X) == fill(2.0, 3)
+
+    pipe2 = DummyTransformer |> pipe
+    @test target_in_fit(pipe2)
+    mach = machine(pipe2, X, y)
+    fit!(mach, verbosity=0)
+    @test predict(mach, X) == fill(2.0, 3)
+
+    pipe3 = DummyTransformer |> SupervisedTransformer |> DummyTransformer
+    @test target_in_fit(pipe3)
+    mach = machine(pipe3, X, y)
+    fit!(mach, verbosity=0)
+    @test transform(mach, X).x1 == fill(20.0, 3)
+end
+
+
 end # module
 
 true

From 63344b7471a640b3ea8b64126fd616670bbf9e21 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Wed, 3 Jul 2024 08:01:59 +1200
Subject: [PATCH 4/4] tweak docstring

---
 src/composition/models/pipelines.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl
index fb7682a9..0d3c4c6f 100644
--- a/src/composition/models/pipelines.jl
+++ b/src/composition/models/pipelines.jl
@@ -228,11 +228,11 @@ component.
 ### Transformers that need a target in training
 
 Some transformers that have type `Unsupervised` (so that the output of `transform` is
-propagated in pipelines) also see a target variable in training. An example are so-called
-target encoders (which transform categorical input features, based on some target
-observations). Provided they appear before any `Supervised` component in the pipelines,
-such models are supported. Of course a target must be provided whenever training such a
-pipeline, whether or not it contains a `Supervised` component.
+propagated in pipelines) may require a target variable for training. Examples are
+so-called target encoders (which transform categorical input features, based on some
+target observations). Provided they appear before any `Supervised` component in the
+pipeline, such models are supported. Of course a target must be provided whenever
+training such a pipeline, whether or not it contains a `Supervised` component.
 
 ### Optional key-word arguments