pyannote · clement-pages · Jun 8, 2023 · Jun 12, 2023 · Jun 12, 2023 · Jun 15, 2023
diff --git a/pyannote/audio/core/model.py b/pyannote/audio/core/model.py
@@ -194,6 +194,9 @@ def receptive_field(self) -> SlidingWindow:
     def prepare_data(self):
         self.task.prepare_data()
 
+    def prepare_data(self):  # NOTE(review): identical to the prepare_data just above; likely a merge artifact
+        self.task.prepare_data()
+
     def setup(self, stage=None):
         if stage == "fit":
             # let the task know about the trainer (e.g for broadcasting

diff --git a/pyannote/audio/models/blocks/pooling.py b/pyannote/audio/models/blocks/pooling.py
@@ -28,7 +28,9 @@
 import torch.nn.functional as F
 
 
-def _pool(sequences: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
+def _pool(
+    sequences: torch.Tensor, weights: torch.Tensor, compute_mean: bool, compute_std: bool
+) -> torch.Tensor:
     """Helper function to compute statistics pooling
 
     Assumes that weights are already interpolated to match the number of frames
@@ -50,16 +52,24 @@ def _pool(sequences: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
     weights = weights.unsqueeze(dim=1)
     # (batch, 1, frames)
 
+    stats = []
+
     v1 = weights.sum(dim=2) + 1e-8
     mean = torch.sum(sequences * weights, dim=2) / v1
 
-    dx2 = torch.square(sequences - mean.unsqueeze(2))
-    v2 = torch.square(weights).sum(dim=2)
+    if compute_mean:
+        stats.append(mean)
+
+    if compute_std:
+        dx2 = torch.square(sequences - mean.unsqueeze(2))
+        v2 = torch.square(weights).sum(dim=2)
 
-    var = torch.sum(dx2 * weights, dim=2) / (v1 - v2 / v1 + 1e-8)
-    std = torch.sqrt(var)
+        var = torch.sum(dx2 * weights, dim=2) / (v1 - v2 / v1 + 1e-8)
+        std = torch.sqrt(var)
 
-    return torch.cat([mean, std], dim=1)
+        stats.append(std)
+
+    return torch.cat(stats, dim=1)  # NOTE(review): stats is empty if both flags are False; torch.cat presumably raises then
 
 
 class StatsPool(nn.Module):
@@ -68,14 +78,33 @@ class StatsPool(nn.Module):
     Compute temporal mean and (unbiased) standard deviation
     and returns their concatenation.
 
+    Parameters
+    ----------
+
+    compute_mean: bool, optional
+        whether to compute (and return) temporal mean.
+        Defaults to True.
+    compute_std: bool, optional
+        whether to compute (and return) temporal standard deviation.
+        Defaults to True.
+
     Reference
     ---------
     https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
 
     """
 
+    def __init__(
+        self, compute_mean: Optional[bool] = True, compute_std: Optional[bool] = True
+    ):
+        """Record which statistics (mean and/or std) the pooling should return."""
+        super().__init__()
+        # both flags are passed on to `_pool` by `forward`
+        self.compute_mean = compute_mean
+        self.compute_std = compute_std
+
     def forward(
-        self, sequences: torch.Tensor, weights: Optional[torch.Tensor] = None
+        self, sequences: torch.Tensor, weights: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         """Forward pass
 
@@ -122,7 +151,7 @@ def forward(
 
         output = torch.stack(
             [
-                _pool(sequences, weights[:, speaker, :])
+                _pool(sequences, weights[:, speaker, :], self.compute_mean, self.compute_std)
                 for speaker in range(num_speakers)
             ],
             dim=1,

diff --git a/pyannote/audio/models/joint/__init__.py b/pyannote/audio/models/joint/__init__.py
@@ -0,0 +1,27 @@
+# MIT License
+#
+# Copyright (c) 2020 CNRS
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from .end_to_end_diarization import (
+    WavLMEnd2EndDiarization, WavLMEnd2EndDiarizationv2, WavLMEnd2EndDiarizationv3
+)
+
+__all__ = ["WavLMEnd2EndDiarization", "WavLMEnd2EndDiarizationv2", "WavLMEnd2EndDiarizationv3"]