diff --git a/pyannote/audio/models/embedding/xvector.py b/pyannote/audio/models/embedding/xvector.py index 3161876e3..55490bf47 100644 --- a/pyannote/audio/models/embedding/xvector.py +++ b/pyannote/audio/models/embedding/xvector.py @@ -342,8 +342,11 @@ def forward( Batch of weights with shape (batch, frame). """ - outputs = self.sincnet(waveforms).squeeze(dim=1) + # outputs = self.sincnet(waveforms).squeeze(dim=1) + outputs = self.sincnet(waveforms) + for tdnn in self.tdnns: outputs = tdnn(outputs) + outputs = self.stats_pool(outputs, weights=weights) return self.embedding(outputs)