-
Notifications
You must be signed in to change notification settings - Fork 124
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[diar] merge voxconverse v3 into v2 and update results in README.md (#…
…352) * [diar] deprecate silero-vad v3.1 in v1 * [diar] 1. deprecate silero-vad v3.1 in v2; 2. add the "cluster_type" parameter and merge v3 into v2; * [diar] update README.md for voxconverse repices * [diar] update README.md * [diar] update voxconverse/v2/run.sh * [docs] update README.md
- Loading branch information
Showing
12 changed files
with
48 additions
and
259 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
This is a **WeSpeaker** speaker diarization recipe on the Voxconverse 2020 dataset. It focused on a ``in the wild`` scenario, which was collected from YouTube videos with a semi-automatic pipeline and released for the diarization track in VoxSRC 2020 Challenge. See https://www.robots.ox.ac.uk/~vgg/data/voxconverse/ for more detailed information. | ||
|
||
Two recipes are provided, including **v1** and **v2**. Their only difference is that in **v2**, we split the Fbank extraction, embedding extraction and clustering modules to different stages. We recommend newcomers to follow the **v2** recipe and run it stage by stage. | ||
|
||
🔥 UPDATE 2024.08.20: | ||
* silero-vad v5.1 is used in place of v3.1 | ||
* umap dimensionality reduction + hdbscan clustering is also supported in v2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
#!/bin/bash | ||
# Copyright (c) 2022-2023 Xu Xiang | ||
# 2022 Zhengyang Chen ([email protected]) | ||
# 2024 Hongji Wang ([email protected]) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
|
@@ -18,8 +19,9 @@ | |
|
||
stage=-1 | ||
stop_stage=-1 | ||
sad_type="oracle" | ||
partition="dev" | ||
sad_type="oracle" # oracle/system | ||
partition="dev" # dev/test | ||
cluster_type="spectral" # spectral/umap | ||
|
||
# do cmn on the sub-segment or on the vad segment | ||
subseg_cmn=true | ||
|
@@ -36,11 +38,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then | |
wget -c https://github.com/usnistgov/SCTK/archive/refs/tags/v2.4.12.zip -O external_tools/SCTK-v2.4.12.zip | ||
unzip -o external_tools/SCTK-v2.4.12.zip -d external_tools | ||
|
||
# [2] Download voice activity detection model pretrained by Silero Team | ||
wget -c https://github.com/snakers4/silero-vad/archive/refs/tags/v3.1.zip -O external_tools/silero-vad-v3.1.zip | ||
unzip -o external_tools/silero-vad-v3.1.zip -d external_tools | ||
|
||
# [3] Download ResNet34 speaker model pretrained by WeSpeaker Team | ||
# [2] Download ResNet34 speaker model pretrained by WeSpeaker Team | ||
mkdir -p pretrained_models | ||
|
||
wget -c https://wespeaker-1256283475.cos.ap-shanghai.myqcloud.com/models/voxceleb/voxceleb_resnet34_LM.onnx -O pretrained_models/voxceleb_resnet34_LM.onnx | ||
|
@@ -101,7 +99,6 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then | |
if [[ "x${sad_type}" == "xsystem" ]]; then | ||
# System SAD: applying 'silero' VAD | ||
python3 wespeaker/diar/make_system_sad.py \ | ||
--repo-path external_tools/silero-vad-3.1 \ | ||
--scp data/${partition}/wav.scp \ | ||
--min-duration $min_duration > data/${partition}/system_sad | ||
fi | ||
|
@@ -144,24 +141,24 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then | |
fi | ||
|
||
|
||
# Applying spectral clustering algorithm | ||
# Applying spectral or ump+hdbscan clustering algorithm | ||
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then | ||
|
||
[ -f "exp/spectral_cluster/${partition}_${sad_type}_sad_labels" ] && rm exp/spectral_cluster/${partition}_${sad_type}_sad_labels | ||
[ -f "exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_labels" ] && rm exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_labels | ||
|
||
echo "Doing spectral clustering and store the result in exp/spectral_cluster/${partition}_${sad_type}_sad_labels" | ||
echo "Doing ${cluster_type} clustering and store the result in exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_labels" | ||
echo "..." | ||
python3 wespeaker/diar/spectral_clusterer.py \ | ||
python3 wespeaker/diar/${cluster_type}_clusterer.py \ | ||
--scp exp/${partition}_${sad_type}_sad_embedding/emb.scp \ | ||
--output exp/spectral_cluster/${partition}_${sad_type}_sad_labels | ||
--output exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_labels | ||
fi | ||
|
||
|
||
# Convert labels to RTTMs | ||
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then | ||
python3 wespeaker/diar/make_rttm.py \ | ||
--labels exp/spectral_cluster/${partition}_${sad_type}_sad_labels \ | ||
--channel 1 > exp/spectral_cluster/${partition}_${sad_type}_sad_rttm | ||
--labels exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_labels \ | ||
--channel 1 > exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_rttm | ||
fi | ||
|
||
|
||
|
@@ -173,18 +170,18 @@ if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then | |
perl external_tools/SCTK-2.4.12/src/md-eval/md-eval.pl \ | ||
-c 0.25 \ | ||
-r <(cat ${ref_dir}/${partition}/*.rttm) \ | ||
-s exp/spectral_cluster/${partition}_${sad_type}_sad_rttm 2>&1 | tee exp/spectral_cluster/${partition}_${sad_type}_sad_res | ||
-s exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_rttm 2>&1 | tee exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_res | ||
|
||
if [ ${get_each_file_res} -eq 1 ];then | ||
single_file_res_dir=exp/spectral_cluster/${partition}_${sad_type}_single_file_res | ||
single_file_res_dir=exp/${cluster_type}_cluster/${partition}_${sad_type}_single_file_res | ||
mkdir -p $single_file_res_dir | ||
echo -e "\nGet the DER results for each file and the results will be stored underd ${single_file_res_dir}\n..." | ||
|
||
awk '{print $2}' exp/spectral_cluster/${partition}_${sad_type}_sad_rttm | sort -u | while read file_name; do | ||
awk '{print $2}' exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_rttm | sort -u | while read file_name; do | ||
perl external_tools/SCTK-2.4.12/src/md-eval/md-eval.pl \ | ||
-c 0.25 \ | ||
-r <(cat ${ref_dir}/${partition}/${file_name}.rttm) \ | ||
-s <(grep "${file_name}" exp/spectral_cluster/${partition}_${sad_type}_sad_rttm) > ${single_file_res_dir}/${partition}_${file_name}_res | ||
-s <(grep "${file_name}" exp/${cluster_type}_cluster/${partition}_${sad_type}_sad_rttm) > ${single_file_res_dir}/${partition}_${file_name}_res | ||
done | ||
echo "Done!" | ||
fi | ||
|
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.