From b5da60d9bfb757fb6d4402288ad3e5d7a1eabadb Mon Sep 17 00:00:00 2001 From: "Rohdin Johan A." Date: Mon, 26 Aug 2024 15:52:53 +0200 Subject: [PATCH] Some yapf fixes --- examples/sre/v3/README.md | 2 +- tools/copy_data_dir.sh | 6 ++-- wespeaker/bin/apply_embd_proc.py | 27 +++++++++++------ wespeaker/bin/prep_embd_proc.py | 7 +++-- wespeaker/utils/embedding_processing.py | 39 ++++++++++++------------- 5 files changed, 45 insertions(+), 36 deletions(-) diff --git a/examples/sre/v3/README.md b/examples/sre/v3/README.md index 32fc0add..82f1ca35 100644 --- a/examples/sre/v3/README.md +++ b/examples/sre/v3/README.md @@ -7,7 +7,7 @@ Similarly to ../v2, this recipe uses silero vad https://github.com/snakers4/silero-vad downloaded from here https://github.com/snakers4/silero-vad/archive/refs/tags/v4.0.zip If you intended to use this recipe for an evaluation/competition, make sure to check that -it is allowed to use the data used to train Silero. +it is allowed to use the data that has been used to train Silero. ### Instructions * Set the paths in stage 1. The variable ```sre_data_dir``` is assumed to be prepared by diff --git a/tools/copy_data_dir.sh b/tools/copy_data_dir.sh index 9f781242..c4cd4db6 100755 --- a/tools/copy_data_dir.sh +++ b/tools/copy_data_dir.sh @@ -51,7 +51,7 @@ else awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $utt_list $src_dir/utt2spk > $dest_dir/utt2spk elif [ ! -z "$spk_list" ];then #echo "A" - awk 'NR==FNR{a[$1];next}$2 in a{print $0}' $spk_list $src_dir/utt2spk > $dest_dir/utt2spk + awk 'NR==FNR{a[$1];next}$2 in a{print $0}' $spk_list $src_dir/utt2spk > $dest_dir/utt2spk else cp $src_dir/utt2spk $dest_dir/utt2spk fi @@ -66,12 +66,12 @@ else cat $scrdir/spk2utt | tools/spk2utt_to_utt2spk.pl \ | awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $utt_list - \ | tools/utt2spk_to_spk2utt.pl > $dest_dir/spk2utt - + elif [ ! -z "$spk_list" ];then awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $spk_list $src_dir/spk2utt > $dest_dir/spk2utt else cp $src_dir/spk2utt $dest_dir/spk2utt - fi + fi fi diff --git a/wespeaker/bin/apply_embd_proc.py b/wespeaker/bin/apply_embd_proc.py index 37f7a5af..a7149ed4 100644 --- a/wespeaker/bin/apply_embd_proc.py +++ b/wespeaker/bin/apply_embd_proc.py @@ -22,11 +22,17 @@ xxx """ parser = argparse.ArgumentParser() - parser.add_argument('--path', type=str, default='', + parser.add_argument('--path', + type=str, + default='', help='Path to processing chain.') - parser.add_argument('--input', type=str, default='', + parser.add_argument('--input', + type=str, + default='', help='Input scp file.') - parser.add_argument('--output', type=str, default='', + parser.add_argument('--output', + type=str, + default='', help='Output scp/ark file.') args = parser.parse_args() @@ -34,20 +40,21 @@ processingChain.load(args.path) embd = [] - utt = [] + utt = [] for k, v in kaldiio.load_scp_sequential(args.input): - utt.append(k) + utt.append(k) embd.append(v) embd = np.array(embd) utt = np.array(utt) - print("Read {} embeddings of dimension {}.".format(embd.shape[0], embd.shape[1])) + print("Read {} embeddings of dimension {}.".format(embd.shape[0], + embd.shape[1])) embd = processingChain(embd) # Store both ark and scp if extention '.ark,scp' or '.scp,ark'. Or, only # ark if extension is '.ark' - output_file = args.output + output_file = args.output if output_file.endswith('ark,scp') or output_file.endswith('scp,ark'): output_file = output_file.rstrip('ark,scp') output_file = output_file.rstrip('scp,ark') @@ -63,6 +70,8 @@ e = embd[i] writer(u, e) else: - raise Exception("Invalid file extension of output file {}".format(output_file)) + raise Exception( + "Invalid file extension of output file {}".format(output_file)) - print("Wrote {} embeddings of dimension {}.".format(embd.shape[0], embd.shape[1])) + print("Wrote {} embeddings of dimension {}.".format( + embd.shape[0], embd.shape[1])) diff --git a/wespeaker/bin/prep_embd_proc.py b/wespeaker/bin/prep_embd_proc.py index e592d30b..445eab08 100644 --- a/wespeaker/bin/prep_embd_proc.py +++ b/wespeaker/bin/prep_embd_proc.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import argparse from wespeaker.utils.embedding_processing import EmbeddingProcessingChain @@ -22,8 +21,10 @@ xxx """ parser = argparse.ArgumentParser() - parser.add_argument('--chain', type=str, - default='whitening | length-norm ', help='') + parser.add_argument('--chain', + type=str, + default='whitening | length-norm ', + help='') parser.add_argument('--path', type=str) args = parser.parse_args() diff --git a/wespeaker/utils/embedding_processing.py b/wespeaker/utils/embedding_processing.py index 178a4c17..7595ffac 100644 --- a/wespeaker/utils/embedding_processing.py +++ b/wespeaker/utils/embedding_processing.py @@ -21,13 +21,13 @@ def chain_string_to_dict(chain_string=None): - # This function converts an input string into a list and dictionary - # structure suitable for use by the embedding processing classes below. + # This function converts an input string into a list and dictionary + # structure suitable for use by the embedding processing classes below. # For example, - # "mean-subtract --scp mean1_xvector.scp | length-norm " | + # "mean-subtract --scp mean1_xvector.scp | length-norm " | # "| lda --scp lda_xvector.scp --utt2spk utt2spk --dim $lda_dim " # "| length-norm" - # (The above three lines is supposed to be one long string but style + # (The above three lines is supposed to be one long string but style # rules prevents it from be written that way here.) # becomes # [ @@ -74,8 +74,8 @@ def compute_mean_and_lda_scatter_matrices(self, utt2spk_file, equal_speaker_weight=False, current_chain=None): - # equal_speaker_weight: If True, each speaker is considered equally - # important in the calculation of the mean and scatter matrices. If + # equal_speaker_weight: If True, each speaker is considered equally + # important in the calculation of the mean and scatter matrices. If # False, speakers are weighted by their number of utterances. if current_chain is None: current_chain = [] @@ -89,7 +89,7 @@ def compute_mean_and_lda_scatter_matrices(self, for s in speakers: embd_s = current_chain(np.vstack(embeddings_dict[s])) count_s = embd_s.shape[0] - # With bias=False we need at least 2 speakers, with bias=True we + # With bias=False we need at least 2 speakers, with bias=True we # need at least 1. But this would result in covariance matrix = 0 # for all its elements. (This is not necessarily wrong). if count_s > 1: @@ -144,12 +144,12 @@ def __init__(self, args, current_chain=None): scp_file, utt2spk_file, current_chain=current_chain) E, M = spl.eigh(WC) - # Floor the within-class covariance eigenvalues. We noticed that this + # Floor the within-class covariance eigenvalues. We noticed that this # was done in Kaldi. E_floor = np.max(E) * eps E[E < E_floor] = E_floor - """ - # The new within-class covariance. + """ + # The new within-class covariance. WC = M.dot(np.diag(E).dot(M.T)) D, lda = spl.eigh( BC, WC ) # The output of eigh is sorted in self.lda = lda[:,-dim:] # ascending order so we so we kee @@ -157,10 +157,10 @@ def __init__(self, args, current_chain=None): """ # Since we have already found the eigen decomposition of WC, we could # whiten it by T1 = 1 / sqrt(E), I = T1 WC T1'. So instead of solving - # spl.eigh( BC, WC ) we can apply T1 on BC and solve - # spl.eigh( T1 BC T1', T1 WC T1' ) - # = spl.eigh( T1 BC T1', I ) - # = spl.eigh( T1 BC T1') + # spl.eigh( BC, WC ) we can apply T1 on BC and solve + # spl.eigh( T1 BC T1', T1 WC T1' ) + # = spl.eigh( T1 BC T1', I ) + # = spl.eigh( T1 BC T1') # as follows. However, T1 then needs to be inlcluded when transforming # the data. In either case, the result is that after LDA transform, the # data will have white WC and diagonal BC @@ -169,10 +169,9 @@ def __init__(self, args, current_chain=None): D, lda = spl.eigh(BC) self.lda = np.dot(T1.T, lda[:, -dim:]) - print( - " Input dimension: {}, output dimension: {}," - " sum of all eigenvalues {:.2f}, sum of kept eigenvalues {:.2f}" - .format(len(D), dim, np.sum(D), np.sum(D[-dim:]))) + print(" Input dimension: {}, output dimension: {}," + " sum of all eigenvalues {:.2f}, sum of kept eigenvalues {:.2f}". + format(len(D), dim, np.sum(D), np.sum(D[-dim:]))) print(" All eigenvalues: {}".format(D)) def __call__(self, embd): @@ -189,9 +188,9 @@ def __call__(self, embd): embd_proc /= np.sqrt((embd_proc**2).sum( axis=1)[:, np.newaxis]) # This would make the lengths equal to one """ - Todo: For Kaldi compatibility we may want to add this as option as + Todo: For Kaldi compatibility we may want to add this as option as well as Kaldi style normalization. - embd_proc *= np.sqrt(embd_normed.shape[1]) + embd_proc *= np.sqrt(embd_normed.shape[1]) """ return (embd_proc)