Skip to content

Commit

Permalink
Some yapf fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
gulamungon committed Aug 26, 2024
1 parent ec6d4f7 commit b5da60d
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 36 deletions.
2 changes: 1 addition & 1 deletion examples/sre/v3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Similarly to ../v2, this recipe uses silero vad https://github.com/snakers4/silero-vad
downloaded from here https://github.com/snakers4/silero-vad/archive/refs/tags/v4.0.zip
If you intend to use this recipe for an evaluation/competition, make sure to check that
it is allowed to use the data used to train Silero.
it is allowed to use the data that has been used to train Silero.

### Instructions
* Set the paths in stage 1. The variable ```sre_data_dir``` is assumed to be prepared by
Expand Down
6 changes: 3 additions & 3 deletions tools/copy_data_dir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ else
awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $utt_list $src_dir/utt2spk > $dest_dir/utt2spk
elif [ ! -z "$spk_list" ];then
#echo "A"
awk 'NR==FNR{a[$1];next}$2 in a{print $0}' $spk_list $src_dir/utt2spk > $dest_dir/utt2spk
awk 'NR==FNR{a[$1];next}$2 in a{print $0}' $spk_list $src_dir/utt2spk > $dest_dir/utt2spk
else
cp $src_dir/utt2spk $dest_dir/utt2spk
fi
Expand All @@ -66,12 +66,12 @@ else
cat $scrdir/spk2utt | tools/spk2utt_to_utt2spk.pl \
| awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $utt_list - \
| tools/utt2spk_to_spk2utt.pl > $dest_dir/spk2utt

elif [ ! -z "$spk_list" ];then
awk 'NR==FNR{a[$1];next}$1 in a{print $0}' $spk_list $src_dir/spk2utt > $dest_dir/spk2utt
else
cp $src_dir/spk2utt $dest_dir/spk2utt
fi
fi
fi


Expand Down
27 changes: 18 additions & 9 deletions wespeaker/bin/apply_embd_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,32 +22,39 @@
xxx
"""
parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, default='',
parser.add_argument('--path',
type=str,
default='',
help='Path to processing chain.')
parser.add_argument('--input', type=str, default='',
parser.add_argument('--input',
type=str,
default='',
help='Input scp file.')
parser.add_argument('--output', type=str, default='',
parser.add_argument('--output',
type=str,
default='',
help='Output scp/ark file.')
args = parser.parse_args()

processingChain = EmbeddingProcessingChain()
processingChain.load(args.path)

embd = []
utt = []
utt = []
for k, v in kaldiio.load_scp_sequential(args.input):
utt.append(k)
utt.append(k)
embd.append(v)
embd = np.array(embd)
utt = np.array(utt)

print("Read {} embeddings of dimension {}.".format(embd.shape[0], embd.shape[1]))
print("Read {} embeddings of dimension {}.".format(embd.shape[0],
embd.shape[1]))

embd = processingChain(embd)

# Store both ark and scp if extention '.ark,scp' or '.scp,ark'. Or, only
# ark if extension is '.ark'
output_file = args.output
output_file = args.output
if output_file.endswith('ark,scp') or output_file.endswith('scp,ark'):
output_file = output_file.rstrip('ark,scp')
output_file = output_file.rstrip('scp,ark')
Expand All @@ -63,6 +70,8 @@
e = embd[i]
writer(u, e)
else:
raise Exception("Invalid file extension of output file {}".format(output_file))
raise Exception(
"Invalid file extension of output file {}".format(output_file))

print("Wrote {} embeddings of dimension {}.".format(embd.shape[0], embd.shape[1]))
print("Wrote {} embeddings of dimension {}.".format(
embd.shape[0], embd.shape[1]))
7 changes: 4 additions & 3 deletions wespeaker/bin/prep_embd_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse

from wespeaker.utils.embedding_processing import EmbeddingProcessingChain
Expand All @@ -22,8 +21,10 @@
xxx
"""
parser = argparse.ArgumentParser()
parser.add_argument('--chain', type=str,
default='whitening | length-norm ', help='')
parser.add_argument('--chain',
type=str,
default='whitening | length-norm ',
help='')
parser.add_argument('--path', type=str)
args = parser.parse_args()

Expand Down
39 changes: 19 additions & 20 deletions wespeaker/utils/embedding_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@


def chain_string_to_dict(chain_string=None):
# This function converts an input string into a list and dictionary
# structure suitable for use by the embedding processing classes below.
# This function converts an input string into a list and dictionary
# structure suitable for use by the embedding processing classes below.
# For example,
# "mean-subtract --scp mean1_xvector.scp | length-norm " |
# "mean-subtract --scp mean1_xvector.scp | length-norm " |
# "| lda --scp lda_xvector.scp --utt2spk utt2spk --dim $lda_dim "
# "| length-norm"
# (The above three lines is supposed to be one long string but style
# (The above three lines is supposed to be one long string but style
# rules prevents it from be written that way here.)
# becomes
# [
Expand Down Expand Up @@ -74,8 +74,8 @@ def compute_mean_and_lda_scatter_matrices(self,
utt2spk_file,
equal_speaker_weight=False,
current_chain=None):
# equal_speaker_weight: If True, each speaker is considered equally
# important in the calculation of the mean and scatter matrices. If
# equal_speaker_weight: If True, each speaker is considered equally
# important in the calculation of the mean and scatter matrices. If
# False, speakers are weighted by their number of utterances.
if current_chain is None:
current_chain = []
Expand All @@ -89,7 +89,7 @@ def compute_mean_and_lda_scatter_matrices(self,
for s in speakers:
embd_s = current_chain(np.vstack(embeddings_dict[s]))
count_s = embd_s.shape[0]
# With bias=False we need at least 2 speakers, with bias=True we
# With bias=False we need at least 2 speakers, with bias=True we
# need at least 1. But this would result in covariance matrix = 0
# for all its elements. (This is not necessarily wrong).
if count_s > 1:
Expand Down Expand Up @@ -144,23 +144,23 @@ def __init__(self, args, current_chain=None):
scp_file, utt2spk_file, current_chain=current_chain)

E, M = spl.eigh(WC)
# Floor the within-class covariance eigenvalues. We noticed that this
# Floor the within-class covariance eigenvalues. We noticed that this
# was done in Kaldi.
E_floor = np.max(E) * eps
E[E < E_floor] = E_floor
"""
# The new within-class covariance.
"""
# The new within-class covariance.
WC = M.dot(np.diag(E).dot(M.T))
D, lda = spl.eigh( BC, WC ) # The output of eigh is sorted in
self.lda = lda[:,-dim:]            # ascending order, so we keep
self.T1 = np.eye(self.m.shape[0]) # the "dim" last eigenvectors.
"""
# Since we have already found the eigen decomposition of WC, we could
# whiten it by T1 = 1 / sqrt(E), I = T1 WC T1'. So instead of solving
# spl.eigh( BC, WC ) we can apply T1 on BC and solve
# spl.eigh( T1 BC T1', T1 WC T1' )
# = spl.eigh( T1 BC T1', I )
# = spl.eigh( T1 BC T1')
# spl.eigh( BC, WC ) we can apply T1 on BC and solve
# spl.eigh( T1 BC T1', T1 WC T1' )
# = spl.eigh( T1 BC T1', I )
# = spl.eigh( T1 BC T1')
# as follows. However, T1 then needs to be included when transforming
# the data. In either case, the result is that after LDA transform, the
# data will have white WC and diagonal BC
Expand All @@ -169,10 +169,9 @@ def __init__(self, args, current_chain=None):
D, lda = spl.eigh(BC)
self.lda = np.dot(T1.T, lda[:, -dim:])

print(
" Input dimension: {}, output dimension: {},"
" sum of all eigenvalues {:.2f}, sum of kept eigenvalues {:.2f}"
.format(len(D), dim, np.sum(D), np.sum(D[-dim:])))
print(" Input dimension: {}, output dimension: {},"
" sum of all eigenvalues {:.2f}, sum of kept eigenvalues {:.2f}".
format(len(D), dim, np.sum(D), np.sum(D[-dim:])))
print(" All eigenvalues: {}".format(D))

def __call__(self, embd):
Expand All @@ -189,9 +188,9 @@ def __call__(self, embd):
embd_proc /= np.sqrt((embd_proc**2).sum(
axis=1)[:, np.newaxis]) # This would make the lengths equal to one
"""
Todo: For Kaldi compatibility we may want to add this as option as
Todo: For Kaldi compatibility we may want to add this as option as
well as Kaldi style normalization.
embd_proc *= np.sqrt(embd_normed.shape[1])
embd_proc *= np.sqrt(embd_normed.shape[1])
"""
return (embd_proc)

Expand Down

0 comments on commit b5da60d

Please sign in to comment.