[lint] auto format all by pre-commit, including c++, python
user01 authored and user01 committed Dec 7, 2023
1 parent 3afff44 commit 8f49658
Showing 71 changed files with 779 additions and 542 deletions.
93 changes: 93 additions & 0 deletions .clang-format
@@ -0,0 +1,93 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Auto
TabWidth: 8
UseTab: Never
...
18 changes: 9 additions & 9 deletions examples/cnceleb/v2/local/choose_utts_to_combine.py
@@ -54,12 +54,11 @@
"because this script tries to merge utterances from the "
"same speaker as much as possible, and also needs to produce"
"an output utt2spk map.")
parser.add_argument(
"utt2dur_in",
type=str,
metavar="<utt2dur-in>",
help="Filename of [input] utterance-to-duration map, with lines like 'utt1 1.23'."
)
parser.add_argument("utt2dur_in",
type=str,
metavar="<utt2dur-in>",
help="Filename of [input] utterance-to-duration map, "
"with lines like 'utt1 1.23'.")
parser.add_argument(
"utt2utts_out",
type=str,
@@ -70,9 +69,10 @@
"utt2spk_out",
type=str,
metavar="<utt2spk-out>",
help="Filename of [output] utt2spk map, which maps new utterances to original "
"speakers. If utterances were combined across speakers, we map the new "
"utterance to the speaker that contributed the most to them.")
help="Filename of [output] utt2spk map, which maps new utterances to "
"original speakers. If utterances were combined across speakers, "
"we map the new utterance to the speaker that contributed the most to them."
)
parser.add_argument(
"utt2dur_out",
type=str,
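The help strings above describe simple two-column Kaldi-style maps such as utt2dur ('utt1 1.23' per line). As a point of reference only, a minimal reader for that format might look like the sketch below; the read_utt2dur name is hypothetical and this is not the script's own parsing code.

def read_utt2dur(path):
    """Read a utt2dur map with lines like 'utt1 1.23' into a dict."""
    utt2dur = {}
    with open(path) as f:
        for line in f:
            utt, dur = line.split()
            utt2dur[utt] = float(dur)
    return utt2dur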
1 change: 0 additions & 1 deletion examples/sre/v2/local/filter_utt_accd_dur.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import fire


1 change: 0 additions & 1 deletion examples/sre/v2/local/generate_sre_aug.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import fire

28 changes: 16 additions & 12 deletions examples/sre/v2/local/make_system_sad.py
@@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
@@ -35,20 +35,21 @@

def get_args():
parser = argparse.ArgumentParser(description='')
parser.add_argument('--repo-path', required=True,
parser.add_argument('--repo-path',
required=True,
help='VAD model repo path')
parser.add_argument('--scp', required=True, help='wav scp')
parser.add_argument('--min-duration', required=True,
type=float, help='min duration')
parser.add_argument('--min-duration',
required=True,
type=float,
help='min duration')
args = parser.parse_args()

return args


@functools.lru_cache(maxsize=1)
def load_wav(
wav_rxfilename,
):
def load_wav(wav_rxfilename, ):
""" This function reads audio file and return data in pytorch tensor.
"lru_cache" holds recently loaded audio so that can be called
many times on the same audio file.
@@ -57,7 +58,8 @@ def load_wav(
"""
if wav_rxfilename.endswith('|'):
# input piped command
p = subprocess.Popen(wav_rxfilename[:-1], shell=True,
p = subprocess.Popen(wav_rxfilename[:-1],
shell=True,
stdout=subprocess.PIPE)
data, samplerate = torchaudio.load(io.BytesIO(p.stdout.read()))
elif wav_rxfilename == '-':
@@ -82,8 +84,11 @@ def read_scp(scp):
return utt_wav_pair


def silero_vad(utt_wav_pair, repo_path, min_duration,
sampling_rate=8000, threshold=0.25):
def silero_vad(utt_wav_pair,
repo_path,
min_duration,
sampling_rate=8000,
threshold=0.25):

def module_from_file(module_name, file_path):
spec = importlib.util.spec_from_file_location(module_name, file_path)
Expand All @@ -102,8 +107,7 @@ def module_from_file(module_name, file_path):
wav, sr = load_wav(wav)
assert sr == sampling_rate
speech_timestamps = utils_vad.get_speech_timestamps(
wav, model, sampling_rate=sampling_rate,
threshold=threshold)
wav, model, sampling_rate=sampling_rate, threshold=threshold)

vad_result = ""
for item in speech_timestamps:
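For orientation, the pieces above fit together as follows: read_scp yields (utt, wav_rxfilename) pairs, load_wav accepts plain paths, '-' for stdin, or piped commands ending in '|', and silero_vad turns one pair into a block of SAD segments. A minimal driver along these lines is sketched below; it is an assumption for illustration (the script's actual main logic is not shown in this excerpt), and run_sad is a hypothetical name.

import functools
from multiprocessing import Pool

def run_sad(scp, repo_path, min_duration):
    utt_wav_pairs = read_scp(scp)  # [(utt, wav_rxfilename), ...]
    vad_fn = functools.partial(silero_vad,
                               repo_path=repo_path,
                               min_duration=min_duration)
    # One file per worker process; OMP/OpenBLAS/MKL threads are pinned to 1
    # at the top of the script, so process-level parallelism does the work.
    with Pool() as pool:
        for vad_result in pool.map(vad_fn, utt_wav_pairs):
            print(vad_result, end='')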
60 changes: 37 additions & 23 deletions examples/voxconverse/v1/diar/clusterer.py
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
@@ -42,20 +42,28 @@ def get_args():
parser.add_argument('--scp', required=True, help='wav scp')
parser.add_argument('--segments', required=True, help='vad segments')
parser.add_argument('--output', required=True, help='output label file')
parser.add_argument('--source', required=True,
help='onnx model')
parser.add_argument('--device', default='cuda',
parser.add_argument('--source', required=True, help='onnx model')
parser.add_argument('--device',
default='cuda',
help='inference device type: cpu or cuda')
parser.add_argument('--batch-size', type=int, default=96,
parser.add_argument('--batch-size',
type=int,
default=96,
help='batch size for embedding extraction')
args = parser.parse_args()

return args


def compute_embeddings(scp, segments, source, device,
batch_size, sampling_rate=16000,
window_secs=1.50, period_secs=0.75, frame_shift=10):
def compute_embeddings(scp,
segments,
source,
device,
batch_size,
sampling_rate=16000,
window_secs=1.50,
period_secs=0.75,
frame_shift=10):

def read_segments(segments):
utt_to_segments = OrderedDict()
@@ -97,13 +105,12 @@ def subsegment(wav, segments, window_fs, period_fs):
for (seg, begin, end) in segments:
seg_begin = int(begin * sampling_rate)
seg_end = int(end * sampling_rate)
seg_signal = signal[seg_begin: seg_end + 1, :]
seg_signal = signal[seg_begin:seg_end + 1, :]
seg_length = seg_end - seg_begin

if seg_length <= window_fs:
subseg = seg + "-{:08d}-{:08d}".format(
0,
int(seg_length / sampling_rate * 1000 // frame_shift))
0, int(seg_length / sampling_rate * 1000 // frame_shift))
subseg_signal = repeat_to_fill(seg_signal, window_fs)

subsegs.append(subseg)
@@ -116,15 +123,19 @@ def subsegment(wav, segments, window_fs, period_fs):
int(subseg_begin / sampling_rate * 1000 / frame_shift),
int(subseg_end / sampling_rate * 1000 / frame_shift))
subseg_signal = repeat_to_fill(
seg_signal[subseg_begin: subseg_end + 1, :], window_fs)
seg_signal[subseg_begin:subseg_end + 1, :], window_fs)

subsegs.append(subseg)
subseg_signals.append(subseg_signal)

return subsegs, subseg_signals

def compute_fbank(wavs, num_mel_bins=80, frame_length=25,
frame_shift=10, dither=0.0, sample_frequency=16000):
def compute_fbank(wavs,
num_mel_bins=80,
frame_length=25,
frame_shift=10,
dither=0.0,
sample_frequency=16000):

feats = []
for wav in wavs:
@@ -155,14 +166,15 @@ def init_session(source, device):
opts = ort.SessionOptions()
opts.inter_op_num_threads = 1
opts.intra_op_num_threads = 1
session = ort.InferenceSession(source, sess_options=opts,
session = ort.InferenceSession(source,
sess_options=opts,
providers=providers)
return session

def extract_embeddings(wavs, batch_size):
embeddings = []
for i in range(0, wavs.size(0), batch_size):
batch_wavs = wavs[i: i + batch_size, :]
batch_wavs = wavs[i:i + batch_size, :]
batch_feats = compute_fbank(batch_wavs)
batch_embs = session.run(input_feed={'feats': batch_feats.numpy()},
output_names=['embs'])[0].squeeze()
@@ -193,8 +205,8 @@ def extract_embeddings(wavs, batch_size):
segments = utt_to_segments[utt]

# Extract wav data using sliding window with overlap for each utterance
utt_subsegs, utt_subseg_signals = subsegment(wav, segments,
window_fs, period_fs)
utt_subsegs, utt_subseg_signals = subsegment(wav, segments, window_fs,
period_fs)
# Convert a list of Tensor to a Tensor
utt_subseg_signals = torch.stack(utt_subseg_signals).squeeze(-1)

@@ -256,8 +268,8 @@ def kmeans(data):
# Compute Laplacian
laplacian_matrix = laplacian(pruned_similarity_matrix)
# Compute spectral embeddings
spectral_embeddings = spectral(laplacian_matrix, num_spks,
min_num_spks, max_num_spks)
spectral_embeddings = spectral(laplacian_matrix, num_spks, min_num_spks,
max_num_spks)
# Assign class labels
labels = kmeans(spectral_embeddings)

@@ -268,8 +280,7 @@ def main():
args = get_args()

print('Segmenting and extracting speaker embeddings')
subsegs_list, embeddings_list = compute_embeddings(args.scp,
args.segments,
subsegs_list, embeddings_list = compute_embeddings(args.scp, args.segments,
args.source,
args.device,
args.batch_size)
@@ -279,7 +290,10 @@ def main():
with cf.ProcessPoolExecutor() as executor, open(args.output, 'w') as f:
for (subsegs, labels) in zip(subsegs_list,
executor.map(cluster, embeddings_list)):
[print(subseg, label, file=f) for (subseg, label) in zip(subsegs, labels)]
[
print(subseg, label, file=f)
for (subseg, label) in zip(subsegs, labels)
]


if __name__ == '__main__':
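For context, cluster() above follows the standard spectral clustering recipe on the per-subsegment embeddings: build and prune a similarity matrix, form a graph Laplacian, choose the speaker count between min_num_spks and max_num_spks, and run k-means in the spectral embedding space. The condensed numpy/scipy sketch below illustrates that flow under stated assumptions (cosine similarity, unnormalized Laplacian, eigengap speaker counting, illustrative bounds of 2 and 8); it is not the repository's exact implementation.

import numpy as np
from scipy.cluster.vq import kmeans2

def cluster_sketch(embeddings, min_num_spks=2, max_num_spks=8):
    # Cosine similarity between L2-normalized embeddings
    emb = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    similarity = emb @ emb.T
    # Unnormalized graph Laplacian: L = D - A
    laplacian_matrix = np.diag(similarity.sum(axis=1)) - similarity
    # Eigenvalues/eigenvectors in ascending order (matrix is symmetric)
    eigvals, eigvecs = np.linalg.eigh(laplacian_matrix)
    # Pick the number of speakers at the largest eigengap within the bounds
    gaps = np.diff(eigvals[:max_num_spks + 1])
    num_spks = int(np.argmax(gaps[min_num_spks - 1:max_num_spks])) + min_num_spks
    # K-means on the first num_spks eigenvectors yields subsegment labels
    spectral_embeddings = eigvecs[:, :num_spks]
    _, labels = kmeans2(spectral_embeddings, num_spks, minit='++')
    return labels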