[lint] auto format all by pre-commit, including c++, python
user01 authored and user01 committed Dec 7, 2023
1 parent 3afff44 commit 8f49658
Showing 71 changed files with 779 additions and 542 deletions.
93 changes: 93 additions & 0 deletions .clang-format
@@ -0,0 +1,93 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Auto
TabWidth: 8
UseTab: Never
...
18 changes: 9 additions & 9 deletions examples/cnceleb/v2/local/choose_utts_to_combine.py
@@ -54,12 +54,11 @@
"because this script tries to merge utterances from the "
"same speaker as much as possible, and also needs to produce"
"an output utt2spk map.")
parser.add_argument(
"utt2dur_in",
type=str,
metavar="<utt2dur-in>",
help="Filename of [input] utterance-to-duration map, with lines like 'utt1 1.23'."
)
parser.add_argument("utt2dur_in",
type=str,
metavar="<utt2dur-in>",
help="Filename of [input] utterance-to-duration map, "
"with lines like 'utt1 1.23'.")
parser.add_argument(
"utt2utts_out",
type=str,
@@ -70,9 +69,10 @@
"utt2spk_out",
type=str,
metavar="<utt2spk-out>",
help="Filename of [output] utt2spk map, which maps new utterances to original "
"speakers. If utterances were combined across speakers, we map the new "
"utterance to the speaker that contributed the most to them.")
help="Filename of [output] utt2spk map, which maps new utterances to "
"original speakers. If utterances were combined across speakers, "
"we map the new utterance to the speaker that contributed the most to them."
)
parser.add_argument(
"utt2dur_out",
type=str,
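The help strings above describe simple two-column Kaldi-style maps such as utt2dur ('utt1 1.23' per line). As a point of reference only, a minimal reader for that format might look like the sketch below; the read_utt2dur name is hypothetical and this is not the script's own parsing code.

def read_utt2dur(path):
    """Read a utt2dur map with lines like 'utt1 1.23' into a dict."""
    utt2dur = {}
    with open(path) as f:
        for line in f:
            utt, dur = line.split()
            utt2dur[utt] = float(dur)
    return utt2dur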
1 change: 0 additions & 1 deletion examples/sre/v2/local/filter_utt_accd_dur.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import fire


1 change: 0 additions & 1 deletion examples/sre/v2/local/generate_sre_aug.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import fire

28 changes: 16 additions & 12 deletions examples/sre/v2/local/make_system_sad.py
@@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
@@ -35,20 +35,21 @@

def get_args():
parser = argparse.ArgumentParser(description='')
parser.add_argument('--repo-path', required=True,
parser.add_argument('--repo-path',
required=True,
help='VAD model repo path')
parser.add_argument('--scp', required=True, help='wav scp')
parser.add_argument('--min-duration', required=True,
type=float, help='min duration')
parser.add_argument('--min-duration',
required=True,
type=float,
help='min duration')
args = parser.parse_args()

return args


@functools.lru_cache(maxsize=1)
def load_wav(
wav_rxfilename,
):
def load_wav(wav_rxfilename, ):
""" This function reads audio file and return data in pytorch tensor.
"lru_cache" holds recently loaded audio so that can be called
many times on the same audio file.
@@ -57,7 +58,8 @@ def load_wav(
"""
if wav_rxfilename.endswith('|'):
# input piped command
p = subprocess.Popen(wav_rxfilename[:-1], shell=True,
p = subprocess.Popen(wav_rxfilename[:-1],
shell=True,
stdout=subprocess.PIPE)
data, samplerate = torchaudio.load(io.BytesIO(p.stdout.read()))
elif wav_rxfilename == '-':
@@ -82,8 +84,11 @@ def read_scp(scp):
return utt_wav_pair


def silero_vad(utt_wav_pair, repo_path, min_duration,
sampling_rate=8000, threshold=0.25):
def silero_vad(utt_wav_pair,
repo_path,
min_duration,
sampling_rate=8000,
threshold=0.25):

def module_from_file(module_name, file_path):
spec = importlib.util.spec_from_file_location(module_name, file_path)
Expand All @@ -102,8 +107,7 @@ def module_from_file(module_name, file_path):
wav, sr = load_wav(wav)
assert sr == sampling_rate
speech_timestamps = utils_vad.get_speech_timestamps(
wav, model, sampling_rate=sampling_rate,
threshold=threshold)
wav, model, sampling_rate=sampling_rate, threshold=threshold)

vad_result = ""
for item in speech_timestamps:
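For orientation, the pieces above fit together as follows: read_scp yields (utt, wav_rxfilename) pairs, load_wav accepts plain paths, '-' for stdin, or piped commands ending in '|', and silero_vad turns one pair into a block of SAD segments. A minimal driver along these lines is sketched below; it is an assumption for illustration (the script's actual main logic is not shown in this excerpt), and run_sad is a hypothetical name.

import functools
from multiprocessing import Pool

def run_sad(scp, repo_path, min_duration):
    utt_wav_pairs = read_scp(scp)  # [(utt, wav_rxfilename), ...]
    vad_fn = functools.partial(silero_vad,
                               repo_path=repo_path,
                               min_duration=min_duration)
    # One file per worker process; OMP/OpenBLAS/MKL threads are pinned to 1
    # at the top of the script, so process-level parallelism does the work.
    with Pool() as pool:
        for vad_result in pool.map(vad_fn, utt_wav_pairs):
            print(vad_result, end='')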
60 changes: 37 additions & 23 deletions examples/voxconverse/v1/diar/clusterer.py
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import os

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
@@ -42,20 +42,28 @@ def get_args():
parser.add_argument('--scp', required=True, help='wav scp')
parser.add_argument('--segments', required=True, help='vad segments')
parser.add_argument('--output', required=True, help='output label file')
parser.add_argument('--source', required=True,
help='onnx model')
parser.add_argument('--device', default='cuda',
parser.add_argument('--source', required=True, help='onnx model')
parser.add_argument('--device',
default='cuda',
help='inference device type: cpu or cuda')
parser.add_argument('--batch-size', type=int, default=96,
parser.add_argument('--batch-size',
type=int,
default=96,
help='batch size for embedding extraction')
args = parser.parse_args()

return args


def compute_embeddings(scp, segments, source, device,
batch_size, sampling_rate=16000,
window_secs=1.50, period_secs=0.75, frame_shift=10):
def compute_embeddings(scp,
segments,
source,
device,
batch_size,
sampling_rate=16000,
window_secs=1.50,
period_secs=0.75,
frame_shift=10):

def read_segments(segments):
utt_to_segments = OrderedDict()
@@ -97,13 +105,12 @@ def subsegment(wav, segments, window_fs, period_fs):
for (seg, begin, end) in segments:
seg_begin = int(begin * sampling_rate)
seg_end = int(end * sampling_rate)
seg_signal = signal[seg_begin: seg_end + 1, :]
seg_signal = signal[seg_begin:seg_end + 1, :]
seg_length = seg_end - seg_begin

if seg_length <= window_fs:
subseg = seg + "-{:08d}-{:08d}".format(
0,
int(seg_length / sampling_rate * 1000 // frame_shift))
0, int(seg_length / sampling_rate * 1000 // frame_shift))
subseg_signal = repeat_to_fill(seg_signal, window_fs)

subsegs.append(subseg)
@@ -116,15 +123,19 @@ def subsegment(wav, segments, window_fs, period_fs):
int(subseg_begin / sampling_rate * 1000 / frame_shift),
int(subseg_end / sampling_rate * 1000 / frame_shift))
subseg_signal = repeat_to_fill(
seg_signal[subseg_begin: subseg_end + 1, :], window_fs)
seg_signal[subseg_begin:subseg_end + 1, :], window_fs)

subsegs.append(subseg)
subseg_signals.append(subseg_signal)

return subsegs, subseg_signals

def compute_fbank(wavs, num_mel_bins=80, frame_length=25,
frame_shift=10, dither=0.0, sample_frequency=16000):
def compute_fbank(wavs,
num_mel_bins=80,
frame_length=25,
frame_shift=10,
dither=0.0,
sample_frequency=16000):

feats = []
for wav in wavs:
@@ -155,14 +166,15 @@ def init_session(source, device):
opts = ort.SessionOptions()
opts.inter_op_num_threads = 1
opts.intra_op_num_threads = 1
session = ort.InferenceSession(source, sess_options=opts,
session = ort.InferenceSession(source,
sess_options=opts,
providers=providers)
return session

def extract_embeddings(wavs, batch_size):
embeddings = []
for i in range(0, wavs.size(0), batch_size):
batch_wavs = wavs[i: i + batch_size, :]
batch_wavs = wavs[i:i + batch_size, :]
batch_feats = compute_fbank(batch_wavs)
batch_embs = session.run(input_feed={'feats': batch_feats.numpy()},
output_names=['embs'])[0].squeeze()
@@ -193,8 +205,8 @@ def extract_embeddings(wavs, batch_size):
segments = utt_to_segments[utt]

# Extract wav data using sliding window with overlap for each utterance
utt_subsegs, utt_subseg_signals = subsegment(wav, segments,
window_fs, period_fs)
utt_subsegs, utt_subseg_signals = subsegment(wav, segments, window_fs,
period_fs)
# Convert a list of Tensor to a Tensor
utt_subseg_signals = torch.stack(utt_subseg_signals).squeeze(-1)

@@ -256,8 +268,8 @@ def kmeans(data):
# Compute Laplacian
laplacian_matrix = laplacian(pruned_similarity_matrix)
# Compute spectral embeddings
spectral_embeddings = spectral(laplacian_matrix, num_spks,
min_num_spks, max_num_spks)
spectral_embeddings = spectral(laplacian_matrix, num_spks, min_num_spks,
max_num_spks)
# Assign class labels
labels = kmeans(spectral_embeddings)

@@ -268,8 +280,7 @@ def main():
args = get_args()

print('Segmenting and extracting speaker embeddings')
subsegs_list, embeddings_list = compute_embeddings(args.scp,
args.segments,
subsegs_list, embeddings_list = compute_embeddings(args.scp, args.segments,
args.source,
args.device,
args.batch_size)
@@ -279,7 +290,10 @@ def main():
with cf.ProcessPoolExecutor() as executor, open(args.output, 'w') as f:
for (subsegs, labels) in zip(subsegs_list,
executor.map(cluster, embeddings_list)):
[print(subseg, label, file=f) for (subseg, label) in zip(subsegs, labels)]
[
print(subseg, label, file=f)
for (subseg, label) in zip(subsegs, labels)
]


if __name__ == '__main__':
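For context, cluster() above follows the standard spectral clustering recipe on the per-subsegment embeddings: build and prune a similarity matrix, form a graph Laplacian, choose the speaker count between min_num_spks and max_num_spks, and run k-means in the spectral embedding space. The condensed numpy/scipy sketch below illustrates that flow under stated assumptions (cosine similarity, unnormalized Laplacian, eigengap speaker counting, illustrative bounds of 2 and 8); it is not the repository's exact implementation.

import numpy as np
from scipy.cluster.vq import kmeans2

def cluster_sketch(embeddings, min_num_spks=2, max_num_spks=8):
    # Cosine similarity between L2-normalized embeddings
    emb = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    similarity = emb @ emb.T
    # Unnormalized graph Laplacian: L = D - A
    laplacian_matrix = np.diag(similarity.sum(axis=1)) - similarity
    # Eigenvalues/eigenvectors in ascending order (matrix is symmetric)
    eigvals, eigvecs = np.linalg.eigh(laplacian_matrix)
    # Pick the number of speakers at the largest eigengap within the bounds
    gaps = np.diff(eigvals[:max_num_spks + 1])
    num_spks = int(np.argmax(gaps[min_num_spks - 1:max_num_spks])) + min_num_spks
    # K-means on the first num_spks eigenvectors yields subsegment labels
    spectral_embeddings = eigvecs[:, :num_spks]
    _, labels = kmeans2(spectral_embeddings, num_spks, minit='++')
    return labels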