From 1029a8129ee9adf8bdcc21baf59c919a70abcfd9 Mon Sep 17 00:00:00 2001 From: JayZed Date: Mon, 30 Sep 2024 12:57:37 -0400 Subject: [PATCH 1/2] Use ISO 639-2 3 letter language codes with ffmpeg ffmpeg uses the older ISO 639-2 code when extracting audio streams based on language. If we give it the newer ISO 639-3 code it won't find the audio stream because it's trying to match it by name. --- .../subliminal_patch/providers/whisperai.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py index 866585cdb..6f21f97f2 100644 --- a/custom_libs/subliminal_patch/providers/whisperai.py +++ b/custom_libs/subliminal_patch/providers/whisperai.py @@ -16,6 +16,7 @@ import ffmpeg import functools +import pycountry # These are all the languages Whisper supports. # from whisper.tokenizer import LANGUAGES @@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"): # initialize to default above set_log_level() +# ffmpeg uses the older ISO 639-2 code when extracting audio streams based on language +# if we give it the newer ISO 639-3 code it can't find that audio stream by name because it's different +# for example it wants 'ger' instead of 'deu' for the German language +# or 'fre' instead of 'fra' for the French language +def get_ISO_639_2_code(iso639_3_code): + # find the language using ISO 639-3 code + language = pycountry.languages.get(alpha_3=iso639_3_code) + # get the ISO 639-2 code or use the original input if there isn't a match + iso639_2_code = language.bibliographic if language and hasattr(language, 'bibliographic') else iso639_3_code + logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')") + return iso639_2_code + @functools.lru_cache(2) def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): logger.debug("Encoding audio stream to WAV with ffmpeg") @@ -140,7 +153,10 @@ def 
encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): # This launches a subprocess to decode audio while down-mixing and resampling as necessary. inp = ffmpeg.input(path, threads=0) if audio_stream_language: - logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}") + # There is more than one audio stream, so pick the requested one by name + # Use the ISO 639-2 code if available + audio_stream_language = get_ISO_639_2_code(audio_stream_language) + logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}") inp = inp[f'a:m:language:{audio_stream_language}'] out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \ From aef03573cb0ecddf8b59a3f1bdb01c303ea3c754 Mon Sep 17 00:00:00 2001 From: JayZed Date: Mon, 30 Sep 2024 14:35:49 -0400 Subject: [PATCH 2/2] Minor optimization of module import --- custom_libs/subliminal_patch/providers/whisperai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py index 6f21f97f2..0546717a2 100644 --- a/custom_libs/subliminal_patch/providers/whisperai.py +++ b/custom_libs/subliminal_patch/providers/whisperai.py @@ -16,7 +16,7 @@ import ffmpeg import functools -import pycountry +from pycountry import languages # These are all the languages Whisper supports. 
# from whisper.tokenizer import LANGUAGES @@ -139,7 +139,7 @@ def set_log_level(newLevel="INFO"): # or 'fre' instead of 'fra' for the French language def get_ISO_639_2_code(iso639_3_code): # find the language using ISO 639-3 code - language = pycountry.languages.get(alpha_3=iso639_3_code) + language = languages.get(alpha_3=iso639_3_code) # get the ISO 639-2 code or use the original input if there isn't a match iso639_2_code = language.bibliographic if language and hasattr(language, 'bibliographic') else iso639_3_code logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')")