From adb6401dd0cae4869e5af15c820ebdc4c47c127d Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Tue, 19 Mar 2024 06:11:02 +0100 Subject: [PATCH] Unpin numpy version (#1828) --- ...55-mms-massively-multilingual-speech.ipynb | 268 ++++-------------- .../262-softvc-voice-conversion.ipynb | 54 +--- 2 files changed, 65 insertions(+), 257 deletions(-) diff --git a/notebooks/255-mms-massively-multilingual-speech/255-mms-massively-multilingual-speech.ipynb b/notebooks/255-mms-massively-multilingual-speech/255-mms-massively-multilingual-speech.ipynb index add3e3884ac..b745b5f25bb 100644 --- a/notebooks/255-mms-massively-multilingual-speech/255-mms-massively-multilingual-speech.ipynb +++ b/notebooks/255-mms-massively-multilingual-speech/255-mms-massively-multilingual-speech.ipynb @@ -4,10 +4,7 @@ "cell_type": "markdown", "id": "c87087c91122e3f8", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "# MMS: Scaling Speech Technology to 1000+ languages with OpenVINO™\n", @@ -27,10 +24,7 @@ "cell_type": "markdown", "id": "fa80166a11177e7a", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "\n", @@ -59,10 +53,7 @@ "cell_type": "markdown", "id": "90c7a208b1fa497b", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Prerequisites\n", @@ -78,15 +69,12 @@ "end_time": "2023-10-12T15:54:47.440197100Z", "start_time": "2023-10-12T15:54:46.774028500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ "%pip install -q --upgrade pip\n", - "%pip install -q \"transformers>=4.33.1\" \"openvino>=2023.1.0\" \"numpy>=1.21.0,<=1.24\" \"nncf>=2.7.0\" \n", + "%pip install -q \"transformers>=4.33.1\" \"openvino>=2023.1.0\" \"numpy>=1.21.0\" \"nncf>=2.9.0\" \n", "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch datasets accelerate soundfile librosa gradio jiwer" ] }, @@ -99,10 +87,7 @@ "end_time": "2023-10-12T15:54:47.591931700Z", "start_time": "2023-10-12T15:54:46.786966800Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -117,10 +102,7 @@ "cell_type": "markdown", "id": "8d81ab16ec40431a", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Prepare an example audio\n", @@ -176,10 +158,7 @@ "cell_type": "markdown", "id": "62f4f25bd4987849", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Specify `streaming=True` to not download the entire dataset." @@ -194,10 +173,7 @@ "end_time": "2023-10-12T15:54:53.101990700Z", "start_time": "2023-10-12T15:54:47.575834800Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -214,10 +190,7 @@ "cell_type": "markdown", "id": "68f9bb826d9a36dd", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Example has a dictionary structure. It contains an audio data and a text transcription." @@ -256,10 +229,7 @@ "end_time": "2023-10-12T15:54:53.320425400Z", "start_time": "2023-10-12T15:54:53.106498900Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -299,10 +269,7 @@ "cell_type": "markdown", "id": "86963727a1d32e5a", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Language Identification (LID) \n", @@ -313,10 +280,7 @@ "cell_type": "markdown", "id": "cb607febc51e3782", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Download pretrained model and processor\n", @@ -334,10 +298,7 @@ "end_time": "2023-10-12T15:54:59.110836600Z", "start_time": "2023-10-12T15:54:53.294937500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -353,10 +314,7 @@ "cell_type": "markdown", "id": "100d4f9dfff9a7d3", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Use the original model to run an inference\n", @@ -372,10 +330,7 @@ "end_time": "2023-10-12T15:55:02.814861200Z", "start_time": "2023-10-12T15:54:59.111671500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -401,10 +356,7 @@ "cell_type": "markdown", "id": "9bc6f53041bf77e4", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Convert to OpenVINO IR model and run an inference\n", @@ -415,10 +367,7 @@ "cell_type": "markdown", "id": "2fb627d3", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Select device from dropdown list for running inference using OpenVINO" @@ -433,10 +382,7 @@ "end_time": "2023-10-12T15:55:02.914590700Z", "start_time": "2023-10-12T15:55:02.908879300Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -472,10 +418,7 @@ "cell_type": "markdown", "id": "ca15564e", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Convert model to OpenVINO format and compile it" @@ -490,10 +433,7 @@ "end_time": "2023-10-12T15:55:12.102555300Z", "start_time": "2023-10-12T15:55:02.924532500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -532,10 +472,7 @@ "cell_type": "markdown", "id": "40193d2a396bb746", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Now it is possible to run an inference. " @@ -550,10 +487,7 @@ "end_time": "2023-10-12T15:55:12.119092Z", "start_time": "2023-10-12T15:55:12.119092Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -577,10 +511,7 @@ "end_time": "2023-10-12T15:55:13.705838100Z", "start_time": "2023-10-12T15:55:12.119092Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -602,10 +533,7 @@ "cell_type": "markdown", "id": "346a0954d96d40df", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Let's check another language." @@ -658,10 +586,7 @@ "end_time": "2023-10-12T15:55:17.815597900Z", "start_time": "2023-10-12T15:55:13.721895900Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -708,10 +633,7 @@ "end_time": "2023-10-12T15:55:18.506184200Z", "start_time": "2023-10-12T15:55:17.815597900Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -731,10 +653,7 @@ "cell_type": "markdown", "id": "e010ed384d1e8ee7", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Automatic Speech Recognition (ASR)\n", @@ -745,10 +664,7 @@ "cell_type": "markdown", "id": "fe4536f63fe7e612", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Download pretrained model and processor\n", @@ -781,10 +697,7 @@ "cell_type": "markdown", "id": "5896f5fd08f62071", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "You can look at all supported languages:" @@ -799,10 +712,7 @@ "end_time": "2023-10-12T15:55:24.860305100Z", "start_time": "2023-10-12T15:55:24.845930900Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -824,10 +734,7 @@ "cell_type": "markdown", "id": "541c53d1c740d668", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Switch out the language adapters by calling the `load_adapter()` function for the model and `set_target_lang()` for the tokenizer. Pass the target language as an input - `\"detect_language_id\"` which was detected in the previous step." @@ -842,10 +749,7 @@ "end_time": "2023-10-12T15:55:25.029800800Z", "start_time": "2023-10-12T15:55:24.860305100Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -868,10 +772,7 @@ "cell_type": "markdown", "id": "de68b1eac717cc26", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Use the original model for inference\n", @@ -887,10 +788,7 @@ "end_time": "2023-10-12T15:55:26.524665500Z", "start_time": "2023-10-12T15:55:25.032584500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -916,10 +814,7 @@ "cell_type": "markdown", "id": "bda2f58170bfa2f4", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Convert to OpenVINO IR model and run inference\n", @@ -980,10 +875,7 @@ "cell_type": "markdown", "id": "e4fb2cd466365800", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Run inference." @@ -1029,10 +921,7 @@ "cell_type": "markdown", "id": "6c57dd01", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Quantization\n", @@ -1059,9 +948,6 @@ "start_time": "2023-10-12T15:55:36.148877700Z" }, "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "test_replace": { "value=False": "value=True" } @@ -1100,10 +986,7 @@ "cell_type": "markdown", "id": "9bc6116f", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Let's load skip magic extension to skip quantization if to_quantize is not selected" @@ -1118,10 +1001,7 @@ "end_time": "2023-10-12T15:55:36.170645600Z", "start_time": "2023-10-12T15:55:36.163169700Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -1151,10 +1031,7 @@ "end_time": "2023-10-12T15:55:36.170645600Z", "start_time": "2023-10-12T15:55:36.164170900Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -1182,10 +1059,7 @@ "end_time": "2023-10-12T15:55:40.725145300Z", "start_time": "2023-10-12T15:55:36.211259800Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -1232,10 +1106,7 @@ "cell_type": "markdown", "id": "5f659976", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Language identification model quantization\n", @@ -1253,10 +1124,7 @@ "end_time": "2023-10-12T15:55:51.500695900Z", "start_time": "2023-10-12T15:55:40.807758Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -1327,10 +1195,7 @@ "end_time": "2023-10-12T15:55:52.495642700Z", "start_time": "2023-10-12T15:55:51.504925300Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -1352,10 +1217,7 @@ "cell_type": "markdown", "id": "8ea8dc6b", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Speech recognition model quantization\n", @@ -1373,10 +1235,7 @@ "end_time": "2023-10-12T15:56:01.626285500Z", "start_time": "2023-10-12T15:55:52.491524500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -1449,10 +1308,7 @@ "end_time": "2023-10-12T15:56:12.738307100Z", "start_time": "2023-10-12T15:56:01.643402100Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -1482,10 +1338,7 @@ "cell_type": "markdown", "id": "3d7702bf", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Compare model size, performance and accuracy\n", @@ -1503,10 +1356,7 @@ "end_time": "2023-10-12T15:56:12.738307100Z", "start_time": "2023-10-12T15:56:12.738307100Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { @@ -1543,10 +1393,7 @@ "cell_type": "markdown", "id": "35db21f7", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "Secondly we compare accuracy values of the original and quantized models on a test split of MLS dataset. We rely on the Word Error Rate (WER) metric and compute accuracy as `(1 - WER)`.\n", @@ -1564,9 +1411,6 @@ "start_time": "2023-10-12T15:56:12.740287600Z" }, "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "test_replace": { "TEST_DATASET_SIZE = 20": "TEST_DATASET_SIZE = 1" } @@ -1665,10 +1509,7 @@ "cell_type": "markdown", "id": "ba1a229fd290ec31", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Interactive demo with Gradio\n", @@ -1686,10 +1527,7 @@ "end_time": "2023-10-12T16:17:33.520443100Z", "start_time": "2023-10-12T16:13:07.494290500Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [ { diff --git a/notebooks/262-softvc-voice-conversion/262-softvc-voice-conversion.ipynb b/notebooks/262-softvc-voice-conversion/262-softvc-voice-conversion.ipynb index 7f3567db802..fdda2c3777e 100644 --- a/notebooks/262-softvc-voice-conversion/262-softvc-voice-conversion.ipynb +++ b/notebooks/262-softvc-voice-conversion/262-softvc-voice-conversion.ipynb @@ -36,17 +36,14 @@ "execution_count": null, "id": "a1aaa6bb335f4efe", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ "%pip install -q --upgrade pip setuptools\n", "%pip install -q \"openvino>=2023.2.0\"\n", "!git clone https://github.com/svc-develop-team/so-vits-svc -b 4.1-Stable\n", - "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu tqdm librosa \"torch>=2.1.0\" \"torchaudio>=2.1.0\" faiss-cpu gradio \"numpy==1.23.5\" \"fairseq==0.12.2\" praat-parselmouth" + "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu tqdm librosa \"torch>=2.1.0\" \"torchaudio>=2.1.0\" faiss-cpu gradio \"numpy>=1.23.5\" \"fairseq==0.12.2\" praat-parselmouth" ] }, { @@ -62,10 +59,7 @@ "execution_count": null, "id": "5ca855b365c4e0e7", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -116,10 +110,7 @@ "end_time": "2023-10-11T16:23:39.582765900Z", "start_time": "2023-10-11T16:23:39.441185100Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -143,10 +134,7 @@ "end_time": "2023-10-11T16:24:11.594409400Z", "start_time": "2023-10-11T16:23:48.236884800Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -172,10 +160,7 @@ "end_time": "2023-10-11T16:24:34.084985700Z", "start_time": "2023-10-11T16:24:22.823731700Z" }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -205,10 +190,7 @@ "execution_count": null, "id": "a941d393fb0d9f5b", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -248,10 +230,7 @@ "execution_count": null, "id": "259b86a26d06f881", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -303,10 +282,7 @@ "execution_count": null, "id": "b907034e797533dc", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -338,10 +314,7 @@ "execution_count": null, "id": "fb890ffe86cd0a84", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -376,10 +349,7 @@ "execution_count": null, "id": "bea8d8b65b7fb95d", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -492,4 +462,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +}