Merge branch 'fastdraft_sample' of https://github.com/shira-g/openvino_notebooks into fastdraft_sample
openvinotoolkit · Dec 24, 2024 · 22cd58b · 22cd58b
2 parents ca1ede4 + 4420407
commit 22cd58b
Show file tree

Hide file tree

Showing 73 changed files with 6,042 additions and 3,050 deletions.
diff --git a/.ci/check_notebooks.py b/.ci/check_notebooks.py
@@ -18,6 +18,7 @@
     Path("notebooks/sparsity-optimization/sparsity-optimization.ipynb"),  # cpu expected to be used
     Path("notebooks/s3d-mil-nce-text-to-video-retrieval/s3d-mil-nce-text-to-video-retrieval.ipynb"),  # only cpu
     Path("notebooks/explainable-ai-2-deep-dive/explainable-ai-2-deep-dive.ipynb"),  # device-agnostic
+    Path("notebooks/localai/localai.ipynb"),  # cannot change the device in the Docker configuration on the fly
 ]
 
 

diff --git a/.ci/ignore_treon_docker.txt b/.ci/ignore_treon_docker.txt
@@ -81,4 +81,5 @@ notebooks/sam2-image-segmentation/segment-anything-2-image.ipynb
 notebooks/pixtral/pixtral.ipynb
 notebooks/llm-agent-react/llm-agent-react.ipynb
 notebooks/multilora-image-generation/multilora-image-generation.ipynb
-notebooks/llm-agent-react/llm-agent-react-langchain.ipynb
+notebooks/llm-agent-react/llm-agent-react-langchain.ipynb
+notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb
diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml
@@ -468,13 +468,6 @@
   skips:
     - python:
         - '3.9'
-- notebook: notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb
-  skips:
-    - os:
-        - macos-13
-        - ubuntu-20.04
-        - ubuntu-22.04
-        - windows-2019
 - notebook: notebooks/stable-audio/stable-audio.ipynb
   skips:
     - os:
@@ -552,6 +545,8 @@
   skips:
     - os:
         - macos-13
+    - python:
+        - '3.9'
 - notebook: notebooks/mobileclip-video-search/mobileclip-video-search.ipynb
   skips:
     - os:
@@ -562,4 +557,12 @@
         - macos-13
         - ubuntu-20.04
         - ubuntu-22.04
-        - windows-2019
+        - windows-2019
+- notebook: notebooks/glm-edge-v/glm-edge-v.ipynb
+  skips:
+    - os:
+        - macos-13
+- notebook: notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb
+  skips:
+    - os:
+        - macos-13
diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -139,6 +139,7 @@ convolutional
 coreference
 CoSENT
 cpm
+cpp
 CPUs
 cpu
 CRNN
@@ -435,6 +436,7 @@ LangChain
 langchain
 Lasinger
 latents
+LocalAI
 lookahead
 LCMs
 LCMScheduler
@@ -500,6 +502,7 @@ microservices
 MiDaS
 MidasNet
 Midjourney
+minicpm
 MiniCPM
 MiniLM
 mistralai
@@ -610,6 +613,7 @@ OpenVINO
 openvino
 OpenVino
 OpenVINO's
+OpenVINOMultiModal
 openvoice
 OpenVoice
 OpenVoiceBaseClass
@@ -690,6 +694,7 @@ precomputed
 preconverted
 prefetching
 preformatted
+prem
 PrePostProcessing
 prepostprocessing
 prepostprocessor
@@ -992,6 +997,7 @@ VITS
 vitt
 VL
 vl
+VLM
 VLModel
 VM
 Vladlen
@@ -1031,12 +1037,14 @@ xt
 xvector
 xxl
 XYWH
+YAML
 Yiqin
 YOLO
 YOLOv
 yolov
 Youri
 youri
+YouTube
 Zafrir
 ZavyChromaXL
 Zongyuan

diff --git a/check_install.py b/check_install.py
@@ -174,10 +174,6 @@ def kernel_check():
             "See https://github.com/openvinotoolkit/openvino_notebooks/wiki/Windows"
         )
 
-if not DEVTOOLS_INSTALLED:
-    print()
-    print("OpenVINO development tools are not installed in this Python environment. \n" "Please follow the instructions in the README to install `openvino`\n")
-
 if not NO_BROKEN_REQUIREMENTS:
     print()
     print("`pip check` shows broken requirements:")

diff --git a/notebooks/README.md b/notebooks/README.md
@@ -50,6 +50,7 @@
 - [Structure Extraction with NuExtract and OpenVINO](./nuextract-structure-extraction/nuextract-structure-extraction.ipynb)
 - [Visual-language assistant with nanoLLaVA and OpenVINO](./nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb)
 - [Controllable Music Generation with MusicGen and OpenVINO](./music-generation/music-generation.ipynb)
+- [Multimodal RAG for video analytics with LlamaIndex](./multimodal-rag/multimodal-rag-llamaindex.ipynb)
 - [Multi LoRA Image Generation](./multilora-image-generation/multilora-image-generation.ipynb)
 - [Visual Content Search using MobileCLIP and OpenVINO](./mobileclip-video-search/mobileclip-video-search.ipynb)
 - [Visual-language assistant with Llama-3.2-11B-Vision and OpenVINO](./mllama-3.2/mllama-3.2.ipynb)
@@ -76,6 +77,7 @@
 - [InstantID: Zero-shot Identity-Preserving Generation using OpenVINO](./instant-id/instant-id.ipynb)
 - [Image generation with HunyuanDIT and OpenVINO](./hunyuan-dit-image-generation/hunyuan-dit-image-generation.ipynb)
 - [Object detection and masking from prompts with GroundedSAM (GroundingDINO + SAM) and OpenVINO](./grounded-segment-anything/grounded-segment-anything.ipynb)
+- [Visual-language assistant with GLM-Edge-V and OpenVINO](./glm-edge-v/glm-edge-v.ipynb)
 - [Image generation with Flux.1 and OpenVINO](./flux.1-image-generation/flux.1-image-generation.ipynb)
 - [Florence-2: Open Source Vision Foundation Model](./florence2/florence2.ipynb)
 - [Frame interpolation using FILM and OpenVINO](./film-slowmo/film-slowmo.ipynb)
@@ -100,6 +102,7 @@
 - [OpenVINO Tokenizers: Incorporate Text Processing Into OpenVINO Pipelines](./openvino-tokenizers/openvino-tokenizers.ipynb)
 - [Convert models from ModelScope to OpenVINO](./modelscope-to-openvino/modelscope-to-openvino.ipynb)
 - [Hello Model Server](./model-server/model-server.ipynb)
+- [LocalAI and OpenVINO](./localai/localai.ipynb)
 - [Quantize NLP models with Post-Training Quantization in NNCF](./language-quantize-bert/language-quantize-bert.ipynb)
 - [Convert a JAX Model to OpenVINO™ IR](./jax-to-openvino/jax-classification-to-openvino.ipynb)
 - [Quantization of Image Classification Models](./image-classification-quantization/image-classification-quantization.ipynb)
@@ -229,6 +232,7 @@
 - [Visual-language assistant with nanoLLaVA and OpenVINO](./nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb)
 - [Named entity recognition with OpenVINO™](./named-entity-recognition/named-entity-recognition.ipynb)
 - [Controllable Music Generation with MusicGen and OpenVINO](./music-generation/music-generation.ipynb)
+- [Multimodal RAG for video analytics with LlamaIndex](./multimodal-rag/multimodal-rag-llamaindex.ipynb)
 - [Multi LoRA Image Generation](./multilora-image-generation/multilora-image-generation.ipynb)
 - [Visual Content Search using MobileCLIP and OpenVINO](./mobileclip-video-search/mobileclip-video-search.ipynb)
 - [MMS: Scaling Speech Technology to 1000+ languages with OpenVINO™](./mms-massively-multilingual-speech/mms-massively-multilingual-speech.ipynb)
@@ -260,6 +264,7 @@
 - [Handwritten Chinese and Japanese OCR with OpenVINO™](./handwritten-ocr/handwritten-ocr.ipynb)
 - [Object detection and masking from prompts with GroundedSAM (GroundingDINO + SAM) and OpenVINO](./grounded-segment-anything/grounded-segment-anything.ipynb)
 - [Grammatical Error Correction with OpenVINO](./grammar-correction/grammar-correction.ipynb)
+- [Visual-language assistant with GLM-Edge-V and OpenVINO](./glm-edge-v/glm-edge-v.ipynb)
 - [High-Quality Text-Free One-Shot Voice Conversion with FreeVC and OpenVINO™](./freevc-voice-conversion/freevc-voice-conversion.ipynb)
 - [Image generation with Flux.1 and OpenVINO](./flux.1-image-generation/flux.1-image-generation.ipynb)
 - [Florence-2: Open Source Vision Foundation Model](./florence2/florence2.ipynb)

diff --git a/notebooks/blip-visual-language-processing/blip-visual-language-processing.ipynb b/notebooks/blip-visual-language-processing/blip-visual-language-processing.ipynb
@@ -820,6 +820,8 @@
    "source": [
     "%%skip not $to_quantize.value\n",
     "\n",
+    "import aiohttp\n",
+    "\n",
     "import numpy as np\n",
     "from datasets import load_dataset\n",
     "from tqdm.notebook import tqdm\n",
@@ -881,7 +883,13 @@
     "    Prepares a vision-text dataset for quantization.\n",
     "    \"\"\"\n",
     "    split = f\"train[:{opt_init_steps}]\" if not streaming else \"train\"\n",
-    "    dataset = load_dataset(\"HuggingFaceM4/VQAv2\", split=split, streaming=streaming, trust_remote_code=True)\n",
+    "    dataset = load_dataset(\n",
+    "        \"HuggingFaceM4/VQAv2\", \n",
+    "        split=split, \n",
+    "        streaming=streaming, \n",
+    "        trust_remote_code=True,\n",
+    "        storage_options={'client_kwargs': {'timeout': aiohttp.ClientTimeout(total=3600)}}\n",
+    "    )\n",
     "    dataset = dataset.shuffle(seed=42)\n",
     "    if streaming:\n",
     "        dataset = dataset.take(opt_init_steps)\n",

diff --git a/notebooks/catvton/gradio_helper.py b/notebooks/catvton/gradio_helper.py
@@ -29,7 +29,7 @@ def image_grid(imgs, rows, cols):
 
 def make_demo(pipeline, mask_processor, automasker, output_dir):
     def submit_function(person_image, cloth_image, cloth_type, num_inference_steps, guidance_scale, seed, show_type):
-        width = 1024
+        width = 768
         height = 1024
         person_image, mask = person_image["background"], person_image["layers"][0]
         mask = Image.open(mask).convert("L")

diff --git a/notebooks/catvton/ov_catvton_helper.py b/notebooks/catvton/ov_catvton_helper.py
@@ -96,9 +96,9 @@ def convert_pipeline_models(pipeline):
     convert(VaeDecoder(pipeline.vae), VAE_DECODER_PATH, torch.zeros(1, 4, 128, 96))
     del pipeline.vae
 
-    inpainting_latent_model_input = torch.zeros(2, 9, 256, 96)
+    inpainting_latent_model_input = torch.rand(2, 9, 256, 96)
     timestep = torch.tensor(0)
-    encoder_hidden_states = torch.zeros(2, 1, 768)
+    encoder_hidden_states = torch.Tensor(0)
     example_input = (inpainting_latent_model_input, timestep, encoder_hidden_states)
 
     convert(UNetWrapper(pipeline.unet), UNET_PATH, example_input)
@@ -233,13 +233,12 @@ def get_compiled_automasker(automasker, core, device, densepose_processor_path,
     return automasker
 
 
-def get_pipeline_selection_option(optimized_pipe=None):
+def get_pipeline_selection_option(is_optimized_pipe_available=False):
     import ipywidgets as widgets
 
-    model_available = optimized_pipe is not None
     use_quantized_models = widgets.Checkbox(
-        value=model_available,
+        value=is_optimized_pipe_available,
         description="Use quantized models",
-        disabled=not model_available,
+        disabled=not is_optimized_pipe_available,
     )
     return use_quantized_models
diff --git a/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb b/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb
@@ -136,12 +136,20 @@
     "    \"diffusers>=0.14.0\",\n",
     "    \"matplotlib>=3.4\",\n",
     "    \"transformers>=4.30.2\",\n",
-    "    \"controlnet-aux>=0.0.6\",\n",
+    "    \"einops\",\n",
+    "    \"timm\",\n",
     "    \"gradio>=3.36\",\n",
+    "    \"datasets>=2.14.6\",\n",
+    "    \"nncf>=2.7.0\",\n",
+    "    \"opencv-python\",\n",
+    "    \"scipy\",\n",
+    "    \"filelock\",\n",
+    "    \"scikit-image\",\n",
     "    \"--extra-index-url\",\n",
     "    \"https://download.pytorch.org/whl/cpu\",\n",
     ")\n",
-    "pip_install(\"openvino>=2023.1.0\", \"datasets>=2.14.6\", \"nncf>=2.7.0\", \"opencv-python\")"
+    "pip_install(\"--no-deps\", \"controlnet-aux>=0.0.6\")\n",
+    "pip_install(\"openvino>=2023.1.0\")"
    ]
   },
   {
@@ -245,7 +253,7 @@
     "\n",
     "image_path = Path(\"example_image.jpg\")\n",
     "if not image_path.exists():\n",
-    "    download_file(image_path, filename=\"example_image.jpg\")\n",
+    "    download_file(example_url, filename=\"example_image.jpg\")\n",
     "\n",
     "img = Image.open(image_path)\n",
     "pose = pose_estimator(img)\n",

diff --git a/notebooks/depth-anything/depth-anything-v2.ipynb b/notebooks/depth-anything/depth-anything-v2.ipynb
@@ -197,11 +197,10 @@
     "from notebook_utils import download_file, device_widget, quantization_widget\n",
     "\n",
     "\n",
-    "if not Path(\"furseal.png\").exists():\n",
-    "    download_file(\n",
-    "        \"https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb\",\n",
-    "        \"furseal.png\",\n",
-    "    )\n",
+    "download_file(\n",
+    "    \"https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb\",\n",
+    "    \"furseal.png\",\n",
+    ")\n",
     "\n",
     "Image.open(\"furseal.png\").resize((600, 400))"
    ]
@@ -547,11 +546,10 @@
    "source": [
     "VIDEO_FILE = \"./Coco Walking in Berkeley.mp4\"\n",
     "\n",
-    "if not Path(VIDEO_FILE).exists():\n",
-    "    download_file(\n",
-    "        \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4\",\n",
-    "        VIDEO_FILE,\n",
-    "    )\n",
+    "download_file(\n",
+    "    \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4\",\n",
+    "    VIDEO_FILE,\n",
+    ")\n",
     "\n",
     "# Number of seconds of input video to process. Set `NUM_SECONDS` to 0 to process\n",
     "# the full video.\n",

diff --git a/notebooks/dynamicrafter-animating-images/dynamicrafter-animating-images.ipynb b/notebooks/dynamicrafter-animating-images/dynamicrafter-animating-images.ipynb
@@ -200,7 +200,7 @@
     "        hf_hub_download(repo_id=REPO_ID, filename=\"model.ckpt\", local_dir=\"./checkpoints/dynamicrafter_256_v1/\", local_dir_use_symlinks=False)\n",
     "\n",
     "    ckpt_path = \"checkpoints/dynamicrafter_256_v1/model.ckpt\"\n",
-    "    config_file = \"dynamicrafter/configs/inference_256_v1.0.yaml\"\n",
+    "    config_file = \"DynamiCrafter/configs/inference_256_v1.0.yaml\"\n",
     "    config = OmegaConf.load(config_file)\n",
     "    model_config = config.pop(\"model\", OmegaConf.create())\n",
     "    model_config[\"params\"][\"unet_config\"][\"params\"][\"use_checkpoint\"] = False\n",
@@ -875,7 +875,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "e9a0137a",
    "metadata": {},
    "outputs": [
@@ -898,7 +898,7 @@
     }
    ],
    "source": [
-    "image_path = \"dynamicrafter/prompts/256/art.png\"\n",
+    "image_path = \"DynamiCrafter/prompts/256/art.png\"\n",
     "prompt = \"man fishing in a boat at sunset\"\n",
     "seed = 234\n",
     "image = Image.open(image_path)\n",
@@ -1629,7 +1629,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "id": "1e77da42",
    "metadata": {},
    "outputs": [
@@ -1652,7 +1652,7 @@
    "source": [
     "%%skip not $to_quantize.value\n",
     "\n",
-    "image_path = \"dynamicrafter/prompts/256/art.png\"\n",
+    "image_path = \"DynamiCrafter/prompts/256/art.png\"\n",
     "prompt = \"man fishing in a boat at sunset\"\n",
     "seed = 234\n",
     "image = Image.open(image_path)\n",

diff --git a/notebooks/dynamicrafter-animating-images/gradio_helper.py b/notebooks/dynamicrafter-animating-images/gradio_helper.py
@@ -4,11 +4,11 @@
 css = """#input_img {max-width: 256px !important} #output_vid {max-width: 256px; max-height: 256px}"""
 
 i2v_examples_256 = [
-    ["dynamicrafter/prompts/256/art.png", "man fishing in a boat at sunset", 50, 7.5, 1.0, 3, 234],
-    ["dynamicrafter/prompts/256/boy.png", "boy walking on the street", 50, 7.5, 1.0, 3, 125],
-    ["dynamicrafter/prompts/256/dance1.jpeg", "two people dancing", 50, 7.5, 1.0, 3, 116],
-    ["dynamicrafter/prompts/256/fire_and_beach.jpg", "a campfire on the beach and the ocean waves in the background", 50, 7.5, 1.0, 3, 111],
-    ["dynamicrafter/prompts/256/guitar0.jpeg", "bear playing guitar happily, snowing", 50, 7.5, 1.0, 3, 122],
+    ["DynamiCrafter/prompts/256/art.png", "man fishing in a boat at sunset", 50, 7.5, 1.0, 3, 234],
+    ["DynamiCrafter/prompts/256/boy.png", "boy walking on the street", 50, 7.5, 1.0, 3, 125],
+    ["DynamiCrafter/prompts/256/dance1.jpeg", "two people dancing", 50, 7.5, 1.0, 3, 116],
+    ["DynamiCrafter/prompts/256/fire_and_beach.jpg", "a campfire on the beach and the ocean waves in the background", 50, 7.5, 1.0, 3, 111],
+    ["DynamiCrafter/prompts/256/guitar0.jpeg", "bear playing guitar happily, snowing", 50, 7.5, 1.0, 3, 122],
 ]
 
 

diff --git a/notebooks/efficient-sam/efficient-sam.ipynb b/notebooks/efficient-sam/efficient-sam.ipynb
@@ -67,7 +67,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install -q \"openvino>=2023.3.0\" \"nncf>=2.7.0\" opencv-python \"gradio>=4.13\" \"matplotlib>=3.4\" torch torchvision tqdm  --extra-index-url https://download.pytorch.org/whl/cpu"
+    "import platform\n",
+    "\n",
+    "%pip install -q \"openvino>=2024.5.0\" \"nncf>=2.14.0\"\n",
+    "%pip install -q \"torch>=2.2.0\" \"torchaudio>=2.2.0\" \"torchvision>=0.17.0\"  --extra-index-url https://download.pytorch.org/whl/cpu\n",
+    "%pip install -q opencv-python \"gradio>=4.13\" \"matplotlib>=3.4\" tqdm\n",
+    "\n",
+    "if platform.system() == \"Darwin\":\n",
+    "    %pip install -q \"numpy<2.0.0\""
    ]
   },
   {