From 70302d1885d8f38a2fb71b4fd8266919ad2f8038 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Wed, 27 Mar 2024 15:55:48 +0400
Subject: [PATCH 01/19] Update cpp sample with reshaping logic

---
 .../stable_diffusion_1_5/cpp/src/main.cpp | 95 ++++++++++++++++---
 1 file changed, 84 insertions(+), 11 deletions(-)

diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index 38e9877991..79117afee7 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -17,6 +17,9 @@
 #include "lora.hpp"
 #include "imwrite.hpp"
 
+const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json'
+const size_t VAE_SCALE_FACTOR = 8;
+
 class Timer {
     const decltype(std::chrono::steady_clock::now()) m_start;
 public:
@@ -70,13 +73,66 @@ void apply_lora(std::shared_ptr model, InsertLoRA::LoRAMap& lora_map)
     }
 }
 
-StableDiffusionModels compile_models(const std::string& model_path, const std::string& device,
-                                     const std::string& lora_path, const float alpha, const bool use_cache) {
+void reshape_text_encoder(std::shared_ptr<ov::Model> model, size_t batch_size, size_t tokenizer_model_max_length) {
+    ov::PartialShape input_shape = model->input(0).get_partial_shape();
+    input_shape[0] = batch_size;
+    input_shape[1] = tokenizer_model_max_length;
+    std::map<std::string, ov::PartialShape> name_to_shape{{model->input(0).get_any_name(), input_shape}};
+    model->reshape(name_to_shape);
+}
+
+void reshape_unet_encoder(std::shared_ptr<ov::Model> model, int64_t batch_size, int64_t height, int64_t width,
+                          int64_t num_images_per_prompt, int64_t tokenizer_model_max_length) {
+    batch_size *= num_images_per_prompt;
+    // The factor of 2 comes from the guidance scale > 1
+    for (auto input : model->inputs()) {
+        if (input.get_any_name().find("timestep_cond") == std::string::npos) {
+            batch_size *= 2;
+            break;
+        }
+    }
+
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+
+    std::map<std::string, ov::PartialShape> name_to_shape;
+
+    for (auto input : model->inputs()) {
+        std::string input_name = input.get_any_name();
+        name_to_shape[input_name] = input.get_partial_shape();
+        if (input_name == "timestep") {
+            name_to_shape[input_name][0] = 1;
+        } else if (input_name == "sample") {
+            int64_t in_channels = 4; // 'in_channels' parameter from 'unet/config.json'
+            name_to_shape[input_name] = {batch_size, in_channels, height, width};
+        } else if (input_name == "time_ids") {
+            name_to_shape[input_name][0] = batch_size;
+        } else {
+            name_to_shape[input_name][0] = batch_size;
+            name_to_shape[input_name][1] = TOKENIZER_MODEL_MAX_LENGTH;
+        }
+    }
+
+    model->reshape(name_to_shape);
+}
+
+void reshape_vae_decoder(std::shared_ptr<ov::Model> model, int64_t height, int64_t width) {
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+    int64_t latent_channels = 4; // 'latent_channels' parameter from 'vae_decoder/config.json'
+    std::map<std::string, ov::PartialShape> name_to_shape{{model->input(0).get_any_name(), {1, latent_channels, height, width}}};
+    model->reshape(name_to_shape);
+}
+
+StableDiffusionModels compile_models(const std::string& model_path, const std::string& device, const std::string& lora_path,
+                                     const float alpha, const bool use_cache, const bool use_dynamic_shapes,
+                                     const size_t batch_size, const size_t height, const size_t width, const size_t num_images) {
     StableDiffusionModels models;
 
     ov::Core core;
     if (use_cache)
         core.set_property(ov::cache_dir("./cache_dir"));
+
     core.add_extension(TOKENIZERS_LIBRARY_PATH);
 
     // read LoRA weights
@@ -90,6 +146,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     {
         Timer t("Loading and compiling text encoder");
         auto text_encoder_model = core.read_model(model_path + "/text_encoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_text_encoder(text_encoder_model, batch_size, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(text_encoder_model, lora_weights["text_encoder"]);
         models.text_encoder = core.compile_model(text_encoder_model, device);
     }
@@ -98,6 +157,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     {
         Timer t("Loading and compiling UNet");
         auto unet_model = core.read_model(model_path + "/unet/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_unet_encoder(unet_model, batch_size, height, width, num_images, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(unet_model, lora_weights["unet"]);
         models.unet = core.compile_model(unet_model, device);
     }
@@ -106,6 +168,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     {
         Timer t("Loading and compiling VAE decoder");
         auto vae_decoder_model = core.read_model(model_path + "/vae_decoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_vae_decoder(vae_decoder_model, height, width);
+        }
         ov::preprocess::PrePostProcessor ppp(vae_decoder_model);
         ppp.output().model().set_layout("NCHW");
         ppp.output().tensor().set_layout("NHWC");
@@ -123,10 +188,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
 }
 
 ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, std::string& neg_prompt) {
-    const size_t MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json'
     const size_t HIDDEN_SIZE = static_cast<size_t>(models.text_encoder.output(0).get_partial_shape()[2].get_length());
     const int32_t EOS_TOKEN_ID = 49407, PAD_TOKEN_ID = EOS_TOKEN_ID;
-    const ov::Shape input_ids_shape({1, MAX_LENGTH});
+    const ov::Shape input_ids_shape({1, TOKENIZER_MODEL_MAX_LENGTH});
 
     ov::InferRequest tokenizer_req = models.tokenizer.create_infer_request();
     ov::InferRequest text_encoder_req = models.text_encoder.create_infer_request();
@@ -147,10 +211,10 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
         text_encoder_req.infer();
     };
 
-    ov::Tensor text_embeddings(ov::element::f32, {2, MAX_LENGTH, HIDDEN_SIZE});
+    ov::Tensor text_embeddings(ov::element::f32, {2, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE});
 
-    compute_text_embeddings(neg_prompt, ov::Tensor(text_embeddings, {0, 0, 0}, {1, MAX_LENGTH, HIDDEN_SIZE}));
-    compute_text_embeddings(pos_prompt, ov::Tensor(text_embeddings, {1, 0, 0}, {2, MAX_LENGTH, HIDDEN_SIZE}));
+    compute_text_embeddings(neg_prompt, ov::Tensor(text_embeddings, {0, 0, 0}, {1, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
+    compute_text_embeddings(pos_prompt, ov::Tensor(text_embeddings, {1, 0, 0}, {2, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
 
     return text_embeddings;
 }
@@ -218,7 +282,8 @@ int32_t main(int32_t argc, char* argv[]) try {
         ("c,useCache", "Use model caching", cxxopts::value()->default_value("false"))
         ("r,readNPLatent", "Read numpy generated latents from file", cxxopts::value()->default_value("false"))
         ("m,modelPath", "Specify path of SD model IRs", cxxopts::value()->default_value("../models/dreamlike-anime-1.0"))
-        ("t,type", "Specify the type of SD model IRs (e.g., FP16_static or FP16_dyn)", cxxopts::value()->default_value("FP16_static"))
+        ("t,type", "Specify the type of SD model IRs (FP32, FP16 or INT8)", cxxopts::value()->default_value("FP16"))
+        ("dynamic", "Specify the model input shape to use dynamic shape", cxxopts::value()->default_value("false"))
         ("l,loraPath", "Specify path of LoRA file. (*.safetensors).", cxxopts::value()->default_value(""))
         ("a,alpha", "alpha for LoRA", cxxopts::value()->default_value("0.75"))("h,help", "Print usage");
     cxxopts::ParseResult result;
 
     try {
@@ -248,6 +313,7 @@ int32_t main(int32_t argc, char* argv[]) try {
     const bool read_np_latent = result["readNPLatent"].as();
     const std::string model_base_path = result["modelPath"].as();
     const std::string model_type = result["type"].as();
+    const bool use_dynamic_shapes = result["dynamic"].as();
     const std::string lora_path = result["loraPath"].as();
     const float alpha = result["alpha"].as();
 
@@ -263,13 +329,20 @@ int32_t main(int32_t argc, char* argv[]) try {
 
     std::cout << "OpenVINO version: " << ov::get_openvino_version() << std::endl;
 
-    // Stable Diffusion pipeline
+    const std::string model_path = model_base_path + "/" + model_type;
+    if (!std::filesystem::exists(model_path)) {
+        std::cerr << "Model IRs for type " << model_type << " don't exist in directory " << model_path << "\n";
+        std::cerr << "Refer to README.md for how to export an OpenVINO model with a particular data type." << std::endl;
+        return EXIT_FAILURE;
+    }
 
-    StableDiffusionModels models = compile_models(model_base_path + "/" + model_type, device, lora_path, alpha, use_cache);
+    // Stable Diffusion pipeline
+    const size_t batch_size = 1;
+    StableDiffusionModels models = compile_models(model_path, device, lora_path, alpha, use_cache, use_dynamic_shapes, batch_size, height, width, num_images);
     ov::InferRequest unet_infer_request = models.unet.create_infer_request();
 
     ov::PartialShape sample_shape = models.unet.input("sample").get_partial_shape();
-    OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == width && sample_shape[3] * 8 == height),
+    OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == height && sample_shape[3] * 8 == width),
                     "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
 
     Timer t("Running Stable Diffusion pipeline");
From 6b87c4ecf8063e87ce3d63df621630d15a691c45 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Wed, 27 Mar 2024 15:56:00 +0400
Subject: [PATCH 02/19] Add local gitignore

---
 image_generation/stable_diffusion_1_5/cpp/.gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 image_generation/stable_diffusion_1_5/cpp/.gitignore

diff --git a/image_generation/stable_diffusion_1_5/cpp/.gitignore b/image_generation/stable_diffusion_1_5/cpp/.gitignore
new file mode 100644
index 0000000000..ffdf359cbe
--- /dev/null
+++ b/image_generation/stable_diffusion_1_5/cpp/.gitignore
@@ -0,0 +1,3 @@
+build
+images
+models

From 53d30da8eae1b74f28b3f9fe1307c033137f56a4 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 29 Mar 2024 16:01:57 +0400
Subject: [PATCH 03/19] Change default model path

---
 image_generation/stable_diffusion_1_5/cpp/src/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index 79117afee7..53f7cb925c 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -281,7 +281,7 @@ int32_t main(int32_t argc, char* argv[]) try {
         ("width", "Destination image width", cxxopts::value()->default_value("512"))
         ("c,useCache", "Use model caching", cxxopts::value()->default_value("false"))
         ("r,readNPLatent", "Read numpy generated latents from file", cxxopts::value()->default_value("false"))
-        ("m,modelPath", "Specify path of SD model IRs", cxxopts::value()->default_value("../models/dreamlike-anime-1.0"))
+        ("m,modelPath", "Specify path of SD model IRs", cxxopts::value()->default_value("../models/dreamlike_anime_1_0_ov"))
         ("t,type", "Specify the type of SD model IRs (FP32, FP16 or INT8)", cxxopts::value()->default_value("FP16"))
         ("dynamic", "Specify the model input shape to use dynamic shape", cxxopts::value()->default_value("false"))
         ("l,loraPath", "Specify path of LoRA file. (*.safetensors).", cxxopts::value()->default_value(""))

From 57b3253d6e6a3a297c2dcac314a9053d54376f12 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 29 Mar 2024 16:02:14 +0400
Subject: [PATCH 04/19] Update readme

---
 .../stable_diffusion_1_5/cpp/README.md | 68 ++++++++++---------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index a0cd96d911..1c467f2ff1 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -6,6 +6,10 @@ The pure C++ text-to-image pipeline, driven by the OpenVINO native C++ API for S
 
 ## Step 1: Prepare build environment
 
+Prerequisites:
+- Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html))
+
+
 C++ Packages:
 * [CMake](https://cmake.org/download/): Cross-platform build tool
 * [OpenVINO](https://docs.openvino.ai/install): Model inference
@@ -14,7 +18,7 @@ Prepare a python environment and install dependencies:
 ```shell
 conda create -n openvino_sd_cpp python==3.10
 conda activate openvino_sd_cpp
-conda install openvino c-compiler cxx-compiler make
+conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
 ```
 
 ## Step 2: Convert Stable Diffusion v1.5 and Tokenizer models
@@ -22,33 +26,32 @@ conda install openvino c-compiler cxx-compiler make
 ### Stable Diffusion v1.5 model:
 
 1. Install dependencies to import models from HuggingFace:
-```shell
-conda activate openvino_sd_cpp
-python -m pip install -r scripts/requirements.txt
-python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
-```
-2. Download a huggingface SD v1.5 model like:
-- [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
-- [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.
-
-
-Example command:
-```shell
-huggingface-cli download --resume-download --local-dir-use-symlinks False dreamlike-art/dreamlike-anime-1.0 --local-dir models/dreamlike-anime-1.0
-```
-
-Please, refer to the official website for [model downloading](https://huggingface.co/docs/hub/models-downloading) to read more details.
-
-3. Run model conversion script to convert PyTorch model to OpenVINO IR via [optimum-intel](https://github.com/huggingface/optimum-intel). Please, use the script `scripts/convert_model.py` to convert the model into `FP16_static` or `FP16_dyn`, which will be saved into the `models` folder:
-```shell
-cd scripts
-python convert_model.py -b 1 -t FP16 -sd ../models/dreamlike-anime-1.0 # to convert to models with static shapes
-python convert_model.py -b 1 -t FP16 -sd ../models/dreamlike-anime-1.0 -dyn True # to keep models with dynamic shapes
-python convert_model.py -b 1 -t INT8 -sd ../models/dreamlike-anime-1.0 -dyn True # to compress the models to INT8
-```
+   ```shell
+   conda activate openvino_sd_cpp
+   python -m pip install -r scripts/requirements.txt
+   python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
+   ```
+
+2. Download and export to OpenVINO format a huggingface SD v1.5 model like:
+   - [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
+   - [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.
+
+   Example command for downloading and exporting FP16 model:
+   ```shell
+   export LD_LIBRARY_PATH="$CONDA_PREFIX/lib"
+   export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
+   # Using optimum-cli for exporting model to OpenVINO format
+   optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
+   # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
+   convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
+   ```
+
+   You can also choose other precision and export FP32 or INT8 model.
+
+   Please, refer to the official website for [🤗 Optimum](https://huggingface.co/docs/optimum/main/en/index) and [optimum-intel](https://github.com/huggingface/optimum-intel) to read more details.
 
 > [!NOTE]
->Now the pipeline support batch size = 1 only, i.e. static model `(1, 3, 512, 512)`
+> Now the pipeline supports batch size = 1 only, i.e. static model `(1, 3, 512, 512)`
 
 ### LoRA enabling with safetensors
 
@@ -69,7 +72,7 @@ cmake --build build --parallel
 
 ## Step 4: Run Pipeline
 ```shell
-./stable_diffusion [-p ] [-n ] [-s ] [--height ] [--width ] [-d ] [-r ] [-l ] [-a ] [-h ] [-m ] [-t ]
+./build/stable_diffusion [-p ] [-n ] [-s ] [--height ] [--width ] [-d ] [-r ] [-l ] [-a ] [-h ] [-m ] [-t ] [--dynamic]
 
 Usage:
   stable_diffusion [OPTION...]
@@ -85,8 +88,9 @@ Usage:
 * `--width arg` Width of output image (default: 512)
 * `-c, --useCache` Use model caching
 * `-r, --readNPLatent` Read numpy generated latents from file
-* `-m, --modelPath arg` Specify path of SD model IR (default: ../models/dreamlike-anime-1.0)
-* `-t, --type arg` Specify the type of SD model IR (FP16_static or FP16_dyn) (default: FP16_static)
+* `-m, --modelPath arg` Specify path of SD model IR (default: ../models/dreamlike_anime_1_0_ov)
+* `-t, --type arg` Specify the type of SD model IRs (FP32, FP16 or INT8) (default: FP16)
+* `--dynamic` Specify the model input shape to use dynamic shape
 * `-l, --loraPath arg` Specify path of lora file. (*.safetensors). (default: )
 * `-a, --alpha arg` alpha for lora (default: 0.75)
 * `-h, --help` Print usage
@@ -102,15 +106,15 @@ Negative prompt: (empty, here couldn't use OV tokenizer, check the issues for de
 
 Read the numpy latent instead of C++ std lib for the alignment with Python pipeline
 
-* Generate image without lora `./stable_diffusion -r`
+* Generate image without lora `./build/stable_diffusion -r`
 
 ![](./without_lora.bmp)
 
-* Generate image with soulcard lora `./stable_diffusion -r`
+* Generate image with soulcard lora `./build/stable_diffusion -r`
 
 ![](./soulcard_lora.bmp)
 
-* Generate different size image with dynamic model (C++ lib generated latent): `./stable_diffusion -m ../models/dreamlike-anime-1.0 -t FP16_dyn --height 448 --width 704`
+* Generate different size image with dynamic model (C++ lib generated latent): `./build/stable_diffusion -m ../models/dreamlike_anime_1_0_ov -t FP16 --dynamic --height 448 --width 704`
 
 ![](./704x448.bmp)
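The `optimum-cli` invocation in the README change above can also be scripted from Python, using the same APIs as the sample's `scripts/convert_model.py`. A rough, illustrative sketch (not part of the patch series; it assumes `optimum-intel` and `openvino-tokenizers` are installed per `requirements.txt`, and mirrors the README's model id, output path, and FP16 choice):

```python
# Python equivalent of the optimum-cli export + manual tokenizer conversion above (a sketch).
from pathlib import Path

from openvino import Type, save_model
from openvino_tokenizers import convert_tokenizer
from optimum.intel.openvino import OVStableDiffusionPipeline
from transformers import AutoTokenizer

model_path = Path("models/dreamlike_anime_1_0_ov/FP16")

# Export the Stable Diffusion pipeline to OpenVINO IR in FP16
pipeline = OVStableDiffusionPipeline.from_pretrained("dreamlike-art/dreamlike-anime-1.0", export=True, compile=False)
pipeline.half()
pipeline.save_pretrained(model_path)

# Convert the tokenizer separately with i32 outputs, as the C++ sample expects
hf_tokenizer = AutoTokenizer.from_pretrained(model_path / "tokenizer")
ov_tokenizer = convert_tokenizer(hf_tokenizer, tokenizer_output_type=Type.i32)
save_model(ov_tokenizer, model_path / "tokenizer" / "openvino_tokenizer.xml", compress_to_fp16=False)
```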
From 01734738cb1f2595e6ed1aa9a415ded80e0fede7 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 29 Mar 2024 16:12:28 +0400
Subject: [PATCH 05/19] Remove convert_model.py, move requirements and
 np_latents to root

---
 .../stable_diffusion_1_5/cpp/README.md        |  2 +-
 .../cpp/{scripts => }/np_latents_512x512.txt  |  0
 .../cpp/{scripts => }/requirements.txt        |  0
 .../cpp/scripts/convert_model.py              | 46 -------------------
 .../stable_diffusion_1_5/cpp/src/main.cpp     |  2 +-
 5 files changed, 2 insertions(+), 48 deletions(-)
 rename image_generation/stable_diffusion_1_5/cpp/{scripts => }/np_latents_512x512.txt (100%)
 rename image_generation/stable_diffusion_1_5/cpp/{scripts => }/requirements.txt (100%)
 delete mode 100644 image_generation/stable_diffusion_1_5/cpp/scripts/convert_model.py

diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index 1c467f2ff1..c76ee4d90d 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -28,7 +28,7 @@ conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
 1. Install dependencies to import models from HuggingFace:
    ```shell
    conda activate openvino_sd_cpp
-   python -m pip install -r scripts/requirements.txt
+   python -m pip install -r requirements.txt
    python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
    ```
 
diff --git a/image_generation/stable_diffusion_1_5/cpp/scripts/np_latents_512x512.txt b/image_generation/stable_diffusion_1_5/cpp/np_latents_512x512.txt
similarity index 100%
rename from image_generation/stable_diffusion_1_5/cpp/scripts/np_latents_512x512.txt
rename to image_generation/stable_diffusion_1_5/cpp/np_latents_512x512.txt
diff --git a/image_generation/stable_diffusion_1_5/cpp/scripts/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt
similarity index 100%
rename from image_generation/stable_diffusion_1_5/cpp/scripts/requirements.txt
rename to image_generation/stable_diffusion_1_5/cpp/requirements.txt
diff --git a/image_generation/stable_diffusion_1_5/cpp/scripts/convert_model.py b/image_generation/stable_diffusion_1_5/cpp/scripts/convert_model.py
deleted file mode 100644
index b442dc54fe..0000000000
--- a/image_generation/stable_diffusion_1_5/cpp/scripts/convert_model.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from pathlib import Path
-import argparse
-from optimum.intel.openvino import OVStableDiffusionPipeline
-from openvino import Type, save_model
-from transformers import AutoTokenizer
-from openvino_tokenizers import convert_tokenizer
-
-
-def parse_args() -> argparse.Namespace:
-    """Parse and return command line arguments."""
-    parser = argparse.ArgumentParser(add_help=False)
-    args = parser.add_argument_group('Options')
-    args.add_argument('-h', '--help', action = 'help',
-                      help='Show this help message and exit.')
-    args.add_argument('-b', '--batch', type = int, default = 1, required = True,
-                      help='Required. batch_size for solving single/multiple prompt->image generation.')
-    args.add_argument('-t', '--type', type = str, default = "FP32", required = True,
-                      help='Required. data type, FP32, FP16, and compressed type INT8.')
-    args.add_argument('-dyn', '--dynamic', type = bool, default = False, required = False,
-                      help='Specify the model input shape to use dynamic shape.')
-    args.add_argument('-sd','--sd_weights', type = str, default="", required = True,
-                      help='Specify the path of stable diffusion model')
-    return parser.parse_args()
-
-args = parse_args()
-
-load_in_8bit = True if args.type == "INT8" else False
-output_path = Path(args.sd_weights) / (args.type + ("_dyn" if args.dynamic else "_static"))
-
-# convert SD models to IR
-
-model = OVStableDiffusionPipeline.from_pretrained(args.sd_weights, trust_remote_code=True, export=True, compile=False, load_in_8bit=load_in_8bit)
-if args.type == "FP16":
-    model.half()
-if not args.dynamic:
-    model.reshape(args.batch, 512, 512, 1)
-
-model.save_pretrained(output_path)
-
-# convert tokenizer
-
-tokenizer_path = output_path / "tokenizer"
-hf_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-ov_tokenizer_encoder = convert_tokenizer(hf_tokenizer, tokenizer_output_type=Type.i32)
-
-save_model(ov_tokenizer_encoder, tokenizer_path / "openvino_tokenizer.xml", compress_to_fp16=False)
diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index 53f7cb925c..92f97be8ff 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -38,7 +38,7 @@ ov::Tensor randn_tensor(uint32_t height, uint32_t width, bool use_np_latents, ui
     ov::Tensor noise(ov::element::f32, {1, 4, height / 8, width / 8});
     if (use_np_latents) {
         // read np generated latents with default seed 42
-        const char * latent_file_name = "../scripts/np_latents_512x512.txt";
+        const char * latent_file_name = "../np_latents_512x512.txt";
         std::ifstream latent_copy_file(latent_file_name, std::ios::ate);
         OPENVINO_ASSERT(latent_copy_file.is_open(), "Cannot open ", latent_file_name);

From e3cec2b8ea88acd24242c679b07af3cc5191f4b4 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 29 Mar 2024 16:43:49 +0400
Subject: [PATCH 06/19] Format main.cpp with clang format

---
 .../stable_diffusion_1_5/cpp/src/main.cpp | 161 ++++++++++++------
 1 file changed, 108 insertions(+), 53 deletions(-)

diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index 92f97be8ff..06c6024286 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -2,29 +2,28 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include
+#include
+#include
 #include
-#include
 #include
-#include
-#include
-
-#include "openvino/runtime/core.hpp"
-#include "openvino/pass/manager.hpp"
-#include "openvino/core/preprocess/pre_post_process.hpp"
+#include
 
 #include "cxxopts.hpp"
-#include "scheduler_lms_discrete.hpp"
-#include "lora.hpp"
 #include "imwrite.hpp"
+#include "lora.hpp"
+#include "openvino/core/preprocess/pre_post_process.hpp"
+#include "openvino/pass/manager.hpp"
+#include "openvino/runtime/core.hpp"
+#include "scheduler_lms_discrete.hpp"
 
-const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json'
+const size_t TOKENIZER_MODEL_MAX_LENGTH = 77;  // 'model_max_length' from 'tokenizer_config.json'
 const size_t VAE_SCALE_FACTOR = 8;
 
 class Timer {
     const decltype(std::chrono::steady_clock::now()) m_start;
+
 public:
-    Timer(const std::string& scope) :
-        m_start(std::chrono::steady_clock::now()) {
+    Timer(const std::string& scope) : m_start(std::chrono::steady_clock::now()) {
         (std::cout << scope << ": ").flush();
     }
@@ -38,12 +37,17 @@ ov::Tensor randn_tensor(uint32_t height, uint32_t width, bool use_np_latents, ui
     ov::Tensor noise(ov::element::f32, {1, 4, height / 8, width / 8});
     if (use_np_latents) {
         // read np generated latents with default seed 42
-        const char * latent_file_name = "../np_latents_512x512.txt";
+        const char* latent_file_name = "../np_latents_512x512.txt";
         std::ifstream latent_copy_file(latent_file_name, std::ios::ate);
         OPENVINO_ASSERT(latent_copy_file.is_open(), "Cannot open ", latent_file_name);
 
         size_t file_size = latent_copy_file.tellg() / sizeof(float);
-        OPENVINO_ASSERT(file_size >= noise.get_size(), "Cannot generate ", noise.get_shape(), " with ", latent_file_name, ". File size is small");
+        OPENVINO_ASSERT(file_size >= noise.get_size(),
+                        "Cannot generate ",
+                        noise.get_shape(),
+                        " with ",
+                        latent_file_name,
+                        ". File size is small");
 
         latent_copy_file.seekg(0, std::ios::beg);
         for (size_t i = 0; i < noise.get_size(); ++i)
@@ -81,8 +85,12 @@ void reshape_text_encoder(std::shared_ptr model, size_t batch_size, s
     model->reshape(name_to_shape);
 }
 
-void reshape_unet_encoder(std::shared_ptr<ov::Model> model, int64_t batch_size, int64_t height, int64_t width,
-                          int64_t num_images_per_prompt, int64_t tokenizer_model_max_length) {
+void reshape_unet_encoder(std::shared_ptr<ov::Model> model,
+                          int64_t batch_size,
+                          int64_t height,
+                          int64_t width,
+                          int64_t num_images_per_prompt,
+                          int64_t tokenizer_model_max_length) {
     batch_size *= num_images_per_prompt;
     // The factor of 2 comes from the guidance scale > 1
     for (auto input : model->inputs()) {
@@ -94,7 +102,7 @@ void reshape_unet_encoder(std::shared_ptr model, int64_t batch_size,
 
     height = height / VAE_SCALE_FACTOR;
     width = width / VAE_SCALE_FACTOR;
-    
+
     std::map<std::string, ov::PartialShape> name_to_shape;
 
     for (auto input : model->inputs()) {
@@ -103,7 +111,7 @@ void reshape_unet_encoder(std::shared_ptr model, int64_t batch_size,
         if (input_name == "timestep") {
             name_to_shape[input_name][0] = 1;
         } else if (input_name == "sample") {
-            int64_t in_channels = 4; // 'in_channels' parameter from 'unet/config.json'
+            int64_t in_channels = 4;  // 'in_channels' parameter from 'unet/config.json'
             name_to_shape[input_name] = {batch_size, in_channels, height, width};
         } else if (input_name == "time_ids") {
             name_to_shape[input_name][0] = batch_size;
@@ -119,20 +127,28 @@ void reshape_unet_encoder(std::shared_ptr model, int64_t batch_size,
 void reshape_vae_decoder(std::shared_ptr<ov::Model> model, int64_t height, int64_t width) {
     height = height / VAE_SCALE_FACTOR;
     width = width / VAE_SCALE_FACTOR;
-    int64_t latent_channels = 4; // 'latent_channels' parameter from 'vae_decoder/config.json'
-    std::map<std::string, ov::PartialShape> name_to_shape{{model->input(0).get_any_name(), {1, latent_channels, height, width}}};
+    int64_t latent_channels = 4;  // 'latent_channels' parameter from 'vae_decoder/config.json'
+    std::map<std::string, ov::PartialShape> name_to_shape{
+        {model->input(0).get_any_name(), {1, latent_channels, height, width}}};
     model->reshape(name_to_shape);
 }
 
-StableDiffusionModels compile_models(const std::string& model_path, const std::string& device, const std::string& lora_path,
-                                     const float alpha, const bool use_cache, const bool use_dynamic_shapes,
-                                     const size_t batch_size, const size_t height, const size_t width, const size_t num_images) {
+StableDiffusionModels compile_models(const std::string& model_path,
+                                     const std::string& device,
+                                     const std::string& lora_path,
+                                     const float alpha,
+                                     const bool use_cache,
+                                     const bool use_dynamic_shapes,
+                                     const size_t batch_size,
+                                     const size_t height,
+                                     const size_t width,
+                                     const size_t num_images) {
     StableDiffusionModels models;
 
     ov::Core core;
     if (use_cache)
         core.set_property(ov::cache_dir("./cache_dir"));
-    
+
     core.add_extension(TOKENIZERS_LIBRARY_PATH);
 
     // read LoRA weights
@@ -195,7 +211,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
     ov::InferRequest tokenizer_req = models.tokenizer.create_infer_request();
     ov::InferRequest text_encoder_req = models.text_encoder.create_infer_request();
 
-    auto compute_text_embeddings = [&] (std::string& prompt, ov::Tensor encoder_output_tensor) {
+    auto compute_text_embeddings = [&](std::string& prompt, ov::Tensor encoder_output_tensor) {
         ov::Tensor input_ids(ov::element::i32, input_ids_shape);
         std::fill_n(input_ids.data(), input_ids.get_size(), PAD_TOKEN_ID);
 
@@ -213,8 +229,10 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
         text_encoder_req.infer();
     };
 
     ov::Tensor text_embeddings(ov::element::f32, {2, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE});
 
-    compute_text_embeddings(neg_prompt, ov::Tensor(text_embeddings, {0, 0, 0}, {1, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
-    compute_text_embeddings(pos_prompt, ov::Tensor(text_embeddings, {1, 0, 0}, {2, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
+    compute_text_embeddings(neg_prompt,
+                            ov::Tensor(text_embeddings, {0, 0, 0}, {1, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
+    compute_text_embeddings(pos_prompt,
+                            ov::Tensor(text_embeddings, {1, 0, 0}, {2, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE}));
 
     return text_embeddings;
 }
@@ -237,7 +255,8 @@ ov::Tensor unet(ov::InferRequest req, ov::Tensor sample, ov::Tensor timestep, ov
     ov::Tensor noisy_residual(noise_pred_tensor.get_element_type(), noise_pred_shape);
 
     for (size_t i = 0; i < ov::shape_size(noise_pred_shape); ++i)
-        noisy_residual.data()[i] = noise_pred_uncond[i] + guidance_scale * (noise_pred_text[i] - noise_pred_uncond[i]);
+        noisy_residual.data()[i] =
+            noise_pred_uncond[i] + guidance_scale * (noise_pred_text[i] - noise_pred_uncond[i]);
 
     return noisy_residual;
 }
@@ -270,22 +289,44 @@ ov::Tensor postprocess_image(ov::Tensor decoded_image) {
 
 int32_t main(int32_t argc, char* argv[]) try {
     cxxopts::Options options("stable_diffusion", "Stable Diffusion implementation in C++ using OpenVINO\n");
-    options.add_options()
-    ("p,posPrompt", "Initial positive prompt for SD ", cxxopts::value()->default_value("cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"))
-    ("n,negPrompt","Defaut is empty with space", cxxopts::value()->default_value(" "))
-    ("d,device", "AUTO, CPU, or GPU.\nDoesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device only", cxxopts::value()->default_value("CPU"))
-    ("step", "Number of diffusion steps", cxxopts::value()->default_value("20"))
-    ("s,seed", "Number of random seed to generate latent for one image output", cxxopts::value()->default_value("42"))
-    ("num", "Number of image output", cxxopts::value()->default_value("1"))
-    ("height", "Destination image height", cxxopts::value()->default_value("512"))
-    ("width", "Destination image width", cxxopts::value()->default_value("512"))
-    ("c,useCache", "Use model caching", cxxopts::value()->default_value("false"))
-    ("r,readNPLatent", "Read numpy generated latents from file", cxxopts::value()->default_value("false"))
-    ("m,modelPath", "Specify path of SD model IRs", cxxopts::value()->default_value("../models/dreamlike_anime_1_0_ov"))
-    ("t,type", "Specify the type of SD model IRs (FP32, FP16 or INT8)", cxxopts::value()->default_value("FP16"))
-    ("dynamic", "Specify the model input shape to use dynamic shape", cxxopts::value()->default_value("false"))
-    ("l,loraPath", "Specify path of LoRA file. (*.safetensors).", cxxopts::value()->default_value(""))
-    ("a,alpha", "alpha for LoRA", cxxopts::value()->default_value("0.75"))("h,help", "Print usage");
+    options.add_options()(
+        "p,posPrompt",
+        "Initial positive prompt for SD ",
+        cxxopts::value()->default_value(
+            "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"))(
+        "n,negPrompt",
+        "Default is empty with space",
+        cxxopts::value()->default_value(" "))(
+        "d,device",
+        "AUTO, CPU, or GPU.\nDoesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device "
+        "only",
+        cxxopts::value()->default_value(
+            "CPU"))("step", "Number of diffusion steps", cxxopts::value()->default_value("20"))(
+        "s,seed",
+        "Number of random seed to generate latent for one image output",
+        cxxopts::value()->default_value(
+            "42"))("num", "Number of image output", cxxopts::value()->default_value("1"))(
+        "height",
+        "Destination image height",
+        cxxopts::value()->default_value(
+            "512"))("width", "Destination image width", cxxopts::value()->default_value("512"))(
+        "c,useCache",
+        "Use model caching",
+        cxxopts::value()->default_value("false"))("r,readNPLatent",
+                                                  "Read numpy generated latents from file",
+                                                  cxxopts::value()->default_value("false"))(
+        "m,modelPath",
+        "Specify path of SD model IRs",
+        cxxopts::value()->default_value("../models/dreamlike_anime_1_0_ov"))(
+        "t,type",
+        "Specify the type of SD model IRs (FP32, FP16 or INT8)",
+        cxxopts::value()->default_value("FP16"))("dynamic",
                                                  "Specify the model input shape to use dynamic shape",
+                                                 cxxopts::value()->default_value("false"))(
+        "l,loraPath",
+        "Specify path of LoRA file. (*.safetensors).",
+        cxxopts::value()->default_value(
+            ""))("a,alpha", "alpha for LoRA", cxxopts::value()->default_value("0.75"))("h,help", "Print usage");
 
     cxxopts::ParseResult result;
     try {
@@ -317,8 +358,10 @@ int32_t main(int32_t argc, char* argv[]) try {
     const std::string lora_path = result["loraPath"].as();
     const float alpha = result["alpha"].as();
 
-    OPENVINO_ASSERT(!read_np_latent || (read_np_latent && (num_images == 1)),
-        "\"readNPLatent\" option is only supported for one output image. Number of image output was set to " + std::to_string(num_images));
+    OPENVINO_ASSERT(
+        !read_np_latent || (read_np_latent && (num_images == 1)),
+        "\"readNPLatent\" option is only supported for one output image. Number of image output was set to " +
+            std::to_string(num_images));
 
     const std::string folder_name = "images";
     try {
@@ -338,12 +381,21 @@ int32_t main(int32_t argc, char* argv[]) try {
 
     // Stable Diffusion pipeline
     const size_t batch_size = 1;
-    StableDiffusionModels models = compile_models(model_path, device, lora_path, alpha, use_cache, use_dynamic_shapes, batch_size, height, width, num_images);
+    StableDiffusionModels models = compile_models(model_path,
+                                                  device,
+                                                  lora_path,
+                                                  alpha,
+                                                  use_cache,
+                                                  use_dynamic_shapes,
+                                                  batch_size,
+                                                  height,
+                                                  width,
+                                                  num_images);
     ov::InferRequest unet_infer_request = models.unet.create_infer_request();
 
     ov::PartialShape sample_shape = models.unet.input("sample").get_partial_shape();
     OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == height && sample_shape[3] * 8 == width),
-        "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
+                    "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
 
     Timer t("Running Stable Diffusion pipeline");
 
@@ -354,21 +406,24 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::vector timesteps = scheduler->get_timesteps();
 
     for (uint32_t n = 0; n < num_images; n++) {
-        std::uint32_t seed = num_images == 1 ? user_seed: user_seed + n;
+        std::uint32_t seed = num_images == 1 ? user_seed : user_seed + n;
         ov::Tensor noise = randn_tensor(height, width, read_np_latent, seed);
 
         // latents are multiplied by 'init_noise_sigma'
         ov::Shape latent_shape = noise.get_shape(), latent_model_input_shape = latent_shape;
-        latent_model_input_shape[0] = 2; // Unet accepts batch 2
-        ov::Tensor latent(ov::element::f32, latent_shape), latent_model_input(ov::element::f32, latent_model_input_shape);
+        latent_model_input_shape[0] = 2;  // Unet accepts batch 2
+        ov::Tensor latent(ov::element::f32, latent_shape),
+            latent_model_input(ov::element::f32, latent_model_input_shape);
         for (size_t i = 0; i < noise.get_size(); ++i) {
             latent.data()[i] = noise.data()[i] * scheduler->get_init_noise_sigma();
         }
 
         for (size_t inference_step = 0; inference_step < num_inference_steps; inference_step++) {
             // concat the same latent twice along a batch dimension
-            latent.copy_to(ov::Tensor(latent_model_input, {0, 0, 0, 0}, {1, latent_shape[1], latent_shape[2], latent_shape[3]}));
-            latent.copy_to(ov::Tensor(latent_model_input, {1, 0, 0, 0}, {2, latent_shape[1], latent_shape[2], latent_shape[3]}));
+            latent.copy_to(
+                ov::Tensor(latent_model_input, {0, 0, 0, 0}, {1, latent_shape[1], latent_shape[2], latent_shape[3]}));
+            latent.copy_to(
+                ov::Tensor(latent_model_input, {1, 0, 0, 0}, {2, latent_shape[1], latent_shape[2], latent_shape[3]}));
 
             scheduler->scale_model_input(latent_model_input, inference_step);

From ad65b6ab5028b355a677f776cb2edf1aaa44f739 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Wed, 3 Apr 2024 10:26:43 +0400
Subject: [PATCH 07/19] Update GH Actions workflow for SD tests

---
 .../workflows/stable_diffusion_1_5_cpp.yml | 28 +++++++++++--------
 .../stable_diffusion_1_5/cpp/README.md     |  4 +--
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index ff45821fbf..3464e27153 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -25,15 +25,17 @@ jobs:
         mkdir openvino
         curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz
         sudo ./openvino/install_dependencies/install_openvino_dependencies.sh
-    - name: Download / convert models
+    - name: Download and convert model and tokenizer
       run: |
         set -e
         source ./openvino/setupvars.sh
-        cd ./image_generation/stable_diffusion_1_5/cpp/scripts/
+        cd ./image_generation/stable_diffusion_1_5/cpp/
         python -m pip install -U pip
         python -m pip install -r ./requirements.txt
-        python -m pip install ../../../../thirdparty/openvino_tokenizers/
-        python convert_model.py -sd runwayml/stable-diffusion-v1-5 -b 1 -t FP16 -dyn True
+        python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
+        export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16"
+        optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
+        convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
     - name: Build app
       run: |
         set -e
@@ -45,8 +47,8 @@ jobs:
       run: |
         set -e
         source ./openvino/setupvars.sh
-        cd ./image_generation/stable_diffusion_1_5/cpp/build
-        ./stable_diffusion -m ../scripts/runwayml/stable-diffusion-v1-5 -t FP16_dyn
+        cd ./image_generation/stable_diffusion_1_5/cpp
+        ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic
   stable_diffusion_1_5_cpp-windows:
     runs-on: windows-latest
     steps:
@@ -61,14 +63,16 @@ jobs:
       run: |
        curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip
        unzip ov.zip
-    - name: Download / convert a model / tokenizer
+    - name: Download and convert model and tokenizer
      shell: cmd
      run: |
        call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat
-        cd ./image_generation/stable_diffusion_1_5/cpp/scripts/
+        cd ./image_generation/stable_diffusion_1_5/cpp/
        python -m pip install -r ./requirements.txt
-        python -m pip install ../../../../thirdparty/openvino_tokenizers/
-        python convert_model.py -sd runwayml/stable-diffusion-v1-5 -b 1 -t FP16 -dyn True
+        python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
+        set MODEL_PATH=models/stable_diffusion_v1_5_ov/FP16
+        optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 %MODEL_PATH%
+        convert_tokenizer %MODEL_PATH%/tokenizer/ --tokenizer-output-type i32 -o %MODEL_PATH%/tokenizer/
     - name: Build app
      shell: cmd
      run: |
@@ -80,5 +84,5 @@ jobs:
      shell: cmd
      run: |
        call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat
-        cd ./image_generation/stable_diffusion_1_5/cpp/build/
-        call "./Release/stable_diffusion.exe" -m ../scripts/runwayml/stable-diffusion-v1-5 -t FP16_dyn
+        cd ./image_generation/stable_diffusion_1_5/cpp/
+        call "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic
diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index 5bb5aa245e..6863b7b52c 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -29,7 +29,7 @@ conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
 ```shell
 git submodule update --init
 conda activate openvino_sd_cpp
-python -m pip install -r scripts/requirements.txt
+python -m pip install -r requirements.txt
 python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
``` 2. Download a huggingface SD v1.5 model like: @@ -114,7 +114,7 @@ Read the numpy latent instead of C++ std lib for the alignment with Python pipel ![](./soulcard_lora.bmp) -* Generate different size image with dynamic model (C++ lib generated latent): `./build/stable_diffusion -m ../models/dreamlike_anime_1_0_ov -t FP16 --dynamic --height 448 --width 704` +* Generate different size image with dynamic model (C++ lib generated latent): `./build/stable_diffusion -m ./models/dreamlike_anime_1_0_ov -t FP16 --dynamic --height 448 --width 704` ![](./704x448.bmp) From d84b6956384989a2cd4c9a7d7e3acb543638f860 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Thu, 4 Apr 2024 20:56:12 +0400 Subject: [PATCH 08/19] Fix extra whitespace --- image_generation/stable_diffusion_1_5/cpp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 6863b7b52c..4758ad2c58 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -18,7 +18,7 @@ Prepare a python environment and install dependencies: ```shell conda create -n openvino_sd_cpp python==3.10 conda activate openvino_sd_cpp -conda install -c conda-forge openvino c-compiler cxx-compiler make cmake +conda install -c conda-forge openvino c-compiler cxx-compiler make cmake ``` ## Step 2: Convert Stable Diffusion v1.5 and Tokenizer models From a6807cab30e3d2da9ae4f220e67c25b468bdfe0f Mon Sep 17 00:00:00 2001 From: yatarkan Date: Thu, 4 Apr 2024 20:56:49 +0400 Subject: [PATCH 09/19] Enable conda in GH workflow for SD sample --- .../workflows/stable_diffusion_1_5_cpp.yml | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 3464e27153..ef2d39c663 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -16,39 +16,43 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - uses: actions/setup-python@v4 + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.8 - - name: Install OpenVINO + miniconda-version: "latest" + activate-environment: openvino_sd_cpp + python-version: 3.10 + + - name: Install OpenVINO and other conda dependencies run: | - set -e - mkdir openvino - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz - sudo ./openvino/install_dependencies/install_openvino_dependencies.sh - - name: Download and convert model and tokenizer + conda install -c conda-forge openvino c-compiler cxx-compiler make cmake + + - name: Install python dependencies + working-directory: ./image_generation/stable_diffusion_1_5/cpp/ # TODO Move working dir to workflow var run: | - set -e - source ./openvino/setupvars.sh - cd ./image_generation/stable_diffusion_1_5/cpp/ - python -m pip install -U pip - python -m pip install -r ./requirements.txt + python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ./image_generation/stable_diffusion_1_5/cpp/ + run: | + export LD_LIBRARY_PATH="$CONDA_PREFIX/lib" export 
MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16" optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + - name: Build app + working-directory: ./image_generation/stable_diffusion_1_5/cpp/ run: | - set -e - source ./openvino/setupvars.sh - cd ./image_generation/stable_diffusion_1_5/cpp/ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel + - name: Run app + working-directory: ./image_generation/stable_diffusion_1_5/cpp/ run: | - set -e - source ./openvino/setupvars.sh - cd ./image_generation/stable_diffusion_1_5/cpp ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic + stable_diffusion_1_5_cpp-windows: runs-on: windows-latest steps: From bb50dd90530819764716f1920441f27ff3b65e1e Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 13:39:26 +0400 Subject: [PATCH 10/19] Quote python version for conda setup step --- .github/workflows/stable_diffusion_1_5_cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index ef2d39c663..628e018dab 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -22,7 +22,7 @@ jobs: with: miniconda-version: "latest" activate-environment: openvino_sd_cpp - python-version: 3.10 + python-version: "3.10" - name: Install OpenVINO and other conda dependencies run: | From f27bd2fdc607675d9ccdd73652be86318023a951 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 13:45:27 +0400 Subject: [PATCH 11/19] Add conda env activation to workflow --- .github/workflows/stable_diffusion_1_5_cpp.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 628e018dab..04090c6ff2 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -26,17 +26,20 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | + conda activate openvino_sd_cpp conda install -c conda-forge openvino c-compiler cxx-compiler make cmake - name: Install python dependencies working-directory: ./image_generation/stable_diffusion_1_5/cpp/ # TODO Move working dir to workflow var run: | + conda activate openvino_sd_cpp python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer working-directory: ./image_generation/stable_diffusion_1_5/cpp/ run: | + conda activate openvino_sd_cpp export LD_LIBRARY_PATH="$CONDA_PREFIX/lib" export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16" optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH @@ -45,6 +48,7 @@ jobs: - name: Build app working-directory: ./image_generation/stable_diffusion_1_5/cpp/ run: | + conda activate openvino_sd_cpp cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel From f97f0edb22cdf430b7b61fba63773b7514587b6c Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 13:52:35 +0400 Subject: [PATCH 12/19] Set default shell --- .github/workflows/stable_diffusion_1_5_cpp.yml | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 04090c6ff2..9d1c722219 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -12,6 +12,11 @@ concurrency: jobs: stable_diffusion_1_5_cpp-linux: runs-on: ubuntu-20.04-8-cores + # Do not ignore bash profile files. From: + # https://github.com/marketplace/actions/setup-miniconda + defaults: + run: + shell: bash -l {0} steps: - uses: actions/checkout@v4 with: From 1dd150948964e3c17bdbcb3e8d9dc63f67a5d973 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 14:20:43 +0400 Subject: [PATCH 13/19] Enable conda installation for windows job --- .../workflows/stable_diffusion_1_5_cpp.yml | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 9d1c722219..a23038e6e5 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -1,4 +1,5 @@ name: stable_diffusion_1_5_cpp + on: pull_request: paths: @@ -6,16 +7,22 @@ on: - image_generation/common/** - .github/workflows/stable_diffusion_1_5_cpp.yml - thirdparty/openvino_tokenizers + +env: + working_directory: "./image_generation/stable_diffusion_1_5/cpp/" + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true + jobs: stable_diffusion_1_5_cpp-linux: + if: ${{ false }} runs-on: ubuntu-20.04-8-cores - # Do not ignore bash profile files. From: - # https://github.com/marketplace/actions/setup-miniconda defaults: run: + # Do not ignore bash profile files. From: + # https://github.com/marketplace/actions/setup-miniconda#important shell: bash -l {0} steps: - uses: actions/checkout@v4 @@ -35,14 +42,14 @@ jobs: conda install -c conda-forge openvino c-compiler cxx-compiler make cmake - name: Install python dependencies - working-directory: ./image_generation/stable_diffusion_1_5/cpp/ # TODO Move working dir to workflow var + working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer - working-directory: ./image_generation/stable_diffusion_1_5/cpp/ + working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp export LD_LIBRARY_PATH="$CONDA_PREFIX/lib" @@ -51,51 +58,64 @@ jobs: convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ - name: Build app - working-directory: ./image_generation/stable_diffusion_1_5/cpp/ + working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel - name: Run app - working-directory: ./image_generation/stable_diffusion_1_5/cpp/ + working-directory: ${{ env.working_directory }} run: | ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic stable_diffusion_1_5_cpp-windows: runs-on: windows-latest + defaults: + run: + # Do not ignore cmd Autorun commands. 
From: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: cmd /C call {0} steps: - uses: actions/checkout@v4 with: submodules: recursive - - uses: actions/setup-python@v4 + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.8 - - name: Initialize OpenVINO - shell: cmd + miniconda-version: "latest" + activate-environment: openvino_sd_cpp + python-version: "3.10" + + - name: Install OpenVINO and other conda dependencies run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip - unzip ov.zip - - name: Download and convert model and tokenizer - shell: cmd + conda activate openvino_sd_cpp + conda install -c conda-forge openvino c-compiler cxx-compiler make cmake + + - name: Install python dependencies + working-directory: ${{ env.working_directory }} run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat - cd ./image_generation/stable_diffusion_1_5/cpp/ - python -m pip install -r ./requirements.txt + conda activate openvino_sd_cpp + python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_sd_cpp set MODEL_PATH=models/stable_diffusion_v1_5_ov/FP16 optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 %MODEL_PATH% convert_tokenizer %MODEL_PATH%/tokenizer/ --tokenizer-output-type i32 -o %MODEL_PATH%/tokenizer/ + - name: Build app - shell: cmd + working-directory: ${{ env.working_directory }} run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat - cd ./image_generation/stable_diffusion_1_5/cpp/ + conda activate openvino_sd_cpp cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel + - name: Run app - shell: cmd + working-directory: ${{ env.working_directory }} run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat - cd ./image_generation/stable_diffusion_1_5/cpp/ call "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic From 835055b1024ede046fc1f8a2dc0acb0a46acde21 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 14:30:13 +0400 Subject: [PATCH 14/19] Disable windows shell declaration --- .github/workflows/stable_diffusion_1_5_cpp.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index a23038e6e5..43ff0f140e 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -71,11 +71,11 @@ jobs: stable_diffusion_1_5_cpp-windows: runs-on: windows-latest - defaults: - run: - # Do not ignore cmd Autorun commands. From: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: cmd /C call {0} + # defaults: + # run: + # # Do not ignore cmd Autorun commands. 
From: + # # https://github.com/marketplace/actions/setup-miniconda#important + # shell: cmd /C call {0} steps: - uses: actions/checkout@v4 with: From 48238ceb808b1f65bc5482d269645ef4c9348a7e Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 15:12:17 +0400 Subject: [PATCH 15/19] Fix execute command for windows job --- .github/workflows/stable_diffusion_1_5_cpp.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 43ff0f140e..b20bfaab3a 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -104,9 +104,9 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - set MODEL_PATH=models/stable_diffusion_v1_5_ov/FP16 - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 %MODEL_PATH% - convert_tokenizer %MODEL_PATH%/tokenizer/ --tokenizer-output-type i32 -o %MODEL_PATH%/tokenizer/ + $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16' + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH + convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ - name: Build app working-directory: ${{ env.working_directory }} @@ -118,4 +118,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} run: | - call "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic + & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic From c375919319bb45dc65b6e073a6dc0ba467a7fbe5 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Apr 2024 18:45:01 +0400 Subject: [PATCH 16/19] Fix review comments --- .../stable_diffusion_1_5/cpp/src/main.cpp | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp index 06c6024286..d1c24c32a8 100644 --- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp +++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp @@ -16,7 +16,9 @@ #include "openvino/runtime/core.hpp" #include "scheduler_lms_discrete.hpp" -const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json' +const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' parameter from 'tokenizer_config.json' +const int64_t UNET_IN_CHANNELS = 4; // 'in_channels' parameter from 'unet/config.json' +const int64_t VAE_DECODER_LATENT_CHANNELS = 4; // 'latent_channels' parameter from 'vae_decoder/config.json' const size_t VAE_SCALE_FACTOR = 8; class Timer { @@ -34,7 +36,7 @@ class Timer { }; ov::Tensor randn_tensor(uint32_t height, uint32_t width, bool use_np_latents, uint32_t seed = 42) { - ov::Tensor noise(ov::element::f32, {1, 4, height / 8, width / 8}); + ov::Tensor noise(ov::element::f32, {1, UNET_IN_CHANNELS, height / VAE_SCALE_FACTOR, width / VAE_SCALE_FACTOR}); if (use_np_latents) { // read np generated latents with defaut seed 42 const char* latent_file_name = "../np_latents_512x512.txt"; @@ -81,17 +83,15 @@ void reshape_text_encoder(std::shared_ptr model, size_t batch_size, s ov::PartialShape input_shape = model->input(0).get_partial_shape(); input_shape[0] = batch_size; input_shape[1] = 
-    std::map<std::string, ov::PartialShape> name_to_shape{{model->input(0).get_any_name(), input_shape}};
-    model->reshape(name_to_shape);
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, input_shape}};
+    model->reshape(idx_to_shape);
 }
 
 void reshape_unet_encoder(std::shared_ptr<ov::Model> model,
                           int64_t batch_size,
                           int64_t height,
                           int64_t width,
-                          int64_t num_images_per_prompt,
                           int64_t tokenizer_model_max_length) {
-    batch_size *= num_images_per_prompt;
     // The factor of 2 comes from the guidance scale > 1
     for (auto input : model->inputs()) {
         if (input.get_any_name().find("timestep_cond") == std::string::npos) {
@@ -111,8 +111,7 @@ void reshape_unet_encoder(std::shared_ptr<ov::Model> model,
         if (input_name == "timestep") {
             name_to_shape[input_name][0] = 1;
         } else if (input_name == "sample") {
-            int64_t in_channels = 4;  // 'in_channels' parameter from 'unet/config.json'
-            name_to_shape[input_name] = {batch_size, in_channels, height, width};
+            name_to_shape[input_name] = {batch_size, UNET_IN_CHANNELS, height, width};
         } else if (input_name == "time_ids") {
             name_to_shape[input_name][0] = batch_size;
         } else {
@@ -127,10 +126,9 @@ void reshape_unet_encoder(std::shared_ptr<ov::Model> model,
 void reshape_vae_decoder(std::shared_ptr<ov::Model> model, int64_t height, int64_t width) {
     height = height / VAE_SCALE_FACTOR;
     width = width / VAE_SCALE_FACTOR;
-    int64_t latent_channels = 4;  // 'latent_channels' parameter from 'vae_decoder/config.json'
-    std::map<std::string, ov::PartialShape> name_to_shape{
-        {model->input(0).get_any_name(), {1, latent_channels, height, width}}};
-    model->reshape(name_to_shape);
+
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, {1, VAE_DECODER_LATENT_CHANNELS, height, width}}};
+    model->reshape(idx_to_shape);
 }
 
 StableDiffusionModels compile_models(const std::string& model_path,
@@ -141,8 +139,7 @@ StableDiffusionModels compile_models(const std::string& model_path,
                                      const bool use_dynamic_shapes,
                                      const size_t batch_size,
                                      const size_t height,
-                                     const size_t width,
-                                     const size_t num_images) {
+                                     const size_t width) {
     StableDiffusionModels models;
     ov::Core core;
 
@@ -174,7 +171,7 @@ StableDiffusionModels compile_models(const std::string& model_path,
         Timer t("Loading and compiling UNet");
         auto unet_model = core.read_model(model_path + "/unet/openvino_model.xml");
         if (!use_dynamic_shapes) {
-            reshape_unet_encoder(unet_model, batch_size, height, width, num_images, TOKENIZER_MODEL_MAX_LENGTH);
+            reshape_unet_encoder(unet_model, batch_size, height, width, TOKENIZER_MODEL_MAX_LENGTH);
         }
         apply_lora(unet_model, lora_weights["unet"]);
         models.unet = core.compile_model(unet_model, device);
@@ -381,20 +378,13 @@ int32_t main(int32_t argc, char* argv[]) try {
 
     // Stable Diffusion pipeline
     const size_t batch_size = 1;
-    StableDiffusionModels models = compile_models(model_path,
-                                                  device,
-                                                  lora_path,
-                                                  alpha,
-                                                  use_cache,
-                                                  use_dynamic_shapes,
-                                                  batch_size,
-                                                  height,
-                                                  width,
-                                                  num_images);
+    StableDiffusionModels models =
+        compile_models(model_path, device, lora_path, alpha, use_cache, use_dynamic_shapes, batch_size, height, width);
 
     ov::InferRequest unet_infer_request = models.unet.create_infer_request();
     ov::PartialShape sample_shape = models.unet.input("sample").get_partial_shape();
-    OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == height && sample_shape[3] * 8 == width),
+    OPENVINO_ASSERT(sample_shape.is_dynamic() ||
+                        (sample_shape[2] * VAE_SCALE_FACTOR == height && sample_shape[3] * VAE_SCALE_FACTOR == width),
                     "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
 
     Timer t("Running Stable Diffusion pipeline");

From 1daf7e0cbc4581e967ee34b96261afdb9e14ad63 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 5 Apr 2024 18:53:11 +0400
Subject: [PATCH 17/19] Set conda env instead of explicit exporting env var

---
 image_generation/stable_diffusion_1_5/cpp/README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index 4758ad2c58..8134574243 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -19,6 +19,8 @@ Prepare a python environment and install dependencies:
 conda create -n openvino_sd_cpp python==3.10
 conda activate openvino_sd_cpp
 conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
+# Ensure that Conda standard libraries are used
+conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```
 
 ## Step 2: Convert Stable Diffusion v1.5 and Tokenizer models
@@ -28,6 +30,7 @@ conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
 1. Install dependencies to import models from HuggingFace:
 ```shell
 git submodule update --init
+# Reactivate Conda environment after installing dependencies and setting env vars
 conda activate openvino_sd_cpp
 python -m pip install -r requirements.txt
 python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
@@ -38,7 +41,6 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
 Example command for downloading and exporting FP16 model:
 
 ```shell
-export LD_LIBRARY_PATH="$CONDA_PREFIX/lib"
 export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
 # Using optimum-cli for exporting model to OpenVINO format
 optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH

From cf027c8c013aac82e70f77ace95efeefdb684940 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 5 Apr 2024 18:54:46 +0400
Subject: [PATCH 18/19] Enable linux job, align conda env installation with readme

---
 .github/workflows/stable_diffusion_1_5_cpp.yml | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index b20bfaab3a..2b24fc02ad 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -17,7 +17,6 @@ concurrency:
 
 jobs:
   stable_diffusion_1_5_cpp-linux:
-    if: ${{ false }}
     runs-on: ubuntu-20.04-8-cores
     defaults:
       run:
@@ -40,6 +39,7 @@ jobs:
         run: |
           conda activate openvino_sd_cpp
           conda install -c conda-forge openvino c-compiler cxx-compiler make cmake
+          conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 
       - name: Install python dependencies
         working-directory: ${{ env.working_directory }}
@@ -52,7 +52,6 @@ jobs:
         working-directory: ${{ env.working_directory }}
         run: |
           conda activate openvino_sd_cpp
-          export LD_LIBRARY_PATH="$CONDA_PREFIX/lib"
           export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16"
           optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
           convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
@@ -71,11 +70,6 @@ jobs:
 
   stable_diffusion_1_5_cpp-windows:
     runs-on: windows-latest
-    # defaults:
-    #   run:
-    #     # Do not ignore cmd Autorun commands. From:
-    #     # https://github.com/marketplace/actions/setup-miniconda#important
-    #     shell: cmd /C call {0}
     steps:
       - uses: actions/checkout@v4
         with:

From 28652241b638701c739231589fdca0c8a9a07603 Mon Sep 17 00:00:00 2001
From: yatarkan
Date: Fri, 5 Apr 2024 18:56:28 +0400
Subject: [PATCH 19/19] Use static shapes in linux job

---
 .github/workflows/stable_diffusion_1_5_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index 2b24fc02ad..525ec8c75e 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -66,7 +66,7 @@ jobs:
       - name: Run app
         working-directory: ${{ env.working_directory }}
         run: |
-          ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic
+          ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16
 
   stable_diffusion_1_5_cpp-windows:
     runs-on: windows-latest