openvinotoolkit · dmitry-gorokhov · Nov 30, 2024 · Nov 27, 2024 · Nov 27, 2024 · Nov 28, 2024
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
@@ -736,7 +736,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-
+    apply_rt_info(model_, config_with_batch);
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(device_name));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -769,7 +769,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-
+    apply_rt_info(model_, config_with_batch);
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(deviceName));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -1098,6 +1098,23 @@ std::shared_ptr<const ov::Model> ov::CoreImpl::apply_auto_batching(const std::sh
     return ov::details::apply_batch_affinity(model, deviceNameWithoutBatch);
 }
 
+// Seeds config from the model's "runtime_options" rt_info section. Only the KV_CACHE_PRECISION and
+// DYNAMIC_QUANTIZATION_GROUP_SIZE entries are handled, and a key that is already present in config
+// is never overwritten.
+void ov::CoreImpl::apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::AnyMap& config) const {
+    // KV-cache precision is read from rt_info as ov::element::Type.
+    if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"}) && config.count("KV_CACHE_PRECISION") == 0) {
+        const auto precision = model->get_rt_info<ov::element::Type>({"runtime_options", "KV_CACHE_PRECISION"});
+        config.insert(ov::hint::kv_cache_precision(precision));
+    }
+    // The dynamic quantization group size is read from rt_info as uint64_t.
+    if (model->has_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"}) &&
+        config.count("DYNAMIC_QUANTIZATION_GROUP_SIZE") == 0) {
+        const auto group_size = model->get_rt_info<uint64_t>({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"});
+        config.insert(ov::hint::dynamic_quantization_group_size(group_size));
+    }
+}
+
 void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& properties) {
     OPENVINO_ASSERT(device_name.find("HETERO:") != 0,
                     "set_property is supported only for HETERO itself (without devices). "

diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp
@@ -200,6 +200,12 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this<ov::ICore
                                                          std::string& deviceName,
                                                          ov::AnyMap& config) const;
 
+    /*
+     * @brief Fill config from the model's "runtime_options" rt_info entries (KV_CACHE_PRECISION and
+     *        DYNAMIC_QUANTIZATION_GROUP_SIZE); keys already present in config are left unchanged
+     */
+    void apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::AnyMap& config) const;
+
     /*
      * @brief Register plugins according to the build configuration
      */

@@ -327,4 +327,18 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPUExecutionDevice) {
     ASSERT_EQ(value.as<std::string>(), "CPU");
 }
 
+TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPURuntimOptions) {
+    ov::Core core;
+    ov::CompiledModel compiled;
+    ov::Any kv_precision;  // receives ov::hint::kv_cache_precision queried from the compiled model
+    ov::Any group_size;    // receives ov::hint::dynamic_quantization_group_size queried from the compiled model
+    model->set_rt_info("f16", "runtime_options", "KV_CACHE_PRECISION");
+    model->set_rt_info("0", "runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE");
+    OV_ASSERT_NO_THROW(compiled = core.compile_model(model, deviceName));  // no explicit config is passed here
+    OV_ASSERT_NO_THROW(kv_precision = compiled.get_property(ov::hint::kv_cache_precision));
+    OV_ASSERT_NO_THROW(group_size = compiled.get_property(ov::hint::dynamic_quantization_group_size));
+    ASSERT_EQ(kv_precision.as<ov::element::Type>(), ov::element::f16);  // rt_info string "f16" comes back typed
+    ASSERT_EQ(group_size.as<uint64_t>(), 0);                            // rt_info string "0" comes back typed
+}
+
 } // namespace