made shaders gen deterministic, update to c++17 (+4 squashed commits)
Squashed commit:

[7bb2441b] made shaders gen deterministic

[906e02a] Update c++ from 11 to 17 (#1263)

* Update c/c++ from 11 to 17

* Update CMakeLists.txt

only bump c++

[7ca430e] C++17 ver

[b7dfb55] give up and switch to c++17 (+1 squashed commit)

Squashed commits:

[96cfbc48] give up and switch to c++17 (+5 squashed commits)

Squashed commit:

[19ac7c26] Revert "fixed incorrect number of params"

This reverts commit 5138872.

[45f730da] Revert "fix for c++17"

This reverts commit 050ba5f.

[5138872] fixed incorrect number of params

[8f1ee54] build latest vk shaders

[050ba5f] fix for c++17
LostRuins committed Dec 13, 2024
1 parent 46d76d9 commit a63c2c9
Showing 7 changed files with 93 additions and 107 deletions.
18 changes: 9 additions & 9 deletions CMakeLists.txt
@@ -59,7 +59,7 @@ option(LLAMA_OPENMP "llama: use OpenMP"
# Compile flags
#

-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
@@ -490,52 +490,52 @@ add_library(common2
src/unicode.cpp
src/unicode-data.cpp)
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
+target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(sdtype_adapter
otherarch/sdcpp/sdtype_adapter.cpp)
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(whisper_adapter
otherarch/whispercpp/whisper_adapter.cpp)
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
-target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(gpttype_adapter
gpttype_adapter.cpp)
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

if (LLAMA_CUBLAS)
set(TARGET koboldcpp_cublas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif()

if (LLAMA_HIPBLAS)
set(TARGET koboldcpp_hipblas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif()

4 changes: 2 additions & 2 deletions Makefile
@@ -47,15 +47,15 @@ endif
# Compile flags
#

-# keep standard at C11 and C++11
+# keep standard at C11 and C++17
CFLAGS =
CXXFLAGS =
ifdef KCPP_DEBUG
CFLAGS = -g -O0
CXXFLAGS = -g -O0
endif
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
-CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
+CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
ifndef KCPP_DEBUG
CFLAGS += -DNDEBUG -s
CXXFLAGS += -DNDEBUG -s
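Both build systems now request the same language level: CMake via set(CMAKE_CXX_STANDARD 17) plus the per-target cxx_std_17 compile features, and the Makefile via the -std=c++17 flag just above. A quick sanity check that a translation unit is really compiled as C++17 — a minimal sketch, not code from this repository:

```cpp
// Sketch: fails to compile if the build did not actually enable C++17.
// Note: MSVC reports an updated __cplusplus only with /Zc:__cplusplus.
static_assert(__cplusplus >= 201703L, "expected -std=c++17 or newer");

int main() { return 0; }
```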
131 changes: 63 additions & 68 deletions ggml/src/ggml-backend-reg.cpp
@@ -64,7 +64,7 @@

#ifdef _WIN32

-using dl_handle = typename std::remove_pointer<HMODULE>::type;
+using dl_handle = std::remove_pointer_t<HMODULE>;

struct dl_handle_deleter {
void operator()(HMODULE handle) {
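The one-line change in this hunk swaps the C++11 spelling typename std::remove_pointer<HMODULE>::type for the C++14 alias template std::remove_pointer_t. Both name the same type; a self-contained sketch, with int * standing in for HMODULE so it compiles off Windows:

```cpp
#include <type_traits>

// The *_t alias templates (C++14) drop the "typename ...::type" ceremony.
// int * stands in here for HMODULE, which is itself a pointer type.
using handle_old = typename std::remove_pointer<int *>::type;
using handle_new = std::remove_pointer_t<int *>;
static_assert(std::is_same_v<handle_old, handle_new>, "identical types");

int main() { return 0; }
```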
@@ -452,75 +452,70 @@ static std::string backend_filename_suffix() {
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths
+    std::string file_prefix = backend_filename_prefix() + name + "-";
+    std::vector<std::string> search_paths;
+    if (user_search_path == nullptr) {
+        search_paths.push_back("./");
+        search_paths.push_back(get_executable_path());
+    } else {
+#if defined(_WIN32)
+        search_paths.push_back(std::string(user_search_path) + "\\");
+#else
+        search_paths.push_back(std::string(user_search_path) + "/");
+#endif
+    }

-    //not available as we don't want c++17
-    printf("\nggml_backend_load_best NOT AVAILABLE!\n");
-    return nullptr;
+    int best_score = 0;
+    std::string best_path;

+    namespace fs = std::filesystem;
+    for (const auto & search_path : search_paths) {
+        if (!fs::exists(search_path)) {
+            continue;
+        }
+        for (const auto & entry : fs::directory_iterator(search_path)) {
+            if (entry.is_regular_file()) {
+                std::string filename = entry.path().filename().string();
+                std::string ext = entry.path().extension().string();
+                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+                    dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
+                    if (!handle && !silent) {
+                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
+                    }
+                    if (handle) {
+                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
+                        if (score_fn) {
+                            int s = score_fn();
+#ifndef NDEBUG
+                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
+#endif
+                            if (s > best_score) {
+                                best_score = s;
+                                best_path = entry.path().string();
+                            }
+                        } else {
+                            if (!silent) {
+                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }

+    if (best_score == 0) {
+        // try to load the base backend
+        for (const auto & search_path : search_paths) {
+            std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
+            if (fs::exists(path)) {
+                return get_reg().load_backend(path.c_str(), silent);
+            }
+        }
+        return nullptr;
+    }

-// std::string file_prefix = backend_filename_prefix() + name + "-";
-// std::vector<std::string> search_paths;
-// if (user_search_path == nullptr) {
-// search_paths.push_back("./");
-// search_paths.push_back(get_executable_path());
-// } else {
-// #if defined(_WIN32)
-// search_paths.push_back(std::string(user_search_path) + "\\");
-// #else
-// search_paths.push_back(std::string(user_search_path) + "/");
-// #endif
-// }
-
-// int best_score = 0;
-// std::string best_path;
-
-// namespace fs = std::filesystem;
-// for (const auto & search_path : search_paths) {
-// if (!fs::exists(search_path)) {
-// continue;
-// }
-// for (const auto & entry : fs::directory_iterator(search_path)) {
-// if (entry.is_regular_file()) {
-// std::string filename = entry.path().filename().string();
-// std::string ext = entry.path().extension().string();
-// if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-// dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
-// if (!handle && !silent) {
-// GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
-// }
-// if (handle) {
-// auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
-// if (score_fn) {
-// int s = score_fn();
-// #ifndef NDEBUG
-// GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
-// #endif
-// if (s > best_score) {
-// best_score = s;
-// best_path = entry.path().string();
-// }
-// } else {
-// if (!silent) {
-// GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
-// }
-// }
-// }
-// }
-// }
-// }
-// }
-
-// if (best_score == 0) {
-// // try to load the base backend
-// for (const auto & search_path : search_paths) {
-// std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
-// if (fs::exists(path)) {
-// return get_reg().load_backend(path.c_str(), silent);
-// }
-// }
-// return nullptr;
-// }
-
-// return get_reg().load_backend(best_path.c_str(), silent);
+    return get_reg().load_backend(best_path.c_str(), silent);
}

void ggml_backend_load_all() {
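This is the payoff of the standard bump in this file: the fork had previously gutted ggml_backend_load_best (log "NOT AVAILABLE" and return nullptr, with the real body commented out) because the directory scan relies on std::filesystem, a C++17 library. The restored logic scans each search path for [lib]ggml-<name>-*.[so|dll], loads each candidate, and keeps the one whose ggml_backend_score reports the highest score. The core scanning pattern, as a simplified sketch with illustrative names rather than the repository's exact code:

```cpp
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Sketch: collect regular files in `dir` whose name starts with `prefix`
// and whose extension equals `suffix` (e.g. "libggml-" and ".so").
std::vector<std::string> list_backends(const std::string & dir,
                                       const std::string & prefix,
                                       const std::string & suffix) {
    std::vector<std::string> hits;
    if (!fs::exists(dir)) {
        return hits;
    }
    for (const auto & entry : fs::directory_iterator(dir)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        const std::string filename = entry.path().filename().string();
        if (filename.rfind(prefix, 0) == 0 && entry.path().extension() == suffix) {
            hits.push_back(entry.path().string());
        }
    }
    return hits;
}

int main() {
    for (const auto & p : list_backends("./", "libggml-", ".so")) {
        std::cout << p << "\n";
    }
}
```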
13 changes: 2 additions & 11 deletions ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -3838,9 +3838,7 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
GGML_UNUSED(data_size);
}

-namespace ggml {
-namespace cpu {
-namespace aarch64 { //ggml::cpu::aarch64
+namespace ggml::cpu::aarch64 {
// repack
template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>
int repack(struct ggml_tensor *, const void *, size_t);
@@ -4156,8 +4154,6 @@ static const tensor_traits<block_q4_0, 8, 8> q4_0_8x8_q8_0;
// instance for IQ4
static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;

-}
-}
} // namespace ggml::cpu::aarch64

static void flag_aarch_prepacked_quant(int type)
@@ -4260,9 +4256,7 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
GGML_UNUSED(buft);
}

-namespace ggml {
-namespace cpu {
-namespace aarch64 { //ggml::cpu::aarch64
+namespace ggml::cpu::aarch64 {
class extra_buffer_type : ggml::cpu::extra_buffer_type {
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
if ( op->op == GGML_OP_MUL_MAT &&
Expand Down Expand Up @@ -4309,9 +4303,6 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
return nullptr;
}
};

-}
-}
} // namespace ggml::cpu::aarch64

ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
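Here and in the two traits files below, the three separately nested C++11 namespace blocks collapse into a single C++17 nested-namespace definition. The two spellings declare exactly the same hierarchy — a minimal sketch:

```cpp
// C++11: each namespace opened (and closed) on its own.
namespace ggml { namespace cpu { namespace aarch64 {
int repack_v1();   // illustrative declaration
}}} // ggml::cpu::aarch64

// C++17: one nested-namespace definition, one closing brace.
namespace ggml::cpu::aarch64 {
int repack_v2();   // illustrative declaration
}

int main() { return 0; }
```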
4 changes: 1 addition & 3 deletions ggml/src/ggml-cpu/ggml-cpu-traits.cpp
@@ -3,12 +3,10 @@
#include "ggml-backend-impl.h"
#include "ggml-backend.h"

-namespace ggml {
-namespace cpu {
+namespace ggml::cpu {
tensor_traits::~tensor_traits() {}

extra_buffer_type::~extra_buffer_type() {}
-}
} // namespace ggml::cpu

bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
4 changes: 1 addition & 3 deletions ggml/src/ggml-cpu/ggml-cpu-traits.h
@@ -15,8 +15,7 @@ bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size
#ifdef __cplusplus
}

-namespace ggml {
-namespace cpu {
+namespace ggml::cpu {
// register in tensor->extra
class tensor_traits {
public:
@@ -31,7 +30,6 @@ class extra_buffer_type {
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
};
-}
} // namespace ggml::cpu

// implemented in ggml-cpu.cpp.
26 changes: 15 additions & 11 deletions ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -263,18 +263,22 @@ std::map<std::string, std::string> merge_maps(const std::map<std::string, std::s
}

static std::vector<std::future<void>> compiles;
+// void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
+// {
+// // wait until fewer than N compiles are in progress.
+// // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
+// uint32_t N = 16;
+// std::unique_lock<std::mutex> guard(compile_count_mutex);
+// while (compile_count >= N) {
+// compile_count_cond.wait(guard);
+// }
+// compile_count++;
+// }
+// compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
+// }
void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
-    {
-        // wait until fewer than N compiles are in progress.
-        // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
-        uint32_t N = 16;
-        std::unique_lock<std::mutex> guard(compile_count_mutex);
-        while (compile_count >= N) {
-            compile_count_cond.wait(guard);
-        }
-        compile_count++;
-    }
-    compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
+    std::cout << "string_to_spv: " << _name << "\n";
+    string_to_spv_func(_name, in_fname, defines, fp16, coopmat, coopmat2, f16acc); //non async version
}

void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {
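This hunk is the "made shaders gen deterministic" half of the commit: string_to_spv now invokes string_to_spv_func directly instead of pushing a std::async task into compiles, so shaders are generated strictly in submission order (the mutex/condition-variable throttle for concurrent compiles becomes dead code and is kept only as comments). A toy contrast of the two dispatch styles — illustrative names, not repository code:

```cpp
#include <future>
#include <iostream>
#include <vector>

// Stand-in for a shader compile job.
void compile_shader(int id) { std::cout << "shader " << id << "\n"; }

int main() {
    // Async dispatch: tasks may run and print in any interleaving.
    std::vector<std::future<void>> jobs;
    for (int i = 0; i < 4; ++i) {
        jobs.push_back(std::async(std::launch::async, compile_shader, i));
    }
    for (auto & j : jobs) {
        j.get();
    }

    // Direct calls: output is always 0, 1, 2, 3 — the deterministic
    // behaviour the commit switches to.
    for (int i = 0; i < 4; ++i) {
        compile_shader(i);
    }
}
```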
