made shaders gen deterministic, update to c++17 (+4 squashed commits)
Squashed commit:

[7bb2441b] made shaders gen deterministic

[906e02a] Update c++ from 11 to 17 (#1263)

* Update c/c++ from 11 to 17

* Update CMakeLists.txt

only bump c++

[7ca430e] C++17 ver

[b7dfb55] give up and switch to c++17 (+1 squashed commit)

Squashed commits:

[96cfbc48] give up and switch to c++17 (+5 squashed commits)

Squashed commit:

[19ac7c26] Revert "fixed incorrect number of params"

This reverts commit 5138872.

[45f730da] Revert "fix for c++17"

This reverts commit 050ba5f.

[5138872] fixed incorrect number of params

[8f1ee54] build latest vk shaders

[050ba5f] fix for c++17
LostRuins committed Dec 13, 2024
1 parent 46d76d9 commit a63c2c9
Showing 7 changed files with 93 additions and 107 deletions.
18 changes: 9 additions & 9 deletions CMakeLists.txt
@@ -59,7 +59,7 @@ option(LLAMA_OPENMP "llama: use OpenMP"
# Compile flags
#

-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
@@ -490,52 +490,52 @@ add_library(common2
src/unicode.cpp
src/unicode-data.cpp)
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
+target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(sdtype_adapter
otherarch/sdcpp/sdtype_adapter.cpp)
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(whisper_adapter
otherarch/whispercpp/whisper_adapter.cpp)
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
-target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(gpttype_adapter
gpttype_adapter.cpp)
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

if (LLAMA_CUBLAS)
set(TARGET koboldcpp_cublas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif()

if (LLAMA_HIPBLAS)
set(TARGET koboldcpp_hipblas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif()

4 changes: 2 additions & 2 deletions Makefile
@@ -47,15 +47,15 @@ endif
# Compile flags
#

-# keep standard at C11 and C++11
+# keep standard at C11 and C++17
CFLAGS =
CXXFLAGS =
ifdef KCPP_DEBUG
CFLAGS = -g -O0
CXXFLAGS = -g -O0
endif
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
-CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
+CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
ifndef KCPP_DEBUG
CFLAGS += -DNDEBUG -s
CXXFLAGS += -DNDEBUG -s
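Both build systems now request the same language level: CMake via set(CMAKE_CXX_STANDARD 17) plus the per-target cxx_std_17 compile features, and the Makefile via the -std=c++17 flag just above. A quick sanity check that a translation unit is really compiled as C++17 — a minimal sketch, not code from this repository:

```cpp
// Sketch: fails to compile if the build did not actually enable C++17.
// Note: MSVC reports an updated __cplusplus only with /Zc:__cplusplus.
static_assert(__cplusplus >= 201703L, "expected -std=c++17 or newer");

int main() { return 0; }
```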
131 changes: 63 additions & 68 deletions ggml/src/ggml-backend-reg.cpp
@@ -64,7 +64,7 @@

#ifdef _WIN32

-using dl_handle = typename std::remove_pointer<HMODULE>::type;
+using dl_handle = std::remove_pointer_t<HMODULE>;

struct dl_handle_deleter {
void operator()(HMODULE handle) {
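The one-line change in this hunk swaps the C++11 spelling typename std::remove_pointer<HMODULE>::type for the C++14 alias template std::remove_pointer_t. Both name the same type; a self-contained sketch, with int * standing in for HMODULE so it compiles off Windows:

```cpp
#include <type_traits>

// The *_t alias templates (C++14) drop the "typename ...::type" ceremony.
// int * stands in here for HMODULE, which is itself a pointer type.
using handle_old = typename std::remove_pointer<int *>::type;
using handle_new = std::remove_pointer_t<int *>;
static_assert(std::is_same_v<handle_old, handle_new>, "identical types");

int main() { return 0; }
```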
@@ -452,75 +452,70 @@ static std::string backend_filename_suffix() {
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths
+    std::string file_prefix = backend_filename_prefix() + name + "-";
+    std::vector<std::string> search_paths;
+    if (user_search_path == nullptr) {
+        search_paths.push_back("./");
+        search_paths.push_back(get_executable_path());
+    } else {
+#if defined(_WIN32)
+        search_paths.push_back(std::string(user_search_path) + "\\");
+#else
+        search_paths.push_back(std::string(user_search_path) + "/");
+#endif
+    }

-    //not available as we don't want c++17
-    printf("\nggml_backend_load_best NOT AVAILABLE!\n");
-    return nullptr;
+    int best_score = 0;
+    std::string best_path;

+    namespace fs = std::filesystem;
+    for (const auto & search_path : search_paths) {
+        if (!fs::exists(search_path)) {
+            continue;
+        }
+        for (const auto & entry : fs::directory_iterator(search_path)) {
+            if (entry.is_regular_file()) {
+                std::string filename = entry.path().filename().string();
+                std::string ext = entry.path().extension().string();
+                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+                    dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
+                    if (!handle && !silent) {
+                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
+                    }
+                    if (handle) {
+                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
+                        if (score_fn) {
+                            int s = score_fn();
+#ifndef NDEBUG
+                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
+#endif
+                            if (s > best_score) {
+                                best_score = s;
+                                best_path = entry.path().string();
+                            }
+                        } else {
+                            if (!silent) {
+                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }

+    if (best_score == 0) {
+        // try to load the base backend
+        for (const auto & search_path : search_paths) {
+            std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
+            if (fs::exists(path)) {
+                return get_reg().load_backend(path.c_str(), silent);
+            }
+        }
+        return nullptr;
+    }

-// std::string file_prefix = backend_filename_prefix() + name + "-";
-// std::vector<std::string> search_paths;
-// if (user_search_path == nullptr) {
-// search_paths.push_back("./");
-// search_paths.push_back(get_executable_path());
-// } else {
-// #if defined(_WIN32)
-// search_paths.push_back(std::string(user_search_path) + "\\");
-// #else
-// search_paths.push_back(std::string(user_search_path) + "/");
-// #endif
-// }
-
-// int best_score = 0;
-// std::string best_path;
-
-// namespace fs = std::filesystem;
-// for (const auto & search_path : search_paths) {
-// if (!fs::exists(search_path)) {
-// continue;
-// }
-// for (const auto & entry : fs::directory_iterator(search_path)) {
-// if (entry.is_regular_file()) {
-// std::string filename = entry.path().filename().string();
-// std::string ext = entry.path().extension().string();
-// if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-// dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
-// if (!handle && !silent) {
-// GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
-// }
-// if (handle) {
-// auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
-// if (score_fn) {
-// int s = score_fn();
-// #ifndef NDEBUG
-// GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
-// #endif
-// if (s > best_score) {
-// best_score = s;
-// best_path = entry.path().string();
-// }
-// } else {
-// if (!silent) {
-// GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
-// }
-// }
-// }
-// }
-// }
-// }
-// }
-
-// if (best_score == 0) {
-// // try to load the base backend
-// for (const auto & search_path : search_paths) {
-// std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
-// if (fs::exists(path)) {
-// return get_reg().load_backend(path.c_str(), silent);
-// }
-// }
-// return nullptr;
-// }
-
-// return get_reg().load_backend(best_path.c_str(), silent);
+    return get_reg().load_backend(best_path.c_str(), silent);
}

void ggml_backend_load_all() {
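This is the payoff of the standard bump in this file: the fork had previously gutted ggml_backend_load_best (log "NOT AVAILABLE" and return nullptr, with the real body commented out) because the directory scan relies on std::filesystem, a C++17 library. The restored logic scans each search path for [lib]ggml-<name>-*.[so|dll], loads each candidate, and keeps the one whose ggml_backend_score reports the highest score. The core scanning pattern, as a simplified sketch with illustrative names rather than the repository's exact code:

```cpp
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Sketch: collect regular files in `dir` whose name starts with `prefix`
// and whose extension equals `suffix` (e.g. "libggml-" and ".so").
std::vector<std::string> list_backends(const std::string & dir,
                                       const std::string & prefix,
                                       const std::string & suffix) {
    std::vector<std::string> hits;
    if (!fs::exists(dir)) {
        return hits;
    }
    for (const auto & entry : fs::directory_iterator(dir)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        const std::string filename = entry.path().filename().string();
        if (filename.rfind(prefix, 0) == 0 && entry.path().extension() == suffix) {
            hits.push_back(entry.path().string());
        }
    }
    return hits;
}

int main() {
    for (const auto & p : list_backends("./", "libggml-", ".so")) {
        std::cout << p << "\n";
    }
}
```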
13 changes: 2 additions & 11 deletions ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -3838,9 +3838,7 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
GGML_UNUSED(data_size);
}

-namespace ggml {
-namespace cpu {
-namespace aarch64 { //ggml::cpu::aarch64
+namespace ggml::cpu::aarch64 {
// repack
template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>
int repack(struct ggml_tensor *, const void *, size_t);
@@ -4156,8 +4154,6 @@ static const tensor_traits<block_q4_0, 8, 8> q4_0_8x8_q8_0;
// instance for IQ4
static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;

-}
-}
} // namespace ggml::cpu::aarch64

static void flag_aarch_prepacked_quant(int type)
@@ -4260,9 +4256,7 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
GGML_UNUSED(buft);
}

-namespace ggml {
-namespace cpu {
-namespace aarch64 { //ggml::cpu::aarch64
+namespace ggml::cpu::aarch64 {
class extra_buffer_type : ggml::cpu::extra_buffer_type {
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
if ( op->op == GGML_OP_MUL_MAT &&
Expand Down Expand Up @@ -4309,9 +4303,6 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
return nullptr;
}
};

-}
-}
} // namespace ggml::cpu::aarch64

ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
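Here and in the two traits files below, the three separately nested C++11 namespace blocks collapse into a single C++17 nested-namespace definition. The two spellings declare exactly the same hierarchy — a minimal sketch:

```cpp
// C++11: each namespace opened (and closed) on its own.
namespace ggml { namespace cpu { namespace aarch64 {
int repack_v1();   // illustrative declaration
}}} // ggml::cpu::aarch64

// C++17: one nested-namespace definition, one closing brace.
namespace ggml::cpu::aarch64 {
int repack_v2();   // illustrative declaration
}

int main() { return 0; }
```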
4 changes: 1 addition & 3 deletions ggml/src/ggml-cpu/ggml-cpu-traits.cpp
@@ -3,12 +3,10 @@
#include "ggml-backend-impl.h"
#include "ggml-backend.h"

-namespace ggml {
-namespace cpu {
+namespace ggml::cpu {
tensor_traits::~tensor_traits() {}

extra_buffer_type::~extra_buffer_type() {}
-}
} // namespace ggml::cpu

bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
4 changes: 1 addition & 3 deletions ggml/src/ggml-cpu/ggml-cpu-traits.h
@@ -15,8 +15,7 @@ bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size
#ifdef __cplusplus
}

-namespace ggml {
-namespace cpu {
+namespace ggml::cpu {
// register in tensor->extra
class tensor_traits {
public:
@@ -31,7 +30,6 @@ class extra_buffer_type {
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
};
-}
} // namespace ggml::cpu

// implemented in ggml-cpu.cpp.
26 changes: 15 additions & 11 deletions ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -263,18 +263,22 @@ std::map<std::string, std::string> merge_maps(const std::map<std::string, std::s
}

static std::vector<std::future<void>> compiles;
+// void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
+// {
+// // wait until fewer than N compiles are in progress.
+// // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
+// uint32_t N = 16;
+// std::unique_lock<std::mutex> guard(compile_count_mutex);
+// while (compile_count >= N) {
+// compile_count_cond.wait(guard);
+// }
+// compile_count++;
+// }
+// compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
+// }
void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
-    {
-        // wait until fewer than N compiles are in progress.
-        // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
-        uint32_t N = 16;
-        std::unique_lock<std::mutex> guard(compile_count_mutex);
-        while (compile_count >= N) {
-            compile_count_cond.wait(guard);
-        }
-        compile_count++;
-    }
-    compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
+    std::cout << "string_to_spv: " << _name << "\n";
+    string_to_spv_func(_name, in_fname, defines, fp16, coopmat, coopmat2, f16acc); //non async version
}

void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {
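This hunk is the "made shaders gen deterministic" half of the commit: string_to_spv now invokes string_to_spv_func directly instead of pushing a std::async task into compiles, so shaders are generated strictly in submission order (the mutex/condition-variable throttle for concurrent compiles becomes dead code and is kept only as comments). A toy contrast of the two dispatch styles — illustrative names, not repository code:

```cpp
#include <future>
#include <iostream>
#include <vector>

// Stand-in for a shader compile job.
void compile_shader(int id) { std::cout << "shader " << id << "\n"; }

int main() {
    // Async dispatch: tasks may run and print in any interleaving.
    std::vector<std::future<void>> jobs;
    for (int i = 0; i < 4; ++i) {
        jobs.push_back(std::async(std::launch::async, compile_shader, i));
    }
    for (auto & j : jobs) {
        j.get();
    }

    // Direct calls: output is always 0, 1, 2, 3 — the deterministic
    // behaviour the commit switches to.
    for (int i = 0; i < 4; ++i) {
        compile_shader(i);
    }
}
```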
