From 8222c247e49986186c7aefe7451ff899bf02ec7e Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 25 Oct 2024 12:23:38 -0700
Subject: [PATCH 1/3] Add rpath to libcustom_ops_aot_lib.dylib and libquantized_ops_aot_lib.dylib

Summary: As titled. This issue is from
https://github.com/pytorch/torchchat/actions/runs/11523122333/job/32080481174?pr=1312

In that job when we try to load `libcustom_ops_aot_lib.dylib` into
python, it complains that it can't find
`_portable_lib.cpython-310-darwin.so`. This PR is trying to fix it by
adding the relative path to `_portable_lib.cpython-310-darwin.so` into
`LC_RPATH`.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 extension/llm/custom_ops/CMakeLists.txt | 28 +++++++++++++++++++++----
 kernels/quantized/CMakeLists.txt        | 22 +++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt
index d42e37f9bd..36b03a480f 100644
--- a/extension/llm/custom_ops/CMakeLists.txt
+++ b/extension/llm/custom_ops/CMakeLists.txt
@@ -59,9 +59,7 @@ target_include_directories(custom_ops PUBLIC "${_common_include_directories}")
 target_include_directories(
   custom_ops PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../../include"
 )
-target_link_libraries(
-  custom_ops PUBLIC ${custom_ops_libs} executorch_core
-)
+target_link_libraries(custom_ops PUBLIC ${custom_ops_libs} executorch_core)
 
 target_compile_options(
   custom_ops PUBLIC ${_common_compile_options} -DET_USE_THREADPOOL
@@ -74,7 +72,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
   find_package(Torch CONFIG REQUIRED)
   add_library(
     custom_ops_aot_lib SHARED
-    ${_custom_ops__srcs} ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp
+    ${_custom_ops__srcs}
+    ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_fast_hadamard_transform_aten.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_tile_crop.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_tile_crop_aot.cpp
@@ -110,5 +109,26 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
            ${_common_compile_options} -DET_USE_THREADPOOL
   )
 
+  # pip wheels will need to be able to find the dependent libraries. On Linux,
+  # the .so has non-absolute dependencies on libs like "_portable_lib.so"
+  # without paths; as long as we `import torch` first, those dependencies will
+  # work. But Apple dylibs do not support non-absolute dependencies, so we need
+  # to tell the loader where to look for its libraries. The LC_LOAD_DYLIB
+  # entries for the portable_lib libraries will look like
+  # "@rpath/_portable_lib.cpython-310-darwin.so", so we can add an LC_RPATH
+  # entry to look in a directory relative to the installed location of our
+  # _portable_lib.so file. To see these LC_* values, run `otool -l
+  # libcustom_ops_aot_lib.dylib`.
+  if(APPLE)
+    set_target_properties(
+      custom_ops_aot_lib
+      PROPERTIES # Assume this library will be installed in
+                 # /executorch/extension/llm/custom_ops/, and the
+                 # _portable_lib.so is installed in
+                 # /executorch/extension/pybindings/
+                 BUILD_RPATH "@loader_path/../../pybindings"
+                 INSTALL_RPATH "@loader_path/../../pybindings"
+    )
+  endif()
   install(TARGETS custom_ops_aot_lib DESTINATION lib)
 endif()
diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt
index f073835c93..96305f638e 100644
--- a/kernels/quantized/CMakeLists.txt
+++ b/kernels/quantized/CMakeLists.txt
@@ -114,6 +114,28 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
       target_link_libraries(
         quantized_ops_aot_lib PUBLIC quantized_ops_pybind_lib
       )
+
+      # pip wheels will need to be able to find the dependent libraries. On
+      # Linux, the .so has non-absolute dependencies on libs like
+      # "_portable_lib.so" without paths; as long as we `import torch` first,
+      # those dependencies will work. But Apple dylibs do not support
+      # non-absolute dependencies, so we need to tell the loader where to look
+      # for its libraries. The LC_LOAD_DYLIB entries for the portable_lib
+      # libraries will look like "@rpath/_portable_lib.cpython-310-darwin.so",
+      # so we can add an LC_RPATH entry to look in a directory relative to the
+      # installed location of our _portable_lib.so file. To see these LC_*
+      # values, run `otool -l libquantized_ops_aot_lib.dylib`.
+      if(APPLE)
+        set_target_properties(
+          quantized_ops_lib
+          PROPERTIES # Assume this library will be installed in
+                     # /executorch/kernels/quantized/, and the
+                     # _portable_lib.so is installed in
+                     # /executorch/extension/pybindings/
+                     BUILD_RPATH "@loader_path/../../extension/pybindings"
+                     INSTALL_RPATH "@loader_path/../../extension/pybindings"
+        )
+      endif()
     endif()
   endif()
 endif()

From 08f4d84709d25f0b29766922bd9d6bf76e360804 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 25 Oct 2024 15:59:33 -0700
Subject: [PATCH 2/3] Add smoke test

---
 build/packaging/smoke_test.py    | 17 +++++++++++++++++
 kernels/quantized/CMakeLists.txt |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/build/packaging/smoke_test.py b/build/packaging/smoke_test.py
index be53ae5a37..1c640fb7c5 100644
--- a/build/packaging/smoke_test.py
+++ b/build/packaging/smoke_test.py
@@ -15,6 +15,14 @@
 # will fail and the process will exit.
 from executorch.extension.pybindings import portable_lib  # usort: skip
 
+# Import custom ops. This requires portable_lib to be loaded first.
+from executorch.extension.llm.custom_ops import (
+    sdpa_with_kv_cache,
+)  # usort: skip # noqa: F401, F403
+
+# Import quantized ops. This requires portable_lib to be loaded first.
+from executorch.kernels import quantized  # usort: skip # noqa: F401, F403
+
 # Import this after importing the ExecuTorch pybindings. If the pybindings
 # links against a different torch.so than this uses, there will be a set of
 # symbol comflicts; the process will either exit now, or there will be issues
@@ -75,6 +83,15 @@ def main():
     assert len(ops) > 0, "Empty operator list"
     print(f"Found {len(ops)} operators; first element '{ops[0]}'")
 
+    # Make sure custom ops are registered.
+    assert (
+        "llama::sdpa_with_kv_cache" in ops
+    ), f"sdpa_with_kv_cache not registered, Got ops: {ops}"
+
+    # Make sure quantized ops are registered.
+    assert (
+        "quantized_decomposed::add.out" in ops
+    ), f"quantized_decomposed::add.out not registered, Got ops: {ops}"
     # Export LinearModel to .pte data.
     pte_data: bytes = export_linear_model()
 
diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt
index 96305f638e..9d2b14d8eb 100644
--- a/kernels/quantized/CMakeLists.txt
+++ b/kernels/quantized/CMakeLists.txt
@@ -127,7 +127,7 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
       # values, run `otool -l libquantized_ops_aot_lib.dylib`.
       if(APPLE)
         set_target_properties(
-          quantized_ops_lib
+          quantized_ops_aot_lib
           PROPERTIES # Assume this library will be installed in
                      # /executorch/kernels/quantized/, and the
                      # _portable_lib.so is installed in

From 9113c47c0b71193248033524b7b5274596b77f98 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 25 Oct 2024 16:21:23 -0700
Subject: [PATCH 3/3] Lint

---
 build/packaging/smoke_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/packaging/smoke_test.py b/build/packaging/smoke_test.py
index 1c640fb7c5..59778b50d0 100644
--- a/build/packaging/smoke_test.py
+++ b/build/packaging/smoke_test.py
@@ -16,9 +16,9 @@
 from executorch.extension.pybindings import portable_lib  # usort: skip
 
 # Import custom ops. This requires portable_lib to be loaded first.
-from executorch.extension.llm.custom_ops import (
+from executorch.extension.llm.custom_ops import (  # noqa: F401, F403
     sdpa_with_kv_cache,
-)  # usort: skip # noqa: F401, F403
+)  # usort: skip
 
 # Import quantized ops. This requires portable_lib to be loaded first.
 from executorch.kernels import quantized  # usort: skip # noqa: F401, F403