diff --git a/build/packaging/smoke_test.py b/build/packaging/smoke_test.py
index be53ae5a37..59778b50d0 100644
--- a/build/packaging/smoke_test.py
+++ b/build/packaging/smoke_test.py
@@ -15,6 +15,14 @@
 # will fail and the process will exit.
 from executorch.extension.pybindings import portable_lib  # usort: skip
 
+# Import custom ops. This requires portable_lib to be loaded first.
+from executorch.extension.llm.custom_ops import (  # noqa: F401, F403
+    sdpa_with_kv_cache,
+)  # usort: skip
+
+# Import quantized ops. This requires portable_lib to be loaded first.
+from executorch.kernels import quantized  # usort: skip # noqa: F401, F403
+
 # Import this after importing the ExecuTorch pybindings. If the pybindings
 # links against a different torch.so than this uses, there will be a set of
 # symbol comflicts; the process will either exit now, or there will be issues
@@ -75,6 +83,15 @@ def main():
     assert len(ops) > 0, "Empty operator list"
     print(f"Found {len(ops)} operators; first element '{ops[0]}'")
 
+    # Make sure custom ops are registered.
+    assert (
+        "llama::sdpa_with_kv_cache" in ops
+    ), f"sdpa_with_kv_cache not registered, Got ops: {ops}"
+
+    # Make sure quantized ops are registered.
+    assert (
+        "quantized_decomposed::add.out" in ops
+    ), f"quantized_decomposed::add.out not registered, Got ops: {ops}"
     # Export LinearModel to .pte data.
     pte_data: bytes = export_linear_model()
 
diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt
index d42e37f9bd..36b03a480f 100644
--- a/extension/llm/custom_ops/CMakeLists.txt
+++ b/extension/llm/custom_ops/CMakeLists.txt
@@ -59,9 +59,7 @@ target_include_directories(custom_ops PUBLIC "${_common_include_directories}")
 target_include_directories(
   custom_ops PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../../include"
 )
-target_link_libraries(
-  custom_ops PUBLIC ${custom_ops_libs} executorch_core
-)
+target_link_libraries(custom_ops PUBLIC ${custom_ops_libs} executorch_core)
 
 target_compile_options(
   custom_ops PUBLIC ${_common_compile_options} -DET_USE_THREADPOOL
@@ -74,7 +72,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
   find_package(Torch CONFIG REQUIRED)
   add_library(
     custom_ops_aot_lib SHARED
-    ${_custom_ops__srcs} ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp
+    ${_custom_ops__srcs}
+    ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_fast_hadamard_transform_aten.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_tile_crop.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/op_tile_crop_aot.cpp
@@ -110,5 +109,26 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
            ${_common_compile_options} -DET_USE_THREADPOOL
   )
 
+  # pip wheels will need to be able to find the dependent libraries. On Linux,
+  # the .so has non-absolute dependencies on libs like "_portable_lib.so"
+  # without paths; as long as we `import torch` first, those dependencies will
+  # work. But Apple dylibs do not support non-absolute dependencies, so we need
+  # to tell the loader where to look for its libraries. The LC_LOAD_DYLIB
+  # entries for the portable_lib libraries will look like
+  # "@rpath/_portable_lib.cpython-310-darwin.so", so we can add an LC_RPATH
+  # entry to look in a directory relative to the installed location of our
+  # _portable_lib.so file. To see these LC_* values, run `otool -l
+  # libcustom_ops_aot_lib.dylib`.
+  if(APPLE)
+    set_target_properties(
+      custom_ops_aot_lib
+      PROPERTIES # Assume this library is installed in
+                 # <site-packages>/executorch/extension/llm/custom_ops/ and
+                 # _portable_lib.so is two directories up and over, in
+                 # <site-packages>/executorch/extension/pybindings/
+                 BUILD_RPATH "@loader_path/../../pybindings"
+                 INSTALL_RPATH "@loader_path/../../pybindings"
+    )
+  endif()
   install(TARGETS custom_ops_aot_lib DESTINATION lib)
 endif()
diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt
index f073835c93..9d2b14d8eb 100644
--- a/kernels/quantized/CMakeLists.txt
+++ b/kernels/quantized/CMakeLists.txt
@@ -114,6 +114,28 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
       target_link_libraries(
         quantized_ops_aot_lib PUBLIC quantized_ops_pybind_lib
       )
+
+      # pip wheels will need to be able to find the dependent libraries. On
+      # Linux, the .so has non-absolute dependencies on libs like
+      # "_portable_lib.so" without paths; as long as we `import torch` first,
+      # those dependencies will work. But Apple dylibs do not support
+      # non-absolute dependencies, so we need to tell the loader where to look
+      # for its libraries. The LC_LOAD_DYLIB entries for the portable_lib
+      # libraries will look like "@rpath/_portable_lib.cpython-310-darwin.so",
+      # so we can add an LC_RPATH entry to look in a directory relative to the
+      # installed location of our _portable_lib.so file. To see these LC_*
+      # values, run `otool -l libquantized_ops_aot_lib.dylib`.
+      if(APPLE)
+        set_target_properties(
+          quantized_ops_aot_lib
+          PROPERTIES # Assume this library will be installed in
+                     # <site-packages>/executorch/kernels/quantized/, and the
+                     # _portable_lib.so is installed in
+                     # <site-packages>/executorch/extension/pybindings/
+                     BUILD_RPATH "@loader_path/../../extension/pybindings"
+                     INSTALL_RPATH "@loader_path/../../extension/pybindings"
+        )
+      endif()
     endif()
   endif()
 endif()