update

snnn · Dec 8, 2023 · 365a067 · 365a067
1 parent c7799d7
commit 365a067
Show file tree

Hide file tree

Showing 12 changed files with 185 additions and 46 deletions.
diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake
@@ -74,11 +74,6 @@ if (onnxruntime_MINIMAL_BUILD)
   endif()
 
   if (MSVC)
-    # turn on LTO (which adds some compiler flags and turns on LTCG) unless it's a Debug build to minimize binary size
-    if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
-      set(onnxruntime_ENABLE_LTO ON)
-    endif()
-
     # undocumented internal flag to allow analysis of a minimal build binary size
     if (ADD_DEBUG_INFO_TO_MINIMAL_BUILD)
       string(APPEND CMAKE_CXX_FLAGS " /Zi")
@@ -272,11 +267,6 @@ if (MSVC)
     message("Building ONNX Runtime for Windows 10 and newer")
     add_compile_definitions(WINVER=0x0A00 _WIN32_WINNT=0x0A00 NTDDI_VERSION=0x0A000000)
   endif()
-  if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA)
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL")
-    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL")
-    set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL")
-  endif()
 
   # The WinML build tool chain builds ARM/ARM64, and the internal tool chain does not have folders for spectre mitigation libs.
   # WinML performs spectre mitigation differently.

diff --git a/onnxruntime/test/framework/bfc_arena_test.cc b/onnxruntime/test/framework/bfc_arena_test.cc
@@ -164,7 +164,11 @@ void TestCustomMemoryLimit_ProcessException(const OnnxRuntimeException& ex) {
 #endif  // #ifdef GTEST_USES_POSIX_RE
 }
 
+#ifdef __SANITIZE_ADDRESS__
+TEST(BFCArenaTest, DISABLED_TestCustomMemoryLimit) {
+#else
 TEST(BFCArenaTest, TestCustomMemoryLimit) {
+#endif
   {
     // Configure a 1MiB byte limit
     BFCArena a(std::unique_ptr<IAllocator>(new CPUAllocator()), 1 << 20);

diff --git a/onnxruntime/test/logging_apis/test_logging_apis.cc b/onnxruntime/test/logging_apis/test_logging_apis.cc
@@ -167,7 +167,13 @@ TEST_F(RealCAPITestsFixture, CApiLoggerLogMessage) {
                                                     ORT_FILE, line_num, static_cast<const char*>(__FUNCTION__)));
 }
 
+// The code below where it tests for formatting error generates an out-of-bound memory access. Therefore we disable it 
+// when memory sanitizer is enabled.
+#if defined(__SANITIZE_ADDRESS__)
+TEST_F(RealCAPITestsFixture, DISABLED_CppApiORTCXXLOG) {
+#else
 TEST_F(RealCAPITestsFixture, CppApiORTCXXLOG) {
+#endif
   // Tests the output and filtering of the ORT_CXX_LOG and ORT_CXX_LOG_NOEXCEPT macros in the C++ API.
   // The first two calls go through, but the last two calls are filtered out due to an insufficient severity.
 
@@ -203,7 +209,11 @@ TEST_F(RealCAPITestsFixture, CppApiORTCXXLOG) {
   ORT_CXX_LOG_NOEXCEPT(cpp_ort_logger, OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO, "Ignored2");
 }
 
+#if defined(__SANITIZE_ADDRESS__)
+TEST_F(RealCAPITestsFixture, DISABLED_CppApiORTCXXLOGF) {
+#else
 TEST_F(RealCAPITestsFixture, CppApiORTCXXLOGF) {
+#endif
   // Tests the output and filtering of the ORT_CXX_LOGF and ORT_CXX_LOGF_NOEXCEPT macros in the C++ API.
   // The first set of calls go through. The next set of calls are filtered out due to an insufficient severity.
   // The last calls have a formatting error and we expect an exception depending on which macro is used.

diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
@@ -600,6 +600,11 @@ def convert_arg_line_to_args(self, arg_line):
         "--use_telemetry", action="store_true", help="Only official builds can set this flag to enable telemetry."
     )
     parser.add_argument("--enable_wcos", action="store_true", help="Build for Windows Core OS.")
+    # Do not enable LTO when the compiler is MSVC and the flag for generating debug symbols is set to /Z7 and training
+    # is also enabled. Because both LTO and /Zi could significantly increase *.obj/*.lib files' size, and on Windows
+    # there is a 4GB per file limit(ERROR LNK1248). We may solve the issue by splitting the big static libs to smaller
+    # ones. Before the refactoring work is done, we should avoid enabling LTO and ccache at the same time because ccache
+    # needs /Z7.
     parser.add_argument("--enable_lto", action="store_true", help="Enable Link Time Optimization")
     parser.add_argument("--enable_transformers_tool_test", action="store_true", help="Enable transformers tool test")
     parser.add_argument(
@@ -1405,6 +1410,17 @@ def generate_build_tree(
     if args.use_lock_free_queue:
         add_default_definition(cmake_extra_defines, "onnxruntime_USE_LOCK_FREE_QUEUE", "ON")
 
+    if is_windows():
+        if args.use_cache:
+            add_default_definition(
+                cmake_extra_defines, "CMAKE_MSVC_DEBUG_INFORMATION_FORMAT", "$<$<CONFIG:Debug,RelWithDebInfo>:Embedded>"
+            )
+        else:
+            # Always enable debug info even in release build. The debug information is in separated *.pdb files that
+            # can be easily discarded when debug symbols are not needed. We enable it by default because many auditting
+            # tools need to use the symbols.
+            add_default_definition(cmake_extra_defines, "CMAKE_MSVC_DEBUG_INFORMATION_FORMAT", "ProgramDatabase")
+
     cmake_args += [f"-D{define}" for define in cmake_extra_defines]
 
     cmake_args += cmake_extra_args
@@ -1444,8 +1460,68 @@ def generate_build_tree(
                     f"-DVERSION_PRIVATE_PART={MM}{DD}",
                     f"-DVERSION_STRING={ort_major}.{ort_minor}.{build_number}.{source_version[0:7]}",
                 ]
-
+    cflags = None
+    cxxflags = None
     for config in configs:
+        if "CFLAGS" not in os.environ and "CXXFLAGS" not in os.environ:
+            if is_windows():
+                cflags = [
+                    "/MP",
+                    "/guard:cf",
+                    "/DWIN32",
+                    "/D_WINDOWS",
+                    "/DWINVER=0x0A00",
+                    "/D_WIN32_WINNT=0x0A00",
+                    "/DNTDDI_VERSION=0x0A000000",
+                ]
+                if config == "Release":
+                    cflags += ["/O2", "/Ob2", "/DNDEBUG"]
+                elif config == "RelWithDebInfo":
+                    cflags += ["/O2", "/Ob1", "/DNDEBUG"]
+                elif config == "Debug":
+                    cflags += ["/Ob0", "/Od", "/RTC1", "/fsanitize=address"]
+                    if platform.architecture()[0] != "32bit":
+                        cflags += ["/fsanitize=address"]
+                elif config == "MinSizeRel":
+                    cflags += ["/O1", "/Ob1", "/DNDEBUG"]
+                if args.enable_lto:
+                    cflags += ["/Gw", "/GL"]
+                cxxflags = cflags.copy()
+                cxxflags += ["/EHsc"]
+            elif is_linux() or is_macOS():
+                if config == "Release":
+                    cflags = [
+                        "-DNDEBUG",
+                        "-Wp,-D_FORTIFY_SOURCE=2",
+                        "-Wp,-D_GLIBCXX_ASSERTIONS",
+                        "-fstack-protector-strong",
+                        "-O3",
+                        "-pipe",
+                        "-Wl,--strip-all",
+                    ]
+                elif config == "RelWithDebInfo":
+                    cflags = [
+                        "-DNDEBUG",
+                        "-Wp,-D_FORTIFY_SOURCE=2",
+                        "-Wp,-D_GLIBCXX_ASSERTIONS",
+                        "-fstack-protector-strong",
+                        "-O3",
+                        "-pipe",
+                        "-ggdb3",
+                    ]
+                elif config == "Debug":
+                    cflags = ["-ggdb3", "-O0"]
+                elif config == "MinSizeRel":
+                    cflags = [
+                        "-DNDEBUG",
+                        "-Wp,-D_FORTIFY_SOURCE=2",
+                        "-Wp,-D_GLIBCXX_ASSERTIONS",
+                        "-fstack-protector-strong",
+                        "-Os",
+                        "-pipe",
+                        "-ggdb3",
+                    ]
+
         config_build_dir = get_config_build_dir(build_dir, config)
         os.makedirs(config_build_dir, exist_ok=True)
         if args.use_tvm:
@@ -1459,20 +1535,15 @@ def generate_build_tree(
                 + os.environ["PATH"]
             )
         preinstalled_dir = Path(build_dir) / config
+        temp_cmake_args = cmake_args.copy()
+        if cflags is not None and cxxflags is not None:
+            temp_cmake_args += [
+                "-DCMAKE_C_FLAGS=%s" % (" ".join(cflags)),
+                "-DCMAKE_CXX_FLAGS=%s" % (" ".join(cxxflags)),
+            ]
         run_subprocess(
             [
-                *cmake_args,
-                "-Donnxruntime_ENABLE_MEMLEAK_CHECKER="
-                + (
-                    "ON"
-                    if config.lower() == "debug"
-                    and not args.use_tvm
-                    and not args.use_openvino
-                    and not args.use_gdk
-                    and not args.enable_msvc_static_runtime
-                    and not args.disable_memleak_checker
-                    else "OFF"
-                ),
+                *temp_cmake_args,
                 f"-DCMAKE_BUILD_TYPE={config}",
                 f"-DCMAKE_PREFIX_PATH={build_dir}/{config}/installed"
                 if preinstalled_dir.exists() and not (args.arm64 or args.arm64ec or args.arm)

diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml
@@ -30,6 +30,41 @@ steps:
     addToPath: true
     architecture: ${{parameters.BuildArch}}
 
+- ${{ if eq(parameters.BuildArch, 'x64') }}:
+  - script: |
+      @echo off
+      set vswherepath="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
+      for /f "usebackq delims=" %%i in (`%vswherepath% -latest -property installationPath`) do (
+        if exist "%%i\VC\Auxiliary\Build\vcvars64.bat" (
+          set vcvarsall="%%i\VC\Auxiliary\Build\vcvars64.bat"
+        )
+      )
+
+      @echo %vcvarsall% will be used as the VC compiler
+      @echo ##vso[task.setvariable variable=vcvarsall]%vcvarsall%
+    displayName: 'locate vcvarsall via vswhere'
+
+- ${{ if eq(parameters.BuildArch, 'x86') }}:
+  - script: |
+      @echo off
+      set vswherepath="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
+      for /f "usebackq delims=" %%i in (`%vswherepath% -latest -property installationPath`) do (
+        if exist "%%i\VC\Auxiliary\Build\vcvars32.bat" (
+          set vcvarsall="%%i\VC\Auxiliary\Build\vcvars32.bat"
+        )
+      )
+
+      @echo %vcvarsall% will be used as the VC compiler
+      @echo ##vso[task.setvariable variable=vcvarsall]%vcvarsall%
+    displayName: 'locate vcvarsall via vswhere'
+
+- task: BatchScript@1
+  displayName: 'Setup VC env'
+  inputs:
+    filename: '$(vcvarsall)'
+    modifyEnvironment: true
+    workingFolder: '$(Build.BinariesDirectory)'
+
 - script: |
     python -m pip install --upgrade "setuptools>=68.2.2" wheel numpy flatbuffers
   workingDirectory: '$(Build.BinariesDirectory)'

diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
@@ -49,7 +49,7 @@ stages:
         isTraining: false
         ORT_EP_NAME: CPU
         GenerateDocumentation: false
-        WITH_CACHE: true
+        WITH_CACHE: false
         MachinePool: 'onnxruntime-Win-CPU-2022'
 
 - stage: x64_release
@@ -129,7 +129,7 @@ stages:
         GenerateDocumentation: false
         WITH_CACHE: false
         MachinePool: 'onnxruntime-Win-CPU-2022'
-
+        
 - stage: x86_release
   dependsOn: []
   jobs:
@@ -164,7 +164,7 @@ stages:
         isTraining: true
         ORT_EP_NAME: CPU
         GenerateDocumentation: false
-        WITH_CACHE: true
+        WITH_CACHE: false
         MachinePool: 'onnxruntime-Win2022-CPU-training-AMD'
 
 - stage: training_x64_release
@@ -182,7 +182,7 @@ stages:
         isTraining: true
         ORT_EP_NAME: CPU
         GenerateDocumentation: false
-        WITH_CACHE: true
+        WITH_CACHE: false
         MachinePool: 'onnxruntime-Win2022-CPU-training-AMD'
 
 - stage: ort_training_apis_x64_release

diff --git a/tools/ci_build/github/linux/build_cuda_c_api_package.sh b/tools/ci_build/github/linux/build_cuda_c_api_package.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -e -x
-export CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
-export CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
+export CFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
+export CXXFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
 docker run --gpus all -e CFLAGS -e CXXFLAGS  -e NVIDIA_VISIBLE_DEVICES=all --rm --volume \
 $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
 --volume /data/models:/build/models:ro --volume /data/onnx:/data/onnx:ro -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \

diff --git a/tools/ci_build/github/linux/build_linux_python_package.sh b/tools/ci_build/github/linux/build_linux_python_package.sh
@@ -29,8 +29,8 @@ if [ "$BUILD_CONFIG" == "Debug" ]; then
     CFLAGS="-ggdb3"
     CXXFLAGS="-ggdb3"
 else
-    CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
-    CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
+    CFLAGS="-DNDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
+    CXXFLAGS="-DNDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
     BUILD_ARGS+=("--enable_lto")
 fi
 

diff --git a/tools/ci_build/github/linux/build_rocm_c_api_package.sh b/tools/ci_build/github/linux/build_rocm_c_api_package.sh
@@ -24,8 +24,8 @@ docker run --rm \
   --security-opt seccomp=unconfined \
   --shm-size=1024m \
   --user $UID:$(id -g $USER) \
-  -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
-  -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
+  -e CFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
+  -e CXXFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
   -e NIGHTLY_BUILD \
   --volume $SOURCE_DIR:/onnxruntime_src \
   --volume $BINARY_DIR:/build \

diff --git a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -e -x
-export CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
-export CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
+export CFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
+export CXXFLAGS="-NDEBUG -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
 mkdir -p $HOME/.onnx
 docker run --gpus all -e CFLAGS -e CXXFLAGS -e NVIDIA_VISIBLE_DEVICES=all --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
 --volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \