From 3cb8a528a5c474c3435b9ff87848b52f96770990 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Mon, 20 Nov 2023 17:25:58 +0900 Subject: [PATCH] Make `-z rewrite-endbr` work without -ffunction-sections Now we can rewrite an endbr64 instruction even if it is not at the beginning of a section. --- elf/input-sections.cc | 12 ---------- elf/main.cc | 3 +++ elf/mold.h | 4 ++++ elf/passes.cc | 46 +++++++++++++++++++++++++++++++++++++++ test/elf/x86_64_endbr2.sh | 28 ++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 12 deletions(-) create mode 100755 test/elf/x86_64_endbr2.sh diff --git a/elf/input-sections.cc b/elf/input-sections.cc index 70187ef392..2ed9aa8333 100644 --- a/elf/input-sections.cc +++ b/elf/input-sections.cc @@ -457,18 +457,6 @@ void InputSection::write_to(Context &ctx, u8 *buf) { apply_reloc_alloc(ctx, buf); else apply_reloc_nonalloc(ctx, buf); - - if constexpr (is_x86_64) { - u8 endbr[] = {0xf3, 0x0f, 0x1e, 0xfa}; - u8 nop[] = {0x0f, 0x1f, 0x40, 0x00}; - - // Rewrite the leading endbr instruction with a nop if the section - // is not address-taken. - if (ctx.arg.z_rewrite_endbr && (shdr().sh_flags & SHF_EXECINSTR) && - !address_taken && sh_size >= 4 && memcmp(buf, endbr, 4) == 0) { - memcpy(buf, nop, 4); - } - } } } diff --git a/elf/main.cc b/elf/main.cc index 6eacf407a2..baf2157926 100644 --- a/elf/main.cc +++ b/elf/main.cc @@ -631,6 +631,9 @@ int elf_main(int argc, char **argv) { // Copy input sections to the output file and apply relocations. copy_chunks(ctx); + if (ctx.arg.z_rewrite_endbr) + rewrite_endbr(ctx); + // Dynamic linker works better with sorted .rela.dyn section, // so we sort them. 
ctx.reldyn->sort(ctx); diff --git a/elf/mold.h b/elf/mold.h index 54a420e4e8..9c97a2caa7 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -1390,6 +1390,7 @@ template void create_output_symtab(Context &); template void report_undef_errors(Context &); template void create_reloc_sections(Context &); template void copy_chunks(Context &); +template void rewrite_endbr(Context &); template void apply_version_script(Context &); template void parse_symbol_version(Context &); template void compute_import_export(Context &); @@ -2163,6 +2164,9 @@ class Symbol { // opposed to IR object). bool referenced_by_regular_obj : 1 = false; + // For `-z rewrite-endbr` + bool address_taken : 1 = false; + // Target-dependent extra members. [[no_unique_address]] SymbolExtras extra; }; diff --git a/elf/passes.cc b/elf/passes.cc index e51acfa9cd..c143ce7dd8 100644 --- a/elf/passes.cc +++ b/elf/passes.cc @@ -1566,6 +1566,51 @@ void copy_chunks(Context &ctx) { fixup_arm_exidx_section(ctx); } +// Rewrite the leading endbr64 instruction with a nop if a function +// symbol's address was not taken. 
+template +void rewrite_endbr(Context &ctx) { + if constexpr (is_x86_64) { + // Compute address-taken bit for each symbol: in every live SHF_ALLOC section, a relocation for which is_func_call_rel() is false and whose target symbol is STT_FUNC marks that symbol as address-taken + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) { + for (const ElfRel &rel : isec->get_rels(ctx)) { + Symbol &sym = *file->symbols[rel.r_sym]; + if (!is_func_call_rel(rel) && sym.esym().st_type == STT_FUNC) { + std::scoped_lock lock(sym.mu); + sym.address_taken = true; + } + } + } + } + }); + + // Some symbols are implicitly address-taken: those named by ctx.arg.entry, ctx.arg.init and ctx.arg.fini + get_symbol(ctx, ctx.arg.entry)->address_taken = true; + get_symbol(ctx, ctx.arg.init)->address_taken = true; + get_symbol(ctx, ctx.arg.fini)->address_taken = true; + + // Rewrite endbr64 with nop: endbr and nop are both 4-byte sequences, so the rewrite below overwrites bytes in place in the output buffer. NOTE(review): the loop below compares 4 bytes at the symbol's output location without checking the section size or SHF_EXECINSTR; confirm that is safe + u8 endbr[] = {0xf3, 0x0f, 0x1e, 0xfa}; + u8 nop[] = {0x0f, 0x1f, 0x40, 0x00}; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) { + if (sym->file == file && sym->esym().st_type == STT_FUNC && + !sym->address_taken) { + if (InputSection *isec = sym->get_input_section()) { + u8 *buf = ctx.buf + isec->output_section->shdr.sh_offset + + isec->offset + sym->value; + if (memcmp(buf, endbr, 4) == 0) + memcpy(buf, nop, 4); + } + } + } + }); + } +} + template void construct_relr(Context &ctx) { Timer t(ctx, "construct_relr"); @@ -2883,6 +2928,7 @@ template void scan_relocations(Context &); template void report_undef_errors(Context &); template void create_reloc_sections(Context &); template void copy_chunks(Context &); +template void rewrite_endbr(Context &); template void construct_relr(Context &); template void create_output_symtab(Context &); template void apply_version_script(Context &); diff --git a/test/elf/x86_64_endbr2.sh b/test/elf/x86_64_endbr2.sh new file mode 100755 index 0000000000..69c793f72c --- /dev/null +++ b/test/elf/x86_64_endbr2.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip +test_cflags -fcf-protection || skip + +cat < $t/log1 + +grep -A1 ':' $t/log1 | grep -q endbr64 +grep -A1 ':' $t/log1 | grep -q endbr64 +grep -A1 '
:' $t/log1 | grep -q endbr64 + +$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,-z,rewrite-endbr +$OBJDUMP -dr $t/exe2 > $t/log2 + +grep -A1 ':' $t/log2 | grep -q nop +grep -A1 ':' $t/log2 | grep -q nop +grep -A1 '
:' $t/log2 | grep -q endbr64