kbuild: add support for Clang LTO
This change adds build system support for Clang's Link Time
Optimization (LTO). With -flto, instead of ELF object files, Clang
produces LLVM bitcode, which is compiled into native code at link
time, allowing the final binary to be optimized globally. For more
details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

The Kconfig option CONFIG_LTO_CLANG is implemented as a choice,
which defaults to LTO being disabled. To use LTO, the architecture
must select ARCH_SUPPORTS_LTO_CLANG and support:

  - compiling with Clang,
  - compiling all assembly code with Clang's integrated assembler,
  - and linking with LLD.

While using CONFIG_LTO_CLANG_FULL results in the best runtime
performance, the compilation is not scalable in time or memory.
CONFIG_LTO_CLANG_THIN enables ThinLTO, which allows parallel
optimization and faster incremental builds. ThinLTO is used by
default if the architecture also selects
ARCH_SUPPORTS_LTO_CLANG_THIN:

  https://clang.llvm.org/docs/ThinLTO.html

To enable LTO, LLVM tools must be used to handle bitcode files, by
passing LLVM=1 and LLVM_IAS=1 options to make:

  $ make LLVM=1 LLVM_IAS=1 defconfig
  $ scripts/config -e LTO_CLANG_THIN
  $ make LLVM=1 LLVM_IAS=1

To prepare for LTO support with other compilers, common parts are
gated behind the CONFIG_LTO option, and LTO can be disabled for
specific files by filtering out CC_FLAGS_LTO.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201211184633.3213045-3-samitolvanen@google.com
This commit is contained in:
parent
3b15cdc159
commit
dc5723b02e
19
Makefile
19
Makefile
|
@ -893,6 +893,21 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
|
|||
export CC_FLAGS_SCS
|
||||
endif
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
ifdef CONFIG_LTO_CLANG_THIN
|
||||
CC_FLAGS_LTO += -flto=thin -fsplit-lto-unit
|
||||
KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache
|
||||
else
|
||||
CC_FLAGS_LTO += -flto
|
||||
endif
|
||||
CC_FLAGS_LTO += -fvisibility=hidden
|
||||
endif
|
||||
|
||||
ifdef CONFIG_LTO
|
||||
KBUILD_CFLAGS += $(CC_FLAGS_LTO)
|
||||
export CC_FLAGS_LTO
|
||||
endif
|
||||
|
||||
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B
|
||||
KBUILD_CFLAGS += -falign-functions=32
|
||||
endif
|
||||
|
@ -1479,7 +1494,7 @@ MRPROPER_FILES += include/config include/generated \
|
|||
*.spec
|
||||
|
||||
# Directories & files removed with 'make distclean'
|
||||
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
|
||||
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache
|
||||
|
||||
# clean - Delete most, but leave enough to build external modules
|
||||
#
|
||||
|
@ -1725,7 +1740,7 @@ PHONY += compile_commands.json
|
|||
|
||||
clean-dirs := $(KBUILD_EXTMOD)
|
||||
clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \
|
||||
$(KBUILD_EXTMOD)/compile_commands.json
|
||||
$(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache
|
||||
|
||||
PHONY += help
|
||||
help:
|
||||
|
|
91
arch/Kconfig
91
arch/Kconfig
|
@ -631,6 +631,97 @@ config SHADOW_CALL_STACK
|
|||
reading and writing arbitrary memory may be able to locate them
|
||||
and hijack control flow by modifying the stacks.
|
||||
|
||||
config LTO
|
||||
bool
|
||||
help
|
||||
Selected if the kernel will be built using the compiler's LTO feature.
|
||||
|
||||
config LTO_CLANG
|
||||
bool
|
||||
select LTO
|
||||
help
|
||||
Selected if the kernel will be built using Clang's LTO feature.
|
||||
|
||||
config ARCH_SUPPORTS_LTO_CLANG
|
||||
bool
|
||||
help
|
||||
An architecture should select this option if it supports:
|
||||
- compiling with Clang,
|
||||
- compiling inline assembly with Clang's integrated assembler,
|
||||
- and linking with LLD.
|
||||
|
||||
config ARCH_SUPPORTS_LTO_CLANG_THIN
|
||||
bool
|
||||
help
|
||||
An architecture should select this option if it can support Clang's
|
||||
ThinLTO mode.
|
||||
|
||||
config HAS_LTO_CLANG
|
||||
def_bool y
|
||||
# Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510
|
||||
depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD
|
||||
depends on $(success,test $(LLVM) -eq 1)
|
||||
depends on $(success,test $(LLVM_IAS) -eq 1)
|
||||
depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm)
|
||||
depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm)
|
||||
depends on ARCH_SUPPORTS_LTO_CLANG
|
||||
depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT
|
||||
depends on !KASAN
|
||||
depends on !GCOV_KERNEL
|
||||
depends on !MODVERSIONS
|
||||
help
|
||||
The compiler and Kconfig options support building with Clang's
|
||||
LTO.
|
||||
|
||||
choice
|
||||
prompt "Link Time Optimization (LTO)"
|
||||
default LTO_NONE
|
||||
help
|
||||
This option enables Link Time Optimization (LTO), which allows the
|
||||
compiler to optimize binaries globally.
|
||||
|
||||
If unsure, select LTO_NONE. Note that LTO is very resource-intensive
|
||||
so it's disabled by default.
|
||||
|
||||
config LTO_NONE
|
||||
bool "None"
|
||||
help
|
||||
Build the kernel normally, without Link Time Optimization (LTO).
|
||||
|
||||
config LTO_CLANG_FULL
|
||||
bool "Clang Full LTO (EXPERIMENTAL)"
|
||||
depends on HAS_LTO_CLANG
|
||||
depends on !COMPILE_TEST
|
||||
select LTO_CLANG
|
||||
help
|
||||
This option enables Clang's full Link Time Optimization (LTO), which
|
||||
allows the compiler to optimize the kernel globally. If you enable
|
||||
this option, the compiler generates LLVM bitcode instead of ELF
|
||||
object files, and the actual compilation from bitcode happens at
|
||||
the LTO link step, which may take several minutes depending on the
|
||||
kernel configuration. More information can be found from LLVM's
|
||||
documentation:
|
||||
|
||||
https://llvm.org/docs/LinkTimeOptimization.html
|
||||
|
||||
During link time, this option can use a large amount of RAM, and
|
||||
may take much longer than the ThinLTO option.
|
||||
|
||||
config LTO_CLANG_THIN
|
||||
bool "Clang ThinLTO (EXPERIMENTAL)"
|
||||
depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
|
||||
select LTO_CLANG
|
||||
help
|
||||
This option enables Clang's ThinLTO, which allows for parallel
|
||||
optimization and faster incremental compiles compared to the
|
||||
CONFIG_LTO_CLANG_FULL option. More information can be found
|
||||
from Clang's documentation:
|
||||
|
||||
https://clang.llvm.org/docs/ThinLTO.html
|
||||
|
||||
If unsure, say Y.
|
||||
endchoice
|
||||
|
||||
config HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
bool
|
||||
help
|
||||
|
|
|
@ -90,15 +90,18 @@
|
|||
* .data. We don't want to pull in .data..other sections, which Linux
|
||||
* has defined. Same for text and bss.
|
||||
*
|
||||
* With LTO_CLANG, the linker also splits sections by default, so we need
|
||||
* these macros to combine the sections during the final link.
|
||||
*
|
||||
* RODATA_MAIN is not used because existing code already defines .rodata.x
|
||||
* sections to be brought in with rodata.
|
||||
*/
|
||||
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
|
||||
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
|
||||
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
|
||||
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
|
||||
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral*
|
||||
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
|
||||
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]*
|
||||
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]*
|
||||
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
|
||||
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral*
|
||||
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
|
||||
#else
|
||||
#define TEXT_MAIN .text
|
||||
|
|
|
@ -111,7 +111,7 @@ endif
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
|
||||
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $<
|
||||
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $<
|
||||
|
||||
$(obj)/%.s: $(src)/%.c FORCE
|
||||
$(call if_changed_dep,cc_s_c)
|
||||
|
@ -421,8 +421,15 @@ $(obj)/lib.a: $(lib-y) FORCE
|
|||
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
|
||||
# module is turned into a multi object module, $^ will contain header file
|
||||
# dependencies recorded in the .*.cmd file.
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
quiet_cmd_link_multi-m = AR [M] $@
|
||||
cmd_link_multi-m = \
|
||||
rm -f $@; \
|
||||
$(AR) cDPrsT $@ $(filter %.o,$^)
|
||||
else
|
||||
quiet_cmd_link_multi-m = LD [M] $@
|
||||
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
|
||||
endif
|
||||
|
||||
$(multi-used-m): FORCE
|
||||
$(call if_changed,link_multi-m)
|
||||
|
|
|
@ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M] $@
|
|||
|
||||
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to
|
||||
# avoid a second slow LTO link
|
||||
prelink-ext := .lto
|
||||
endif
|
||||
|
||||
quiet_cmd_ld_ko_o = LD [M] $@
|
||||
cmd_ld_ko_o = \
|
||||
$(LD) -r $(KBUILD_LDFLAGS) \
|
||||
|
@ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \
|
|||
$(cmd); \
|
||||
printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
|
||||
|
||||
|
||||
# Re-generate module BTFs if either module's .ko or vmlinux changed
|
||||
$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
|
||||
$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
|
||||
+$(call if_changed_except,ld_ko_o,vmlinux)
|
||||
ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
||||
+$(if $(newer-prereqs),$(call cmd,btf_ko))
|
||||
|
|
|
@ -43,6 +43,9 @@ __modpost:
|
|||
include include/config/auto.conf
|
||||
include scripts/Kbuild.include
|
||||
|
||||
# for ld_flags
|
||||
include scripts/Makefile.lib
|
||||
|
||||
MODPOST = scripts/mod/modpost \
|
||||
$(if $(CONFIG_MODVERSIONS),-m) \
|
||||
$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \
|
||||
|
@ -102,12 +105,26 @@ $(input-symdump):
|
|||
@echo >&2 'WARNING: Symbol version dump "$@" is missing.'
|
||||
@echo >&2 ' Modules may not have dependencies or modversions.'
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run
|
||||
# LTO to compile them into native code before running modpost
|
||||
prelink-ext := .lto
|
||||
|
||||
quiet_cmd_cc_lto_link_modules = LTO [M] $@
|
||||
cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^
|
||||
|
||||
%.lto.o: %.o
|
||||
$(call if_changed,cc_lto_link_modules)
|
||||
endif
|
||||
|
||||
modules := $(sort $(shell cat $(MODORDER)))
|
||||
|
||||
# Read out modules.order to pass in modpost.
|
||||
# Otherwise, allmodconfig would fail with "Argument list too long".
|
||||
quiet_cmd_modpost = MODPOST $@
|
||||
cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T -
|
||||
cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T -
|
||||
|
||||
$(output-symdump): $(MODORDER) $(input-symdump) FORCE
|
||||
$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE
|
||||
$(call if_changed,modpost)
|
||||
|
||||
targets += $(output-symdump)
|
||||
|
|
|
@ -56,6 +56,14 @@ modpost_link()
|
|||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group"
|
||||
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
# This might take a while, so indicate that we're doing
|
||||
# an LTO link
|
||||
info LTO ${1}
|
||||
else
|
||||
info LD ${1}
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
|
||||
}
|
||||
|
||||
|
@ -103,13 +111,22 @@ vmlinux_link()
|
|||
fi
|
||||
|
||||
if [ "${SRCARCH}" != "um" ]; then
|
||||
objects="--whole-archive \
|
||||
${KBUILD_VMLINUX_OBJS} \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${@}"
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
# Use vmlinux.o instead of performing the slow LTO
|
||||
# link again.
|
||||
objects="--whole-archive \
|
||||
vmlinux.o \
|
||||
--no-whole-archive \
|
||||
${@}"
|
||||
else
|
||||
objects="--whole-archive \
|
||||
${KBUILD_VMLINUX_OBJS} \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${@}"
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
|
||||
${strip_debug#-Wl,} \
|
||||
|
@ -274,7 +291,6 @@ fi;
|
|||
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
|
||||
|
||||
#link vmlinux.o
|
||||
info LD vmlinux.o
|
||||
modpost_link vmlinux.o
|
||||
objtool_link vmlinux.o
|
||||
|
||||
|
|
Loading…
Reference in New Issue