Below is an updated patch to incorporate everyone's feedback so far.

I recognize all of the counter-arguments, and I agree with them in general. Indeed, as applications use more SIMD, this kind of patch goes in the wrong direction. However, there are applications that do not use enough SSE to offset the extra context-switch cost. SSE does not provide a clear benefit in the current libthr code with the current compiler, but it does provide a clear loss in some cases. Therefore, disabling SSE in libthr is a non-loss for most, and a gain for some.

I refrained from disabling SSE in libc--as was suggested--because I can't make the above argument for libc. It provides such a variety of code that SSE might be a net win in some cases. I wish I had time to identify and benchmark the interesting cases.

Thanks in advance for your further review and comments.

Eric

Index: head/lib/libthr/arch/amd64/Makefile.inc
===================================================================
--- head/lib/libthr/arch/amd64/Makefile.inc (revision 281473)
+++ head/lib/libthr/arch/amd64/Makefile.inc (working copy)
@@ -1,3 +1,9 @@
 #$FreeBSD$
 
 SRCS+= _umtx_op_err.S
+
+# With the current compiler and libthr code, using SSE in libthr
+# does not provide enough performance improvement to outweigh
+# the extra context switch cost. This can measurably impact
+# performance when the application also does not use enough SSE.
+CFLAGS+=${CFLAGS_NO_SIMD}
Index: head/lib/libthr/arch/i386/Makefile.inc
===================================================================
--- head/lib/libthr/arch/i386/Makefile.inc (revision 281473)
+++ head/lib/libthr/arch/i386/Makefile.inc (working copy)
@@ -1,3 +1,9 @@
 # $FreeBSD$
 
 SRCS+= _umtx_op_err.S
+
+# With the current compiler and libthr code, using SSE in libthr
+# does not provide enough performance improvement to outweigh
+# the extra context switch cost. This can measurably impact
+# performance when the application also does not use enough SSE.
+CFLAGS+=${CFLAGS_NO_SIMD}
Index: head/libexec/rtld-elf/amd64/Makefile.inc
===================================================================
--- head/libexec/rtld-elf/amd64/Makefile.inc (revision 281473)
+++ head/libexec/rtld-elf/amd64/Makefile.inc (working copy)
@@ -1,6 +1,6 @@
 # $FreeBSD$
 
-CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 -msoft-float
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 # Uncomment this to build the dynamic linker as an executable instead
 # of a shared library:
 #LDSCRIPT= ${.CURDIR}/${MACHINE_CPUARCH}/elf_rtld.x
Index: head/libexec/rtld-elf/i386/Makefile.inc
===================================================================
--- head/libexec/rtld-elf/i386/Makefile.inc (revision 281473)
+++ head/libexec/rtld-elf/i386/Makefile.inc (working copy)
@@ -1,6 +1,6 @@
 # $FreeBSD$
 
-CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 -msoft-float
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 # Uncomment this to build the dynamic linker as an executable instead
 # of a shared library:
 #LDSCRIPT= ${.CURDIR}/${MACHINE_CPUARCH}/elf_rtld.x
Index: head/share/mk/bsd.sys.mk
===================================================================
--- head/share/mk/bsd.sys.mk (revision 281473)
+++ head/share/mk/bsd.sys.mk (working copy)
@@ -153,6 +153,26 @@ SSP_CFLAGS?= -fstack-protector
 CFLAGS+= ${SSP_CFLAGS}
 .endif # SSP && !ARM && !MIPS
 
+#
+# Prohibit the compiler from emitting SIMD instructions.
+# These flags are added to CFLAGS in areas where the extra context-switch
+# cost outweighs the advantages of SIMD instructions.
+#
+# gcc:
+# Setting -mno-mmx implies -mno-3dnow
+# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3 and -mfpmath=387
+#
+# clang:
+# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
+# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and
+# -mno-sse42
+# (-mfpmath= is not supported)
+#
+.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
+CFLAGS_NO_SIMD.clang= -mno-avx
+CFLAGS_NO_SIMD= -mno-mmx -mno-sse ${CFLAGS_NO_SIMD.${COMPILER_TYPE}}
+.endif
+
 # Allow user-specified additional warning flags, plus compiler specific flag overrides.
 # Unless we've overriden this...
 .if ${MK_WARNS} != "no"
Index: head/sys/conf/kern.mk
===================================================================
--- head/sys/conf/kern.mk (revision 281473)
+++ head/sys/conf/kern.mk (working copy)
@@ -75,18 +75,10 @@ FORMAT_EXTENSIONS= -fformat-extensions
 # operations inside the kernel itself. These operations are exclusively
 # reserved for user applications.
 #
-# gcc:
-# Setting -mno-mmx implies -mno-3dnow
-# Setting -mno-sse implies -mno-sse2, -mno-sse3 and -mno-ssse3
-#
-# clang:
-# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42
-#
 .if ${MACHINE_CPUARCH} == "i386"
 CFLAGS.gcc+= -mno-align-long-strings -mpreferred-stack-boundary=2
-CFLAGS.clang+= -mno-aes -mno-avx
-CFLAGS+= -mno-mmx -mno-sse -msoft-float
+CFLAGS.clang+= -mno-aes
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 INLINE_LIMIT?= 8000
 .endif
 
@@ -111,18 +103,9 @@ INLINE_LIMIT?= 15000
 # operations inside the kernel itself. These operations are exclusively
 # reserved for user applications.
 #
-# gcc:
-# Setting -mno-mmx implies -mno-3dnow
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3 and -mfpmath=387
-#
-# clang:
-# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42
-# (-mfpmath= is not supported)
-#
 .if ${MACHINE_CPUARCH} == "amd64"
-CFLAGS.clang+= -mno-aes -mno-avx
-CFLAGS+= -mcmodel=kernel -mno-red-zone -mno-mmx -mno-sse -msoft-float \
+CFLAGS.clang+= -mno-aes
+CFLAGS+= -mcmodel=kernel -mno-red-zone ${CFLAGS_NO_SIMD} -msoft-float \
  -fno-asynchronous-unwind-tables
 INLINE_LIMIT?= 8000
 .endif

Received on Tue Apr 14 2015 - 13:31:19 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:56 UTC