From d642a320e3d59768fdb2709173a13e6393164c9f Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Wed, 17 Aug 2022 11:20:33 -0400 Subject: [PATCH] kernel: Add SPR-5G-ISA/AVX512-FP16 support This commit adds Sapphire Rapids 5G Instruction Set Architecture (SPR-5G-ISA) support to the CentOS-based and Debian-based StarlingX kernels. This involves AVX512-FP16 instructions, but note that these instructions are not directly used by the kernel. The benefits for StarlingX users are the ability to enumerate CPUs' AVX512-FP16 capabilities, and the ability to start KVM-based virtual machines that can make use of the capabilities in question. (Please note that supporting AVX512-FP16 with KVM virtual machines requires patching StarlingX's qemu-kvm-ev package in addition to this commit.) The cherry-picked commits were acquired from the v5.11 kernel release, and all of them applied cleanly. The only change to the patches involved the third patch, which was modified to remove a reference to a CPU feature (X86_FEATURE_VM_PAGE_FLUSH) that is not supported by StarlingX's v5.10 kernel baseline. Test plan: - CentOS-based StarlingX - Standard and preempt-rt kernels and all out-of-tree kernel modules were successfully built using a monolithic build procedure. - An ISO image was successfully built with this change. - The changes were confirmed to not negatively affect installation and Ansible boot-strap procedures in All-in-One Simplex virtual machines using standard and low-latency profiles. - Using a Sapphire Rapids-based server in All-in-One Simplex configuration, the aforementioned ISO image was installed and Ansible-bootstrapped, and the enumeration of the "avx512_fp16" CPU feature in /proc/cpuinfo was verified with the low-latency and standard kernels. - Debian-based StarlingX - An ISO image was successfully built (in an incremental manner) with this change. 
- The changes were confirmed to not negatively affect installation and Ansible boot-strap procedures in All-in-One Simplex virtual machines using standard and low-latency profiles. (Due to time constraints, Debian-based StarlingX tests were carried out with virtual machines only.) Story: 2010247 Task: 46073 Change-Id: I430de20651b6c4a0aa0d854d295b1760cb7b889c Signed-off-by: M. Vefa Bicakci --- kernel-rt/centos/kernel-rt.spec | 3 + ...erate-AVX512-FP16-CPUID-feature-flag.patch | 60 +++++++++++++++++++ ...pose-AVX512_FP16-for-supported-CPUID.patch | 41 +++++++++++++ ...ufeatures-Sync-with-the-kernel-sourc.patch | 51 ++++++++++++++++ ...erate-AVX512-FP16-CPUID-feature-flag.patch | 60 +++++++++++++++++++ ...pose-AVX512_FP16-for-supported-CPUID.patch | 41 +++++++++++++ ...ufeatures-Sync-with-the-kernel-sourc.patch | 51 ++++++++++++++++ kernel-rt/debian/patches/series | 3 + kernel-std/centos/kernel.spec | 3 + ...erate-AVX512-FP16-CPUID-feature-flag.patch | 60 +++++++++++++++++++ ...pose-AVX512_FP16-for-supported-CPUID.patch | 41 +++++++++++++ ...ufeatures-Sync-with-the-kernel-sourc.patch | 51 ++++++++++++++++ ...erate-AVX512-FP16-CPUID-feature-flag.patch | 60 +++++++++++++++++++ ...pose-AVX512_FP16-for-supported-CPUID.patch | 41 +++++++++++++ ...ufeatures-Sync-with-the-kernel-sourc.patch | 51 ++++++++++++++++ kernel-std/debian/patches/series | 3 + 16 files changed, 620 insertions(+) create mode 100644 kernel-rt/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch create mode 100644 kernel-rt/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch create mode 100644 kernel-rt/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch create mode 100644 kernel-rt/debian/patches/0031-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch create mode 100644 kernel-rt/debian/patches/0032-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch create mode 100644 
kernel-rt/debian/patches/0033-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch create mode 100644 kernel-std/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch create mode 100644 kernel-std/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch create mode 100644 kernel-std/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch create mode 100644 kernel-std/debian/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch create mode 100644 kernel-std/debian/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch create mode 100644 kernel-std/debian/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch diff --git a/kernel-rt/centos/kernel-rt.spec b/kernel-rt/centos/kernel-rt.spec index be62107b..4657190c 100644 --- a/kernel-rt/centos/kernel-rt.spec +++ b/kernel-rt/centos/kernel-rt.spec @@ -805,6 +805,9 @@ Patch25: 0026-driver-core-auxiliary-bus-Remove-unneeded-module-bit.patch Patch26: 0027-driver-core-auxiliary-bus-Fix-memory-leak-when-drive.patch Patch27: 0028-driver-core-auxiliary-bus-Enable-by-default.patch Patch28: 0029-Enable-CONFIG_PAGE_POOL-by-default.patch +Patch29: 0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch +Patch30: 0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch +Patch31: 0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch # END OF PATCH DEFINITIONS %endif diff --git a/kernel-rt/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch b/kernel-rt/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch new file mode 100644 index 00000000..210790f3 --- /dev/null +++ b/kernel-rt/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch @@ -0,0 +1,60 @@ +From b1d119de793160cd63748316025b68b1b2d50f31 Mon Sep 17 00:00:00 2001 +From: Kyung Min Park +Date: Mon, 7 Dec 2020 19:34:40 -0800 +Subject: [PATCH] x86: Enumerate AVX512 FP16 CPUID feature flag + +Enumerate AVX512 
Half-precision floating point (FP16) CPUID feature +flag. Compared with using FP32, using FP16 cut the number of bits +required for storage in half, reducing the exponent from 8 bits to 5, +and the mantissa from 23 bits to 10. Using FP16 also enables developers +to train and run inference on deep learning models fast when all +precision or magnitude (FP32) is not needed. + +A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] +is present. The AVX512 FP16 requires AVX512BW feature be implemented +since the instructions for manipulating 32bit masks are associated with +AVX512BW. + +The only in-kernel usage of this is kvm passthrough. The CPU feature +flag is shown as "avx512_fp16" in /proc/cpuinfo. + +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> +Acked-by: Borislav Petkov +Signed-off-by: Paolo Bonzini +(cherry picked from commit e1b35da5e624f8b09d2e98845c2e4c84b179d9a4) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 3b407f46f1a0..b5252fd26682 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index d502241995a3..42af31b64c2c 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, ++ { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, + { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, + { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, + {} +-- +2.29.2 + diff --git a/kernel-rt/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch b/kernel-rt/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch new file mode 100644 index 00000000..633a1ef1 --- /dev/null +++ b/kernel-rt/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch @@ -0,0 +1,41 @@ +From 5b18cff344fcd6906469a2849eff4ba71f42c436 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Mon, 7 Dec 2020 19:34:41 -0800 +Subject: [PATCH] KVM: x86: Expose 
AVX512_FP16 for supported CPUID + +AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. +It could gain better performance for it's faster compared to FP32 +if the precision or magnitude requirements are met. It's availability +is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. + +Expose it in KVM supported CPUID, then guest could make use of it; no +new registers are used, only new instructions. + +Signed-off-by: Cathy Zhang +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2224fc9efb2d6593fbfb57287e39ba4958b188ba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 41b0dc37720e..61f10169fc16 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -418,7 +418,7 @@ void kvm_set_cpu_caps(void) + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | +- F(SERIALIZE) | F(TSXLDTRK) ++ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) + ); + + /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ +-- +2.29.2 + diff --git a/kernel-rt/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch b/kernel-rt/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch new file mode 100644 index 00000000..3bea9ebd --- /dev/null +++ b/kernel-rt/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch @@ -0,0 +1,51 @@ +From 766051cc00679d0451710b8837750bb4a6b73f42 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Mon, 21 Dec 2020 09:04:54 -0300 +Subject: [PATCH] tools headers cpufeatures: Sync with the kernel sources + +To pick the changes in: + + 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") + e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") + +That causes only these 'perf bench' objects to rebuild: + + CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o + CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o + +And addresses these perf build warnings: + + Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' + diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h + +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Kyung Min Park +Cc: Namhyung Kim +Cc: Paolo Bonzini +Cc: Tom Lendacky +Signed-off-by: Arnaldo Carvalho de Melo +(cherry picked from commit 7f3905f00a2025591a6883ee6880f928029b4d96) +[mvb: Remove X86_FEATURE_VM_PAGE_FLUSH from this commit as that CPU + feature constant is not provided by StarlingX's v5.10 baseline.] +Signed-off-by: M. 
Vefa Bicakci +--- + tools/arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index b58730cc12e8..d7accc927691 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +-- +2.29.2 + diff --git a/kernel-rt/debian/patches/0031-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch b/kernel-rt/debian/patches/0031-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch new file mode 100644 index 00000000..00946dbc --- /dev/null +++ b/kernel-rt/debian/patches/0031-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch @@ -0,0 +1,60 @@ +From 74a0fc47d4044629b358487f99009c51ef2eb5dd Mon Sep 17 00:00:00 2001 +From: Kyung Min Park +Date: Mon, 7 Dec 2020 19:34:40 -0800 +Subject: [PATCH] x86: Enumerate AVX512 FP16 CPUID feature flag + +Enumerate AVX512 Half-precision floating point (FP16) CPUID feature +flag. Compared with using FP32, using FP16 cut the number of bits +required for storage in half, reducing the exponent from 8 bits to 5, +and the mantissa from 23 bits to 10. Using FP16 also enables developers +to train and run inference on deep learning models fast when all +precision or magnitude (FP32) is not needed. + +A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] +is present. 
The AVX512 FP16 requires AVX512BW feature be implemented +since the instructions for manipulating 32bit masks are associated with +AVX512BW. + +The only in-kernel usage of this is kvm passthrough. The CPU feature +flag is shown as "avx512_fp16" in /proc/cpuinfo. + +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> +Acked-by: Borislav Petkov +Signed-off-by: Paolo Bonzini +(cherry picked from commit e1b35da5e624f8b09d2e98845c2e4c84b179d9a4) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 3b407f46f1a0..b5252fd26682 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index d502241995a3..42af31b64c2c 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, ++ { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, + { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, + { X86_FEATURE_PER_THREAD_MBA, 
X86_FEATURE_MBA }, + {} +-- +2.29.2 + diff --git a/kernel-rt/debian/patches/0032-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch b/kernel-rt/debian/patches/0032-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch new file mode 100644 index 00000000..62d3817a --- /dev/null +++ b/kernel-rt/debian/patches/0032-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch @@ -0,0 +1,41 @@ +From eb06001d368faaeaa61ca284cf63a612be446558 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Mon, 7 Dec 2020 19:34:41 -0800 +Subject: [PATCH] KVM: x86: Expose AVX512_FP16 for supported CPUID + +AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. +It could gain better performance for it's faster compared to FP32 +if the precision or magnitude requirements are met. It's availability +is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. + +Expose it in KVM supported CPUID, then guest could make use of it; no +new registers are used, only new instructions. + +Signed-off-by: Cathy Zhang +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2224fc9efb2d6593fbfb57287e39ba4958b188ba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 41b0dc37720e..61f10169fc16 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -418,7 +418,7 @@ void kvm_set_cpu_caps(void) + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | +- F(SERIALIZE) | F(TSXLDTRK) ++ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) + ); + + /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ +-- +2.29.2 + diff --git a/kernel-rt/debian/patches/0033-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch b/kernel-rt/debian/patches/0033-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch new file mode 100644 index 00000000..574e9d5a --- /dev/null +++ b/kernel-rt/debian/patches/0033-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch @@ -0,0 +1,51 @@ +From b4ac85cd84ea18e3d389db2075a966940eaa77bc Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Mon, 21 Dec 2020 09:04:54 -0300 +Subject: [PATCH] tools headers cpufeatures: Sync with the kernel sources + +To pick the changes in: + + 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") + e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") + +That causes only these 'perf bench' objects to rebuild: + + CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o + CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o + +And addresses these perf build warnings: + + Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' + diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h + +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Kyung Min Park +Cc: Namhyung Kim +Cc: Paolo Bonzini +Cc: Tom Lendacky +Signed-off-by: Arnaldo Carvalho de Melo +(cherry picked from commit 7f3905f00a2025591a6883ee6880f928029b4d96) +[mvb: Remove X86_FEATURE_VM_PAGE_FLUSH from this commit as that CPU + feature constant is not provided by StarlingX's v5.10 baseline.] +Signed-off-by: M. 
Vefa Bicakci +--- + tools/arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index b58730cc12e8..d7accc927691 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +-- +2.29.2 + diff --git a/kernel-rt/debian/patches/series b/kernel-rt/debian/patches/series index 9ae3ed90..17ea94e8 100644 --- a/kernel-rt/debian/patches/series +++ b/kernel-rt/debian/patches/series @@ -26,3 +26,6 @@ 0028-driver-core-auxiliary-bus-Enable-by-default.patch 0029-Enable-CONFIG_PAGE_POOL-by-default.patch 0030-printk-Add-the-condition-check-for-msleep-in-pr_flus.patch +0031-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch +0032-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch +0033-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch diff --git a/kernel-std/centos/kernel.spec b/kernel-std/centos/kernel.spec index 17d9ba18..77b57625 100644 --- a/kernel-std/centos/kernel.spec +++ b/kernel-std/centos/kernel.spec @@ -836,6 +836,9 @@ Patch25: 0026-driver-core-auxiliary-bus-Remove-unneeded-module-bit.patch Patch26: 0027-driver-core-auxiliary-bus-Fix-memory-leak-when-drive.patch Patch27: 0028-driver-core-auxiliary-bus-Enable-by-default.patch Patch28: 0029-Enable-CONFIG_PAGE_POOL-by-default.patch +Patch29: 0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch +Patch30: 
0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch +Patch31: 0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch # END OF PATCH DEFINITIONS %endif diff --git a/kernel-std/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch b/kernel-std/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch new file mode 100644 index 00000000..fc85924c --- /dev/null +++ b/kernel-std/centos/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch @@ -0,0 +1,60 @@ +From 42f9b0c612aafd7094f6f4a5a80756159d2d5b21 Mon Sep 17 00:00:00 2001 +From: Kyung Min Park +Date: Mon, 7 Dec 2020 19:34:40 -0800 +Subject: [PATCH] x86: Enumerate AVX512 FP16 CPUID feature flag + +Enumerate AVX512 Half-precision floating point (FP16) CPUID feature +flag. Compared with using FP32, using FP16 cut the number of bits +required for storage in half, reducing the exponent from 8 bits to 5, +and the mantissa from 23 bits to 10. Using FP16 also enables developers +to train and run inference on deep learning models fast when all +precision or magnitude (FP32) is not needed. + +A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] +is present. The AVX512 FP16 requires AVX512BW feature be implemented +since the instructions for manipulating 32bit masks are associated with +AVX512BW. + +The only in-kernel usage of this is kvm passthrough. The CPU feature +flag is shown as "avx512_fp16" in /proc/cpuinfo. + +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> +Acked-by: Borislav Petkov +Signed-off-by: Paolo Bonzini +(cherry picked from commit e1b35da5e624f8b09d2e98845c2e4c84b179d9a4) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 3b407f46f1a0..b5252fd26682 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index d502241995a3..42af31b64c2c 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, ++ { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, + { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, + { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, + {} +-- +2.29.2 + diff --git a/kernel-std/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch b/kernel-std/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch new file mode 100644 index 00000000..23d7094a --- /dev/null +++ b/kernel-std/centos/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch @@ -0,0 +1,41 @@ +From d0edc9f018b2de1b4d35ef83920c0fd416e8abf8 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Mon, 7 Dec 2020 19:34:41 -0800 +Subject: [PATCH] KVM: x86: Expose 
AVX512_FP16 for supported CPUID + +AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. +It could gain better performance for it's faster compared to FP32 +if the precision or magnitude requirements are met. It's availability +is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. + +Expose it in KVM supported CPUID, then guest could make use of it; no +new registers are used, only new instructions. + +Signed-off-by: Cathy Zhang +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2224fc9efb2d6593fbfb57287e39ba4958b188ba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 41b0dc37720e..61f10169fc16 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -418,7 +418,7 @@ void kvm_set_cpu_caps(void) + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | +- F(SERIALIZE) | F(TSXLDTRK) ++ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) + ); + + /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ +-- +2.29.2 + diff --git a/kernel-std/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch b/kernel-std/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch new file mode 100644 index 00000000..2fa0800d --- /dev/null +++ b/kernel-std/centos/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch @@ -0,0 +1,51 @@ +From f9b6b17e103470f5374bb65fc930472bf62770ee Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Mon, 21 Dec 2020 09:04:54 -0300 +Subject: [PATCH] tools headers cpufeatures: Sync with the kernel sources + +To pick the changes in: + + 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") + e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") + +That causes only these 'perf bench' objects to rebuild: + + CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o + CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o + +And addresses these perf build warnings: + + Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' + diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h + +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Kyung Min Park +Cc: Namhyung Kim +Cc: Paolo Bonzini +Cc: Tom Lendacky +Signed-off-by: Arnaldo Carvalho de Melo +(cherry picked from commit 7f3905f00a2025591a6883ee6880f928029b4d96) +[mvb: Remove X86_FEATURE_VM_PAGE_FLUSH from this commit as that CPU + feature constant is not provided by StarlingX's v5.10 baseline.] +Signed-off-by: M. 
Vefa Bicakci +--- + tools/arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index b58730cc12e8..d7accc927691 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +-- +2.29.2 + diff --git a/kernel-std/debian/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch b/kernel-std/debian/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch new file mode 100644 index 00000000..81dd5ee6 --- /dev/null +++ b/kernel-std/debian/patches/0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch @@ -0,0 +1,60 @@ +From 91f5728b6794550f08febea3bfe4018071520727 Mon Sep 17 00:00:00 2001 +From: Kyung Min Park +Date: Mon, 7 Dec 2020 19:34:40 -0800 +Subject: [PATCH] x86: Enumerate AVX512 FP16 CPUID feature flag + +Enumerate AVX512 Half-precision floating point (FP16) CPUID feature +flag. Compared with using FP32, using FP16 cut the number of bits +required for storage in half, reducing the exponent from 8 bits to 5, +and the mantissa from 23 bits to 10. Using FP16 also enables developers +to train and run inference on deep learning models fast when all +precision or magnitude (FP32) is not needed. + +A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] +is present. 
The AVX512 FP16 requires AVX512BW feature be implemented +since the instructions for manipulating 32bit masks are associated with +AVX512BW. + +The only in-kernel usage of this is kvm passthrough. The CPU feature +flag is shown as "avx512_fp16" in /proc/cpuinfo. + +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> +Acked-by: Borislav Petkov +Signed-off-by: Paolo Bonzini +(cherry picked from commit e1b35da5e624f8b09d2e98845c2e4c84b179d9a4) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 3b407f46f1a0..b5252fd26682 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index d502241995a3..42af31b64c2c 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, ++ { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, + { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, + { X86_FEATURE_PER_THREAD_MBA, 
X86_FEATURE_MBA }, + {} +-- +2.29.2 + diff --git a/kernel-std/debian/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch b/kernel-std/debian/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch new file mode 100644 index 00000000..32412f24 --- /dev/null +++ b/kernel-std/debian/patches/0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch @@ -0,0 +1,41 @@ +From 395b58fc9c018bd4fe380c40bcdd87c41ecf9b13 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Mon, 7 Dec 2020 19:34:41 -0800 +Subject: [PATCH] KVM: x86: Expose AVX512_FP16 for supported CPUID + +AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. +It could gain better performance for it's faster compared to FP32 +if the precision or magnitude requirements are met. It's availability +is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. + +Expose it in KVM supported CPUID, then guest could make use of it; no +new registers are used, only new instructions. + +Signed-off-by: Cathy Zhang +Signed-off-by: Kyung Min Park +Acked-by: Dave Hansen +Reviewed-by: Tony Luck +Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2224fc9efb2d6593fbfb57287e39ba4958b188ba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 41b0dc37720e..61f10169fc16 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -418,7 +418,7 @@ void kvm_set_cpu_caps(void) + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | +- F(SERIALIZE) | F(TSXLDTRK) ++ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) + ); + + /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ +-- +2.29.2 + diff --git a/kernel-std/debian/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch b/kernel-std/debian/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch new file mode 100644 index 00000000..db82d43b --- /dev/null +++ b/kernel-std/debian/patches/0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch @@ -0,0 +1,51 @@ +From 79f57d6d0f55cb6402cb445da438fee8d9133352 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Mon, 21 Dec 2020 09:04:54 -0300 +Subject: [PATCH] tools headers cpufeatures: Sync with the kernel sources + +To pick the changes in: + + 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") + e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") + +That causes only these 'perf bench' objects to rebuild: + + CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o + CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o + +And addresses these perf build warnings: + + Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' + diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h + +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Kyung Min Park +Cc: Namhyung Kim +Cc: Paolo Bonzini +Cc: Tom Lendacky +Signed-off-by: Arnaldo Carvalho de Melo +(cherry picked from commit 7f3905f00a2025591a6883ee6880f928029b4d96) +[mvb: Remove X86_FEATURE_VM_PAGE_FLUSH from this commit as that CPU + feature constant is not provided by StarlingX's v5.10 baseline.] +Signed-off-by: M. 
Vefa Bicakci +--- + tools/arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index b58730cc12e8..d7accc927691 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -374,6 +374,7 @@ + #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ + #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ ++#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +-- +2.29.2 + diff --git a/kernel-std/debian/patches/series b/kernel-std/debian/patches/series index 8fa3a8ae..2b9bd5bf 100644 --- a/kernel-std/debian/patches/series +++ b/kernel-std/debian/patches/series @@ -25,3 +25,6 @@ 0027-driver-core-auxiliary-bus-Fix-memory-leak-when-drive.patch 0028-driver-core-auxiliary-bus-Enable-by-default.patch 0029-Enable-CONFIG_PAGE_POOL-by-default.patch +0030-x86-Enumerate-AVX512-FP16-CPUID-feature-flag.patch +0031-KVM-x86-Expose-AVX512_FP16-for-supported-CPUID.patch +0032-tools-headers-cpufeatures-Sync-with-the-kernel-sourc.patch