From 753604cfe229ea9a0688784fe9f133b6dbdc7898 Mon Sep 17 00:00:00 2001 From: Vasudeva-bit Date: Thu, 25 Jun 2026 19:06:26 +0530 Subject: [PATCH 1/2] Optimize Zen 4 GEMM macro block sizes (P, Q, R) --- kernel/setparam-ref.c | 25 +++++++++++++++++++++++++ param.h | 21 +++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index a6959f2599..bc030a29ea 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -2206,7 +2206,32 @@ static void init_parameter(void) { #endif +int l3_kb = get_l3_size(); +unsigned int eax, ebx, ecx, edx; +cpuid(0, &eax, &ebx, &ecx, &edx); +if ((ebx == 0x68747541) && (l3_kb > 0) && (l3_kb % 32768 == 0)) { +#if BUILD_SINGLE == 1 + TABLE_NAME.sgemm_p = 384; + TABLE_NAME.sgemm_q = 512; + TABLE_NAME.sgemm_r = 5936; +#endif +#if BUILD_DOUBLE == 1 + TABLE_NAME.dgemm_p = 512; + TABLE_NAME.dgemm_q = 512; + TABLE_NAME.dgemm_r = 2288; +#endif +#if BUILD_COMPLEX == 1 + TABLE_NAME.cgemm_p = 160; + TABLE_NAME.cgemm_q = 480; + TABLE_NAME.cgemm_r = 528; +#endif +#if BUILD_COMPLEX16 == 1 + TABLE_NAME.zgemm_p = 176; + TABLE_NAME.zgemm_q = 256; + TABLE_NAME.zgemm_r = 1520; +#endif +} } #endif //RISCV64 diff --git a/param.h b/param.h index 8e7ab29f24..dc04de2b4e 100644 --- a/param.h +++ b/param.h @@ -1996,6 +1996,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else +#if L3_SIZE > 0 && L3_SIZE % 33554432 == 0 + +#define SGEMM_DEFAULT_P 384 +#define DGEMM_DEFAULT_P 512 +#define CGEMM_DEFAULT_P 160 +#define ZGEMM_DEFAULT_P 176 + +#define SGEMM_DEFAULT_Q 512 +#define DGEMM_DEFAULT_Q 512 +#define CGEMM_DEFAULT_Q 480 +#define ZGEMM_DEFAULT_Q 256 + +#define SGEMM_DEFAULT_R 5936 +#define DGEMM_DEFAULT_R 2288 +#define CGEMM_DEFAULT_R 528 +#define ZGEMM_DEFAULT_R 1520 + +#else + #define SGEMM_DEFAULT_P 640 #define DGEMM_DEFAULT_P 192 #define CGEMM_DEFAULT_P 384 @@ -2011,6 +2030,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CGEMM_DEFAULT_R cgemm_r #define ZGEMM_DEFAULT_R zgemm_r +#endif + #define QGEMM_DEFAULT_Q 128 #define QGEMM_DEFAULT_P 504 #define QGEMM_DEFAULT_R qgemm_r From 58846317d902e635dc77e817285ecb8f1357f41b Mon Sep 17 00:00:00 2001 From: Vasudeva-bit Date: Sat, 4 Jul 2026 11:30:34 +0530 Subject: [PATCH 2/2] fix C89 scoping, dynamic R for memory issues, robust zen4/5 check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/setparam-ref.c | 57 ++++++++++++++++++++++++------------------- param.h | 10 ++++---- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index bc030a29ea..107f1f4f0f 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -2084,6 +2084,38 @@ static void init_parameter(void) { TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; #endif +{ + int l3_kb = get_l3_size(); + int l2_kb = get_l2_size(); + unsigned int eax, ebx, ecx, edx; + unsigned int cpuid7_eax, cpuid7_ebx, cpuid7_ecx, cpuid7_edx; + + cpuid(0, &eax, &ebx, &ecx, &edx); + + if ((ebx == 0x68747541) && (l3_kb > 0) && (l3_kb % 32768 == 0) && (l2_kb == 1024)) { //Auth AMD + + cpuid(7, &cpuid7_eax, &cpuid7_ebx, &cpuid7_ecx, &cpuid7_edx); + + if (cpuid7_ebx & (1 << 16)) { // avx512 - Zen 4, 5 +#if BUILD_SINGLE == 1 + TABLE_NAME.sgemm_p = 384; + TABLE_NAME.sgemm_q = 512; +#endif +#if BUILD_DOUBLE == 1 + TABLE_NAME.dgemm_p = 512; + TABLE_NAME.dgemm_q = 512; +#endif +#if BUILD_COMPLEX == 1 + TABLE_NAME.cgemm_p = 160; + TABLE_NAME.cgemm_q = 480; +#endif +#if BUILD_COMPLEX16 == 1 + TABLE_NAME.zgemm_p = 176; + TABLE_NAME.zgemm_q = 256; +#endif + } + } +} #if BUILD_SINGLE == 1 TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; @@ -2206,32 +2238,7 @@ static void init_parameter(void) { #endif -int l3_kb = get_l3_size(); -unsigned int eax, ebx, ecx, edx; -cpuid(0, &eax, &ebx, 
&ecx, &edx); -if ((ebx == 0x68747541) && (l3_kb > 0) && (l3_kb % 32768 == 0)) { -#if BUILD_SINGLE == 1 - TABLE_NAME.sgemm_p = 384; - TABLE_NAME.sgemm_q = 512; - TABLE_NAME.sgemm_r = 5936; -#endif -#if BUILD_DOUBLE == 1 - TABLE_NAME.dgemm_p = 512; - TABLE_NAME.dgemm_q = 512; - TABLE_NAME.dgemm_r = 2288; -#endif -#if BUILD_COMPLEX == 1 - TABLE_NAME.cgemm_p = 160; - TABLE_NAME.cgemm_q = 480; - TABLE_NAME.cgemm_r = 528; -#endif -#if BUILD_COMPLEX16 == 1 - TABLE_NAME.zgemm_p = 176; - TABLE_NAME.zgemm_q = 256; - TABLE_NAME.zgemm_r = 1520; -#endif -} } #endif //RISCV64 diff --git a/param.h b/param.h index dc04de2b4e..9389c4135f 100644 --- a/param.h +++ b/param.h @@ -1996,7 +1996,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else -#if L3_SIZE > 0 && L3_SIZE % 33554432 == 0 +#if L3_SIZE > 0 && L3_SIZE % 33554432 == 0 && L2_SIZE == 1048576 #define SGEMM_DEFAULT_P 384 #define DGEMM_DEFAULT_P 512 @@ -2008,10 +2008,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_Q 480 #define ZGEMM_DEFAULT_Q 256 -#define SGEMM_DEFAULT_R 5936 -#define DGEMM_DEFAULT_R 2288 -#define CGEMM_DEFAULT_R 528 -#define ZGEMM_DEFAULT_R 1520 +#define SGEMM_DEFAULT_R sgemm_r // 5936 +#define DGEMM_DEFAULT_R dgemm_r // 2288 +#define CGEMM_DEFAULT_R cgemm_r // 528 +#define ZGEMM_DEFAULT_R zgemm_r // 1520 #else