riseproject-dev
diff --git a/‎ggml/src/ggml-cpu/arch-fallback.h‎
Lines changed: 0 additions & 28 deletions b/‎ggml/src/ggml-cpu/arch-fallback.h‎
Lines changed: 0 additions & 28 deletions
diff --git a/‎ggml/src/ggml-cpu/arch/riscv/repack.cpp‎
Lines changed: 92 additions & 150 deletions b/‎ggml/src/ggml-cpu/arch/riscv/repack.cpp‎
Lines changed: 92 additions & 150 deletions
@@ -38,8 +38,6 @@
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -79,33 +77,15 @@
 #define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemv_mxfp4_8x8_q8_0_generic ggml_gemv_mxfp4_8x8_q8_0
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
-#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
-#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
-#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
-#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
-#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
-#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
-#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
-#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
 #define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #define ggml_gemm_mxfp4_8x8_q8_0_generic ggml_gemm_mxfp4_8x8_q8_0
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
-#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
-#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
-#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
-#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
-#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
-#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
-#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
-#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
 #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
 // quants.c
 #define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_K_8x4_q8_K_generic ggml_gemv_q4_K_8x4_q8_K
@@ -142,8 +122,6 @@
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -190,8 +168,6 @@
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -285,8 +261,6 @@
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -341,8 +315,6 @@
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
 #define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
-#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
-#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 
@@ -665,6 +665,97 @@ void ggml_gemv_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
         __riscv_vse32_v_f32m2(s + col_tile, v_sumf, vl);
     }
 }
+
+
+template<int ncols_interleaved>
+static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    GGML_UNUSED(bs);
+
+    const int nb = n / 1;
+
+    assert (nr == 1);
+    assert(n % 1 == 0);
+    assert(nc % ncols_interleaved == 0);
+
+    const _Float16 * a_ptr = (const _Float16 *) vy;
+    for (int x = 0; x < nc / ncols_interleaved; x++) {
+        const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
+
+        // Accumulators
+        vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
+
+        for (int l = 0; l < nb; l++) {
+            vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
+
+            sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
+        }
+
+        __riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
+    }
+
+    return;
+}
+
+void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
+}
+
+template<int ncols_interleaved>
+static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    GGML_UNUSED(bs);
+
+    const int nb = n / 1;
+
+    assert (nr == 1);
+    assert(n % 1 == 0);
+    assert(nc % ncols_interleaved == 0);
+
+    const float * a_ptr = (const float *) vy;
+    for (int x = 0; x < nc / ncols_interleaved; x++) {
+        const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
+
+        // Accumulators
+        vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
+
+        for (int l = 0; l < nb; l++) {
+            vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
+
+            sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
+        }
+
+        __riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
+    }
+
+    return;
+}
+
+void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
+}
+
+void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
+    ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
+}
 #endif
 
 void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@@ -1700,125 +1791,7 @@ void ggml_gemm_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
         }
     }
 }
-#endif
-
-template<int ncols_interleaved>
-static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-    const int nb = n / 1;
-
-    assert (nr == 1);
-    assert(n % 1 == 0);
-    assert(nc % ncols_interleaved == 0);
-
-    const _Float16 * a_ptr = (const _Float16 *) vy;
-    for (int x = 0; x < nc / ncols_interleaved; x++) {
-        const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
-
-        // Accumulators
-        vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
-
-        for (int l = 0; l < nb; l++) {
-            vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
-
-            sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
-        }
-
-        __riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
-    }
-
-    return;
-}
-
-void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f16_1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f16_1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f16_1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f16_1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-template<int ncols_interleaved>
-static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-    const int nb = n / 1;
-
-    assert (nr == 1);
-    assert(n % 1 == 0);
-    assert(nc % ncols_interleaved == 0);
-
-    const float * a_ptr = (const float *) vy;
-    for (int x = 0; x < nc / ncols_interleaved; x++) {
-        const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
-
-        // Accumulators
-        vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
-
-        for (int l = 0; l < nb; l++) {
-            vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
-
-            sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
-        }
-
-        __riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
-    }
-
-    return;
-}
-
-void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f32_1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f32_1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
-}
-
-void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f32_1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
-}
 
-void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
-    ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemv_f32_1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
-}
 
 template<int ncols_interleaved>
 static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@@ -1867,35 +1840,19 @@ static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_
 }
 
 void ggml_gemm_f16_7x1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f16_7x1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f16_7x1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f16_7x1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f16_7x1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f16_7x1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f16_7x1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f16_7x1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f16_7x1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f16_7x1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f16_7x1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f16_7x1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 template<int ncols_interleaved>
@@ -1945,33 +1902,18 @@ static inline void ggml_gemm_f32_7x1xM_f32(int n, float * GGML_RESTRICT s, size_
 }
 
 void ggml_gemm_f32_7x1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f32_7x1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f32_7x1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f32_7x1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f32_7x1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f32_7x1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f32_7x1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f32_7x1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f32_7x1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
 }
 
 void ggml_gemm_f32_7x1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
-#if defined __riscv_v_intrinsic
     ggml_gemm_f32_7x1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
-    return;
-#endif
-    ggml_gemm_f32_7x1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
 }
+#endif