Skip to content

Commit e897076

Browse files
committed
ggml-cpu: add repack GEMM and GEMV for floating-point (#4)
1 parent f78aa0e commit e897076

4 files changed

Lines changed: 140 additions & 239 deletions

File tree

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@
3838
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
3939
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
4040
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
41-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
42-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
4341
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
4442
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
4543
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -79,33 +77,15 @@
7977
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
8078
#define ggml_gemv_mxfp4_8x8_q8_0_generic ggml_gemv_mxfp4_8x8_q8_0
8179
#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
82-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
83-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
84-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
85-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
86-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
87-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
88-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
89-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
9080
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
9181
#define ggml_gemm_mxfp4_8x8_q8_0_generic ggml_gemm_mxfp4_8x8_q8_0
9282
#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
93-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
94-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
95-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
96-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
97-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
98-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
99-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
100-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
10183
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
10284
// quants.c
10385
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
10486
// repack.cpp
10587
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
10688
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
107-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
108-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
10989
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
11090
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
11191
#define ggml_gemv_q4_K_8x4_q8_K_generic ggml_gemv_q4_K_8x4_q8_K
@@ -142,8 +122,6 @@
142122
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
143123
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
144124
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
145-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
146-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
147125
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
148126
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
149127
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -190,8 +168,6 @@
190168
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
191169
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
192170
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
193-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
194-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
195171
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
196172
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
197173
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -285,8 +261,6 @@
285261
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
286262
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
287263
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
288-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
289-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
290264
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
291265
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
292266
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -341,8 +315,6 @@
341315
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
342316
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
343317
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
344-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
345-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
346318
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
347319
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
348320
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0

ggml/src/ggml-cpu/arch/riscv/repack.cpp

Lines changed: 92 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,97 @@ void ggml_gemv_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
665665
__riscv_vse32_v_f32m2(s + col_tile, v_sumf, vl);
666666
}
667667
}
668+
669+
670+
template<int ncols_interleaved>
671+
static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
672+
GGML_UNUSED(bs);
673+
674+
const int nb = n / 1;
675+
676+
assert (nr == 1);
677+
assert(n % 1 == 0);
678+
assert(nc % ncols_interleaved == 0);
679+
680+
const _Float16 * a_ptr = (const _Float16 *) vy;
681+
for (int x = 0; x < nc / ncols_interleaved; x++) {
682+
const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
683+
684+
// Accumulators
685+
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
686+
687+
for (int l = 0; l < nb; l++) {
688+
vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
689+
690+
sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
691+
}
692+
693+
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
694+
}
695+
696+
return;
697+
}
698+
699+
void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
700+
ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
701+
}
702+
703+
void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
704+
ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
705+
}
706+
707+
void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
708+
ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
709+
}
710+
711+
void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
712+
ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
713+
}
714+
715+
template<int ncols_interleaved>
716+
static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
717+
GGML_UNUSED(bs);
718+
719+
const int nb = n / 1;
720+
721+
assert (nr == 1);
722+
assert(n % 1 == 0);
723+
assert(nc % ncols_interleaved == 0);
724+
725+
const float * a_ptr = (const float *) vy;
726+
for (int x = 0; x < nc / ncols_interleaved; x++) {
727+
const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
728+
729+
// Accumulators
730+
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
731+
732+
for (int l = 0; l < nb; l++) {
733+
vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
734+
735+
sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
736+
}
737+
738+
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
739+
}
740+
741+
return;
742+
}
743+
744+
void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
745+
ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
746+
}
747+
748+
void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
749+
ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
750+
}
751+
752+
void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
753+
ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
754+
}
755+
756+
void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
757+
ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
758+
}
668759
#endif
669760

670761
void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@@ -1700,125 +1791,7 @@ void ggml_gemm_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
17001791
}
17011792
}
17021793
}
1703-
#endif
1704-
1705-
template<int ncols_interleaved>
1706-
static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1707-
const int nb = n / 1;
1708-
1709-
assert (nr == 1);
1710-
assert(n % 1 == 0);
1711-
assert(nc % ncols_interleaved == 0);
1712-
1713-
const _Float16 * a_ptr = (const _Float16 *) vy;
1714-
for (int x = 0; x < nc / ncols_interleaved; x++) {
1715-
const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
1716-
1717-
// Accumulators
1718-
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
1719-
1720-
for (int l = 0; l < nb; l++) {
1721-
vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
1722-
1723-
sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
1724-
}
1725-
1726-
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
1727-
}
1728-
1729-
return;
1730-
}
1731-
1732-
void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1733-
#if defined __riscv_v_intrinsic
1734-
ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
1735-
return;
1736-
#endif
1737-
ggml_gemv_f16_1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
1738-
}
1739-
1740-
void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1741-
#if defined __riscv_v_intrinsic
1742-
ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
1743-
return;
1744-
#endif
1745-
ggml_gemv_f16_1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
1746-
}
1747-
1748-
void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1749-
#if defined __riscv_v_intrinsic
1750-
ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
1751-
return;
1752-
#endif
1753-
ggml_gemv_f16_1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
1754-
}
1755-
1756-
void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1757-
#if defined __riscv_v_intrinsic
1758-
ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
1759-
return;
1760-
#endif
1761-
ggml_gemv_f16_1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
1762-
}
1763-
1764-
template<int ncols_interleaved>
1765-
static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1766-
const int nb = n / 1;
1767-
1768-
assert (nr == 1);
1769-
assert(n % 1 == 0);
1770-
assert(nc % ncols_interleaved == 0);
1771-
1772-
const float * a_ptr = (const float *) vy;
1773-
for (int x = 0; x < nc / ncols_interleaved; x++) {
1774-
const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
1775-
1776-
// Accumulators
1777-
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
1778-
1779-
for (int l = 0; l < nb; l++) {
1780-
vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
1781-
1782-
sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
1783-
}
1784-
1785-
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
1786-
}
1787-
1788-
return;
1789-
}
1790-
1791-
void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1792-
#if defined __riscv_v_intrinsic
1793-
ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
1794-
return;
1795-
#endif
1796-
ggml_gemv_f32_1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
1797-
}
1798-
1799-
void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1800-
#if defined __riscv_v_intrinsic
1801-
ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
1802-
return;
1803-
#endif
1804-
ggml_gemv_f32_1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
1805-
}
1806-
1807-
void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1808-
#if defined __riscv_v_intrinsic
1809-
ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
1810-
return;
1811-
#endif
1812-
ggml_gemv_f32_1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
1813-
}
18141794

1815-
void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1816-
#if defined __riscv_v_intrinsic
1817-
ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
1818-
return;
1819-
#endif
1820-
ggml_gemv_f32_1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
1821-
}
18221795

18231796
template<int ncols_interleaved>
18241797
static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@@ -1867,35 +1840,19 @@ static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_
18671840
}
18681841

18691842
void ggml_gemm_f16_7x1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1870-
#if defined __riscv_v_intrinsic
18711843
ggml_gemm_f16_7x1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
1872-
return;
1873-
#endif
1874-
ggml_gemm_f16_7x1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
18751844
}
18761845

18771846
void ggml_gemm_f16_7x1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1878-
#if defined __riscv_v_intrinsic
18791847
ggml_gemm_f16_7x1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
1880-
return;
1881-
#endif
1882-
ggml_gemm_f16_7x1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
18831848
}
18841849

18851850
void ggml_gemm_f16_7x1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1886-
#if defined __riscv_v_intrinsic
18871851
ggml_gemm_f16_7x1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
1888-
return;
1889-
#endif
1890-
ggml_gemm_f16_7x1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
18911852
}
18921853

18931854
void ggml_gemm_f16_7x1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1894-
#if defined __riscv_v_intrinsic
18951855
ggml_gemm_f16_7x1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
1896-
return;
1897-
#endif
1898-
ggml_gemm_f16_7x1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
18991856
}
19001857

19011858
template<int ncols_interleaved>
@@ -1945,33 +1902,18 @@ static inline void ggml_gemm_f32_7x1xM_f32(int n, float * GGML_RESTRICT s, size_
19451902
}
19461903

19471904
void ggml_gemm_f32_7x1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1948-
#if defined __riscv_v_intrinsic
19491905
ggml_gemm_f32_7x1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
1950-
return;
1951-
#endif
1952-
ggml_gemm_f32_7x1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
19531906
}
19541907

19551908
void ggml_gemm_f32_7x1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1956-
#if defined __riscv_v_intrinsic
19571909
ggml_gemm_f32_7x1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
1958-
return;
1959-
#endif
1960-
ggml_gemm_f32_7x1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
19611910
}
19621911

19631912
void ggml_gemm_f32_7x1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1964-
#if defined __riscv_v_intrinsic
19651913
ggml_gemm_f32_7x1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
1966-
return;
1967-
#endif
1968-
ggml_gemm_f32_7x1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
19691914
}
19701915

19711916
void ggml_gemm_f32_7x1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1972-
#if defined __riscv_v_intrinsic
19731917
ggml_gemm_f32_7x1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
1974-
return;
1975-
#endif
1976-
ggml_gemm_f32_7x1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
19771918
}
1919+
#endif

0 commit comments

Comments
 (0)