Skip to content

Commit

Permalink
QC8/QS8 GEMM/IGEMM microkernels for Wasm Relaxed Unsigned and Signed …
Browse files Browse the repository at this point in the history
…Dot Product
  • Loading branch information
fanchenkong1 committed May 30, 2024
1 parent 325f0d2 commit ec2296e
Show file tree
Hide file tree
Showing 33 changed files with 2,929 additions and 52 deletions.
34 changes: 34 additions & 0 deletions bench/qs8-gemm-e2e.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1853,9 +1853,43 @@ static void GEMMEnd2EndBenchmark(
benchmark::utils::CheckWAsmSDOT);
}

// End-to-end benchmark wrapper for the WAsm USDOT QS8/QC8W kernels with a
// 2x4c16 tile. Forwards to GEMMEnd2EndBenchmark the 2x4c16 GEMM and IGEMM
// kernels, then their 1x4c16 counterparts, the wasmsimd FP32 params
// initializer, the tile geometry, and CheckWAsmUSDOT as the last argument.
static void qs8_qc8w_gemm_2x4c16__wasmusdot(
    benchmark::State& state,
    models::ExecutionPlanFactory model)
{
  GEMMEnd2EndBenchmark(
      state,
      model,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_2x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_2x4c16__wasmusdot,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
      /*mr=*/2,
      /*nr=*/4,
      /*log2_kr=*/4,
      /*log2_sr=*/0,
      benchmark::utils::CheckWAsmUSDOT);
}
// End-to-end benchmark wrapper for the WAsm USDOT QS8/QC8W kernels with a
// 3x4c16 tile. Forwards to GEMMEnd2EndBenchmark the 3x4c16 GEMM and IGEMM
// kernels, then their 1x4c16 counterparts, the wasmsimd FP32 params
// initializer, the tile geometry, and CheckWAsmUSDOT as the last argument.
static void qs8_qc8w_gemm_3x4c16__wasmusdot(
    benchmark::State& state,
    models::ExecutionPlanFactory model)
{
  GEMMEnd2EndBenchmark(
      state,
      model,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_3x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_3x4c16__wasmusdot,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
      /*mr=*/3,
      /*nr=*/4,
      /*log2_kr=*/4,
      /*log2_sr=*/0,
      benchmark::utils::CheckWAsmUSDOT);
}
// End-to-end benchmark wrapper for the WAsm USDOT QS8/QC8W kernels with a
// 4x4c16 tile. Forwards to GEMMEnd2EndBenchmark the 4x4c16 GEMM and IGEMM
// kernels, then their 1x4c16 counterparts, the wasmsimd FP32 params
// initializer, the tile geometry, and CheckWAsmUSDOT as the last argument.
static void qs8_qc8w_gemm_4x4c16__wasmusdot(
    benchmark::State& state,
    models::ExecutionPlanFactory model)
{
  GEMMEnd2EndBenchmark(
      state,
      model,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_4x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_4x4c16__wasmusdot,
      xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
      xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
      /*mr=*/4,
      /*nr=*/4,
      /*log2_kr=*/4,
      /*log2_sr=*/0,
      benchmark::utils::CheckWAsmUSDOT);
}

// Invoke BENCHMARK_QS8_END2END once per end-to-end wrapper: the 2x/3x/4x4c16
// wasmsdot variants first, then the matching wasmusdot variants.
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_2x4c16__wasmsdot)
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_3x4c16__wasmsdot)
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_4x4c16__wasmsdot)
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_2x4c16__wasmusdot)
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_3x4c16__wasmusdot)
BENCHMARK_QS8_END2END(qs8_qc8w_gemm_4x4c16__wasmusdot)
#endif // XNN_ARCH_WASMRELAXEDSIMD


Expand Down
56 changes: 56 additions & 0 deletions bench/qs8-qc8w-gemm-fp32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4889,6 +4889,62 @@
#endif // XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMRELAXEDSIMD
  // GEMM benchmark wrapper for the 1x4c16 WAsm USDOT QS8/QC8W microkernel.
  // Passes GEMMBenchmark the kernel, the wasmsimd FP32 params initializer,
  // the xnn_pack_qs8_to_qu8_gemm_goi_w packing routine, the tile geometry,
  // and CheckWAsmUSDOT as the last argument. `net` is not referenced here.
  static void qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot(
      benchmark::State& state,
      const char* net)
  {
    GEMMBenchmark(
        state,
        xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot,
        xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
        xnn_pack_qs8_to_qu8_gemm_goi_w,
        /*mr=*/1,
        /*nr=*/4,
        /*kr=*/16,
        /*sr=*/1,
        benchmark::utils::CheckWAsmUSDOT);
  }

  BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_1x4c16__wasmusdot)
#endif  // XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMRELAXEDSIMD
  // GEMM benchmark wrapper for the 2x4c16 WAsm USDOT QS8/QC8W microkernel.
  // Passes GEMMBenchmark the kernel, the wasmsimd FP32 params initializer,
  // the xnn_pack_qs8_to_qu8_gemm_goi_w packing routine, the tile geometry,
  // and CheckWAsmUSDOT as the last argument. `net` is not referenced here.
  static void qs8_qc8w_gemm_minmax_fp32_ukernel_2x4c16__wasmusdot(
      benchmark::State& state,
      const char* net)
  {
    GEMMBenchmark(
        state,
        xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_2x4c16__wasmusdot,
        xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
        xnn_pack_qs8_to_qu8_gemm_goi_w,
        /*mr=*/2,
        /*nr=*/4,
        /*kr=*/16,
        /*sr=*/1,
        benchmark::utils::CheckWAsmUSDOT);
  }

  BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_2x4c16__wasmusdot)
#endif  // XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMRELAXEDSIMD
  // GEMM benchmark wrapper for the 3x4c16 WAsm USDOT QS8/QC8W microkernel.
  // Passes GEMMBenchmark the kernel, the wasmsimd FP32 params initializer,
  // the xnn_pack_qs8_to_qu8_gemm_goi_w packing routine, the tile geometry,
  // and CheckWAsmUSDOT as the last argument. `net` is not referenced here.
  static void qs8_qc8w_gemm_minmax_fp32_ukernel_3x4c16__wasmusdot(
      benchmark::State& state,
      const char* net)
  {
    GEMMBenchmark(
        state,
        xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_3x4c16__wasmusdot,
        xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
        xnn_pack_qs8_to_qu8_gemm_goi_w,
        /*mr=*/3,
        /*nr=*/4,
        /*kr=*/16,
        /*sr=*/1,
        benchmark::utils::CheckWAsmUSDOT);
  }

  BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_3x4c16__wasmusdot)
#endif  // XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMRELAXEDSIMD
  // GEMM benchmark wrapper for the 4x4c16 WAsm USDOT QS8/QC8W microkernel.
  // Passes GEMMBenchmark the kernel, the wasmsimd FP32 params initializer,
  // the xnn_pack_qs8_to_qu8_gemm_goi_w packing routine, the tile geometry,
  // and CheckWAsmUSDOT as the last argument. `net` is not referenced here.
  static void qs8_qc8w_gemm_minmax_fp32_ukernel_4x4c16__wasmusdot(
      benchmark::State& state,
      const char* net)
  {
    GEMMBenchmark(
        state,
        xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_4x4c16__wasmusdot,
        xnn_init_qs8_qc8w_conv_minmax_fp32_wasmsimd_params,
        xnn_pack_qs8_to_qu8_gemm_goi_w,
        /*mr=*/4,
        /*nr=*/4,
        /*kr=*/16,
        /*sr=*/1,
        benchmark::utils::CheckWAsmUSDOT);
  }

  BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_4x4c16__wasmusdot)
#endif  // XNN_ARCH_WASMRELAXEDSIMD


static void qs8_qc8w_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
Expand Down
9 changes: 9 additions & 0 deletions bench/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,15 @@ void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark) {
return true;
}

// Reports whether the hardware config advertises WAsm USDOT support.
// When xnn_init_hardware_config() yields no config, or the config's
// use_wasm_usdot flag is unset, the benchmark is marked as skipped with an
// error message and false is returned; otherwise returns true.
bool CheckWAsmUSDOT(benchmark::State& state) {
  const xnn_hardware_config* config = xnn_init_hardware_config();
  const bool usdot_available = config != nullptr && config->use_wasm_usdot;
  if (!usdot_available) {
    state.SkipWithError("no WAsm USDOT support");
  }
  return usdot_available;
}

bool CheckWAsmBLENDVPS(benchmark::State& state) {
const xnn_hardware_config* hardware_config = xnn_init_hardware_config();
if (hardware_config == nullptr || !hardware_config->use_wasm_blendvps) {
Expand Down
4 changes: 4 additions & 0 deletions bench/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ bool CheckWAsmPSHUFB(benchmark::State& state);
// If WAsm SDOT is unsupported, report error in benchmark state, and return false.
bool CheckWAsmSDOT(benchmark::State& state);

// Check if the USDOT instruction is available in WAsm Relaxed SIMD as Relaxed Integer Dot Product with Accumulation.
// If WAsm USDOT is unsupported, report an error in the benchmark state and return false.
// Otherwise return true without touching the benchmark state.
bool CheckWAsmUSDOT(benchmark::State& state);

// Check if BLENDVPS instruction is available in WAsm Relaxed SIMD as Relaxed Lane Select.
// If WAsm BLENDVPS is unsupported, report error in benchmark state, and return false.
bool CheckWAsmBLENDVPS(benchmark::State& state);
Expand Down
8 changes: 8 additions & 0 deletions cmake/gen/wasmrelaxedsimd_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -439,13 +439,21 @@ SET(ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS
src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-3x4c16-minmax-wasmsdot.c
src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c16-minmax-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmusdot.c
src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u8.c
src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u16.c
src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u32.c
Expand Down
8 changes: 8 additions & 0 deletions gen/wasmrelaxedsimd_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -435,13 +435,21 @@ ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS = [
"src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-3x4c16-minmax-wasmsdot.c",
"src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c16-minmax-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u8.c",
"src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u16.c",
"src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u32.c",
Expand Down
5 changes: 5 additions & 0 deletions scripts/generate-qs8-gemm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ tools/xngen src/qs8-gemm/MRx4c16-wasmsdot.c.in -D MR=2 -D REQUANTIZATION= -D
tools/xngen src/qs8-gemm/MRx4c16-wasmsdot.c.in -D MR=3 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-3x4c16-minmax-wasmsdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmsdot.c.in -D MR=4 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c16-minmax-wasmsdot.c &

# WAsm USDOT: expand src/qs8-gemm/MRx4c16-wasmusdot.c.in into QC8 GEMM kernels
# with FP32 requantization for MR=1..4. Each xngen call is backgrounded with '&'.
tools/xngen src/qs8-gemm/MRx4c16-wasmusdot.c.in -D MR=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmusdot.c.in -D MR=2 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmusdot.c.in -D MR=3 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmusdot.c.in -D MR=4 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmusdot.c &

################################### ARM NEON ##################################
tools/xngen src/qs8-gemm/neon-mlal-lane.c.in -D MR=1 -D NR=8 -D PREFETCH=0 -D REQUANTIZATION= -D DATATYPE=QD8 -D ARMV8=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x8-minmax-neon-mlal-lane.c &
tools/xngen src/qs8-gemm/neon-mlal-lane.c.in -D MR=2 -D NR=8 -D PREFETCH=0 -D REQUANTIZATION= -D DATATYPE=QD8 -D ARMV8=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x8-minmax-neon-mlal-lane.c &
Expand Down
5 changes: 5 additions & 0 deletions scripts/generate-qs8-igemm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ tools/xngen src/qs8-igemm/MRx4c16-wasmsdot.c.in -D MR=2 -D REQUANTIZATION=FP32 -
tools/xngen src/qs8-igemm/MRx4c16-wasmsdot.c.in -D MR=3 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmsdot.c &
tools/xngen src/qs8-igemm/MRx4c16-wasmsdot.c.in -D MR=4 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmsdot.c &

# WAsm USDOT: expand src/qs8-igemm/MRx4c16-wasmusdot.c.in into QC8 IGEMM kernels
# with FP32 requantization for MR=1..4. Each xngen call is backgrounded with '&'.
tools/xngen src/qs8-igemm/MRx4c16-wasmusdot.c.in -D MR=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-igemm/MRx4c16-wasmusdot.c.in -D MR=2 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-igemm/MRx4c16-wasmusdot.c.in -D MR=3 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-igemm/MRx4c16-wasmusdot.c.in -D MR=4 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmusdot.c &

################################## ARMv6 SIMD #################################
tools/xngen src/qs8-igemm/c4-armsimd32.c.in -D MR=1 -D NR=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x1c4-minmax-fp32-armsimd32.c &
tools/xngen src/qs8-igemm/c4-armsimd32.c.in -D MR=1 -D NR=2 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x2c4-minmax-fp32-armsimd32.c &
Expand Down
Loading

0 comments on commit ec2296e

Please sign in to comment.