template<bool transpose>
void IntegerGemm(const uint8_t *mat1, int M, int N, int K, const uint8_t *mat2,
int offset1, int offset2, int32_t *out) {
using gemmlowp::MatrixMap;
using gemmlowp::GemmContext;
using gemmlowp::GemmWithOutputPipeline;
using gemmlowp::MapOrder;
using gemmlowp::DefaultL8R8BitDepthParams;
// left(right)-hand side
MatrixMap<const uint8_t, MapOrder::RowMajor>
lhs(mat1, M, K);
MatrixMap<const uint8_t, !transpose ? MapOrder::RowMajor : MapOrder::ColMajor>
rhs(mat2, K, N);
MatrixMap<int32_t, MapOrder::RowMajor> result(out, M, N);
const std::tuple<> empty_pipeline = {};
GemmContext context;
int max_num_threads = 1;
context.set_max_num_threads(max_num_threads);
GemmWithOutputPipeline<uint8_t, int32_t, DefaultL8R8BitDepthParams>(
&context, lhs, rhs, &result, -offset1, -offset2, empty_pipeline);
}