GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > Struct Template Reference

GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1&lt; BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector &gt; Struct Template Reference#

Composable Kernel: ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > Struct Template Reference
ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > Struct Template Reference

#include <gridwise_gemm_xdlops_skip_b_lds_v1.hpp>

Public Types

using ThisThreadBlock = ThisThreadBlock<BlockSize>
using DefaultBlock2CTileMap = decltype(MakeDefaultBlock2CTileMap(CGridDesc_M_N{}, 1, 1))

Static Public Member Functions

__host__ static __device__ constexpr auto GetABlockDescriptor_K0PerBlock_MPerBlock_K1 ()
__host__ static __device__ constexpr index_t GetSharedMemoryNumberOfByte ()
template<InMemoryDataOperationEnum CGlobalMemoryDataOperation_ = InMemoryDataOperationEnum::Set>
static __device__ bool constexpr IsValidCompilationParameter ()
__host__ static __device__ constexpr bool CheckValidity (const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1, const CGridDesc_M_N &c_grid_desc_m_n, index_t M01, index_t N01)
__host__ static __device__ constexpr index_t CalculateGridSize (const CGridDesc_M_N &c_grid_desc_m_n)
__host__ static __device__ constexpr bool CalculateHasMainK0BlockLoop (index_t K0)
__host__ static __device__ constexpr auto MakeBGridDescriptor_K0_K1_K2_N0_N1_N2_N3_K3 (const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1)
static __device__ auto GetWaveIdx ()
static __device__ auto GetWaveKNIdx (const index_t thread_id)
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
__host__ static __device__ constexpr auto MakeDefaultBlock2CTileMap (const CGridDesc_M_N &c_grid_desc_m_n, index_t M01, index_t N01)
template<bool HasMainK0BlockLoop, typename BGridDesc_K0_K1_K2_N0_N1_N2_N3_K3, typename CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2, typename Block2CTileMap = DefaultBlock2CTileMap>
static __device__ void Run (const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, void *__restrict__ p_shared, const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1, const BGridDesc_K0_K1_K2_N0_N1_N2_N3_K3 &b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3, const CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2 &c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CElementwiseOperation &c_element_op, const Block2CTileMap &block_2_ctile_map)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto I6 = Number<6>{}
static constexpr auto I7 = Number<7>{}
static constexpr auto K1 = Number<K1Value>{}
static constexpr index_t MWaves = MPerBlock / (MXdlPerWave * MPerXdl)
static constexpr index_t NWaves = NPerBlock / (NXdlPerWave * NPerXdl)
static constexpr index_t WaveSize = BlockSize / (MWaves * NWaves)
static constexpr auto xdlops_gemm = XdlopsGemm<FloatAB, MPerXdl, NPerXdl, K1>{}
static constexpr index_t K0PerThread = K0PerBlock / xdlops_gemm.K0PerXdlops

Member Typedef Documentation

◆ DefaultBlock2CTileMap

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::DefaultBlock2CTileMap = decltype(MakeDefaultBlock2CTileMap(CGridDesc_M_N{}, 1, 1))

◆ ThisThreadBlock

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::ThisThreadBlock = ThisThreadBlock<BlockSize>

Member Function Documentation

◆ CalculateGridSize()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::CalculateGridSize ( const CGridDesc_M_N & c_grid_desc_m_n)
inlinestaticconstexpr

◆ CalculateHasMainK0BlockLoop()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr bool ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::CalculateHasMainK0BlockLoop ( index_t K0)
inlinestaticconstexpr

◆ CheckValidity()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr bool ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::CheckValidity ( const AGridDesc_K0_M_K1 & a_grid_desc_k0_m_k1,
const BGridDesc_K0_N_K1 & b_grid_desc_k0_n_k1,
const CGridDesc_M_N & c_grid_desc_m_n,
index_t M01,
index_t N01 )
inlinestaticconstexpr

◆ GetABlockDescriptor_K0PerBlock_MPerBlock_K1()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GetABlockDescriptor_K0PerBlock_MPerBlock_K1 ( )
inlinestaticconstexpr

◆ GetSharedMemoryNumberOfByte()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GetSharedMemoryNumberOfByte ( )
inlinestaticconstexpr

◆ GetWaveIdx()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__device__ auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GetWaveIdx ( )
inlinestatic

◆ GetWaveKNIdx()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__device__ auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GetWaveKNIdx ( const index_t thread_id)
inlinestatic

◆ IsValidCompilationParameter()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
template<InMemoryDataOperationEnum CGlobalMemoryDataOperation_ = InMemoryDataOperationEnum::Set>
__device__ bool constexpr ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::IsValidCompilationParameter ( )
inlinestaticconstexpr

◆ MakeBGridDescriptor_K0_K1_K2_N0_N1_N2_N3_K3()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::MakeBGridDescriptor_K0_K1_K2_N0_N1_N2_N3_K3 ( const BGridDesc_K0_N_K1 & b_grid_desc_k0_n_k1)
inlinestaticconstexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inlinestaticconstexpr

◆ MakeDefaultBlock2CTileMap()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
__host__ static __device__ constexpr auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::MakeDefaultBlock2CTileMap ( const CGridDesc_M_N & c_grid_desc_m_n,
index_t M01,
index_t N01 )
inlinestaticconstexpr

◆ Run()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
template<bool HasMainK0BlockLoop, typename BGridDesc_K0_K1_K2_N0_N1_N2_N3_K3, typename CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2, typename Block2CTileMap = DefaultBlock2CTileMap>
__device__ void ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::Run ( const FloatAB *__restrict__ p_a_grid,
const FloatAB *__restrict__ p_b_grid,
FloatC *__restrict__ p_c_grid,
void *__restrict__ p_shared,
const AGridDesc_K0_M_K1 & a_grid_desc_k0_m_k1,
const BGridDesc_K0_K1_K2_N0_N1_N2_N3_K3 & b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3,
const CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2 & c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2,
const AElementwiseOperation & a_element_op,
const BElementwiseOperation & b_element_op,
const CElementwiseOperation & c_element_op,
const Block2CTileMap & block_2_ctile_map )
inlinestatic

Member Data Documentation

◆ I0

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I0 = Number<0>{}
staticconstexpr

◆ I1

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I1 = Number<1>{}
staticconstexpr

◆ I2

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I2 = Number<2>{}
staticconstexpr

◆ I3

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I3 = Number<3>{}
staticconstexpr

◆ I4

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I4 = Number<4>{}
staticconstexpr

◆ I5

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I5 = Number<5>{}
staticconstexpr

◆ I6

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I6 = Number<6>{}
staticconstexpr

◆ I7

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::I7 = Number<7>{}
staticconstexpr

◆ K0PerThread

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::K0PerThread = K0PerBlock / xdlops_gemm.K0PerXdlops
staticconstexpr

◆ K1

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::K1 = Number<K1Value>{}
staticconstexpr

◆ MWaves

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::MWaves = MPerBlock / (MXdlPerWave * MPerXdl)
staticconstexpr

◆ NWaves

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::NWaves = NPerBlock / (NXdlPerWave * NPerXdl)
staticconstexpr

◆ WaveSize

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
index_t ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::WaveSize = BlockSize / (MWaves * NWaves)
staticconstexpr

◆ xdlops_gemm

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, index_t BBlockTransferSrcScalarPerVector, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockBufferSize, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector>
auto ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferSrcScalarPerVector, BThreadTransferSrcResetCoordinateAfterRun, BBlockBufferSize, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::xdlops_gemm = XdlopsGemm<FloatAB, MPerXdl, NPerXdl, K1>{}
staticconstexpr

The documentation for this struct was generated from the following file: