GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock > Struct Template Reference
Classes |
Public Types |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock > Struct Template Reference
#include <gridwise_gemm_xdlops_streamk.hpp>
Classes | |
| struct | Argument |
| struct | LStr |
| struct | LStr< ck::tensor_layout::gemm::RowMajor > |
| struct | LStr< ck::tensor_layout::gemm::ColumnMajor > |
Public Types | |
| using | ThisThreadBlock = ThisThreadBlock<BlockSize> |
| using | FloatAcc = FloatAcc_ |
| using | FloatCShuffle = FloatAcc |
| using | Block2CTileMap = Block2CTileMap_ |
| using | FloatAB = FloatAB_ |
| using | FloatC = FloatC_ |
| using | CGridDesc_M_N = remove_cvref_t<decltype(MakeCGridDescriptor_M_N(1, 1, 1, 1, 1))> |
Static Public Member Functions | |
| __host__ static __device__ auto | CalculateGridSize (const Argument &karg) |
| __host__ static __device__ auto | CalculateK0 (index_t KPad) |
| __host__ static __device__ auto | MakeAGridDescriptor_K0_M_K1 (index_t M, index_t MPad, index_t K, index_t KPad, index_t StrideA) |
| __host__ static __device__ auto | MakeBGridDescriptor_K0_N_K1 (index_t K, index_t KPad, index_t N, index_t NPad, index_t StrideB) |
| __host__ static __device__ auto | MakeCGridDescriptor_M_N (index_t M, index_t MPad, index_t N, index_t NPad, index_t StrideC) |
| __host__ static __device__ constexpr auto | GetABlockDescriptor_AK0PerBlock_MPerBlock_AK1 () |
| __host__ static __device__ constexpr auto | GetBBlockDescriptor_BK0PerBlock_NPerBlock_BK1 () |
| __host__ static __device__ constexpr index_t | GetSharedMemoryNumberOfByte () |
| __host__ static __device__ constexpr bool | CheckValidity (const Argument &karg) |
| __host__ static __device__ constexpr bool | CalculateHasMainK0BlockLoop (index_t K0) |
| template<typename CGridDesc> | |
| __host__ static __device__ constexpr auto | MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock (const CGridDesc &c_m_n_grid_desc) |
| template<typename CGridDesc> | |
| __host__ static __device__ constexpr auto | MakeCBlockClusterAdaptor (const CGridDesc &c_m_n_grid_desc, index_t, index_t, index_t KBatch) |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_MBlock_MPerShuffle_NBlock_NPerShuffle () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_MShuffleRepeat_MPerShuffle_NShuffleRepeat_NPerShuffle () |
| __host__ static __device__ constexpr auto | GetClusterLengthReduction () |
| __host__ static __device__ constexpr auto | GetPartialAccBlockDescriptor () |
| static __device__ void | Run (const FloatAB *p_a_grid, const FloatAB *p_b_grid, FloatC *p_c_grid, void *p_workspace, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, Block2CTileMap block_mapping, void *__restrict__ p_shared_block) |
| static std::string | GetTypeString () |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | I6 = Number<6>{} |
| static constexpr auto | I7 = Number<7>{} |
| static constexpr auto | K1 = Number<K1Value>{} |
| static constexpr auto | M01 = 1 |
| static constexpr auto | N01 = 1 |
| static constexpr auto | KPerBlock = K0PerBlock * K1 |
| static constexpr index_t | MXdlPerWave = MRepeat |
| static constexpr index_t | NXdlPerWave = NRepeat |
Member Typedef Documentation
◆ Block2CTileMap
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::Block2CTileMap = Block2CTileMap_ |
◆ CGridDesc_M_N
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::CGridDesc_M_N = remove_cvref_t<decltype(MakeCGridDescriptor_M_N(1, 1, 1, 1, 1))> |
◆ FloatAB
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::FloatAB = FloatAB_ |
◆ FloatAcc
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::FloatAcc = FloatAcc_ |
◆ FloatC
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::FloatC = FloatC_ |
◆ FloatCShuffle
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::FloatCShuffle = FloatAcc |
◆ ThisThreadBlock
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| using ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk< BlockSize, Block2CTileMap_, FloatAB_, FloatAcc_, FloatC_, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
Member Function Documentation
◆ CalculateGridSize()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ auto CalculateGridSize (const Argument &karg) |
inline static |
◆ CalculateHasMainK0BlockLoop()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ constexpr bool CalculateHasMainK0BlockLoop (index_t K0) |
inline static constexpr |
◆ CalculateK0()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ auto CalculateK0 (index_t KPad) |
inline static |
◆ CheckValidity()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ constexpr bool CheckValidity (const Argument &karg) |
inline static constexpr |
◆ GetABlockDescriptor_AK0PerBlock_MPerBlock_AK1()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ constexpr auto GetABlockDescriptor_AK0PerBlock_MPerBlock_AK1 () |
inline static constexpr |
◆ GetBBlockDescriptor_BK0PerBlock_NPerBlock_BK1()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
| __host__ static __device__ constexpr auto GetBBlockDescriptor_BK0PerBlock_NPerBlock_BK1 () |
inline static constexpr |
◆ GetCBlockDescriptor_MBlock_MPerShuffle_NBlock_NPerShuffle()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
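| __host__ static __device__ constexpr auto GetCBlockDescriptor_MBlock_MPerShuffle_NBlock_NPerShuffle () |
inline static constexpr |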
◆ GetCBlockDescriptor_MShuffleRepeat_MPerShuffle_NShuffleRepeat_NPerShuffle()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static constexpr |
◆ GetClusterLengthReduction()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static constexpr |
◆ GetPartialAccBlockDescriptor()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static constexpr |
◆ GetSharedMemoryNumberOfByte()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static constexpr |
◆ GetTypeString()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static |
◆ MakeAGridDescriptor_K0_M_K1()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static |
◆ MakeBGridDescriptor_K0_N_K1()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static |
◆ MakeCBlockClusterAdaptor()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
template<typename CGridDesc>
|
inline static constexpr |
◆ MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
template<typename CGridDesc>
|
inline static constexpr |
◆ MakeCGridDescriptor_M_N()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static |
◆ Run()
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
inline static |
Member Data Documentation
◆ I0
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I1
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I2
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I3
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I4
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I5
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I6
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ I7
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ K1
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ KPerBlock
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ M01
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ MXdlPerWave
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ N01
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
◆ NXdlPerWave
template<index_t BlockSize, typename Block2CTileMap_, typename FloatAB_, typename FloatAcc_, typename FloatC_, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>
|
static constexpr |
The documentation for this struct was generated from the following file: