BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma > Struct Template Reference

BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma > Struct Template Reference
ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma > Struct Template Reference

#include <blockwise_gemm_pipeline_wmmaops.hpp>

Static Public Member Functions

static constexpr auto Print ()

Static Public Attributes

static constexpr index_t WaveSize = 32
static constexpr index_t WaveNumM = MPerBlock / (MRepeat * MPerWmma)
static constexpr index_t WaveNumN = NPerBlock / (NRepeat * NPerWmma)
static constexpr index_t A_LDS_Read_Width = ALDSReadWidth
static constexpr index_t B_LDS_Read_Width = BLDSReadWidth
static constexpr index_t A_Buffer_Load_Inst_Num
static constexpr index_t B_Buffer_Load_Inst_Num
static constexpr index_t A_LDS_Write_Inst_Num
static constexpr index_t B_LDS_Write_Inst_Num
static constexpr index_t A_LDS_Read_Inst_Num
static constexpr index_t B_LDS_Read_Inst_Num
static constexpr index_t C_WMMA_Inst_Num

Member Function Documentation

◆ Print()

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
constexpr auto ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::Print ( )
inline static constexpr

Member Data Documentation

◆ A_Buffer_Load_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::A_Buffer_Load_Inst_Num
static constexpr
Initial value:
= MPerBlock * KPerBlock / (BlockSize * ABufferLoadWidth)

◆ A_LDS_Read_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::A_LDS_Read_Inst_Num
static constexpr
Initial value:
= WaveNumN * MPerBlock * KPerBlock / (BlockSize * ALDSReadWidth)
static constexpr index_t WaveNumN
Definition blockwise_gemm_pipeline_wmmaops.hpp:29

◆ A_LDS_Read_Width

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::A_LDS_Read_Width = ALDSReadWidth
static constexpr

◆ A_LDS_Write_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::A_LDS_Write_Inst_Num
static constexpr
Initial value:
= MPerBlock * KPerBlock / (BlockSize * ALDSWriteWidth)

◆ B_Buffer_Load_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::B_Buffer_Load_Inst_Num
static constexpr
Initial value:
= NPerBlock * KPerBlock / (BlockSize * BBufferLoadWidth)

◆ B_LDS_Read_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::B_LDS_Read_Inst_Num
static constexpr
Initial value:
= WaveNumM * NPerBlock * KPerBlock / (BlockSize * BLDSReadWidth)
static constexpr index_t WaveNumM
Definition blockwise_gemm_pipeline_wmmaops.hpp:28

◆ B_LDS_Read_Width

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::B_LDS_Read_Width = BLDSReadWidth
static constexpr

◆ B_LDS_Write_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::B_LDS_Write_Inst_Num
static constexpr
Initial value:
= NPerBlock * KPerBlock / (BlockSize * BLDSWriteWidth)

◆ C_WMMA_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::C_WMMA_Inst_Num
static constexpr
Initial value:
= MPerBlock * NPerBlock * KPerBlock /
(BlockSize / WaveSize) /
(MPerWmma * NPerWmma * KPerWmma)
static constexpr index_t WaveSize
Definition blockwise_gemm_pipeline_wmmaops.hpp:27

◆ WaveNumM

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::WaveNumM = MPerBlock / (MRepeat * MPerWmma)
static constexpr

◆ WaveNumN

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::WaveNumN = NPerBlock / (NRepeat * NPerWmma)
static constexpr

◆ WaveSize

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerWmma, index_t NPerWmma, index_t KPerWmma>
index_t ck::BlockwiseGemmWmmaops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerWmma, NPerWmma, KPerWmma >::WaveSize = 32
static constexpr

The documentation for this struct was generated from the following file: