BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack > Struct Template Reference

BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack > Struct Template Reference
ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack > Struct Template Reference

#include <blockwise_gemm_dpp.hpp>

Public Types

using ThisThreadBlock = ThisThreadBlock<BlockSize>

Public Member Functions

__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2 ()
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const

Static Public Member Functions

static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex_M0_M1_M2_K ()
static __device__ auto CalculateBThreadOriginDataIndex_N0_N1_N2_K ()
template<index_t m0, index_t n0>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_N2 ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K ()
__host__ static __device__ constexpr auto MakeBBlockDescriptor_N0_N1_N2_K ()

Public Attributes

StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, dpp_gemm.GetRegSizePerDpp(), true > c_thread_buf_

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t MPerBlock = AK0MK1BlockDesc{}.GetLength(I1)
static constexpr index_t NPerBlock = BK0NK1BlockDesc{}.GetLength(I1)
static constexpr index_t KPerBlock
static constexpr index_t MWaves = MPerBlock / (MRepeat * MPerDpp)
static constexpr index_t NWaves = NPerBlock / (NRepeat * NPerDpp)
static constexpr index_t WaveSize = BlockSize / MWaves / NWaves
static constexpr index_t A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr index_t B_K0 = BK0NK1BlockDesc{}.GetLength(I0)
static constexpr index_t A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr index_t B_K1 = BK0NK1BlockDesc{}.GetLength(I2)
static constexpr auto dpp_gemm = DppGemm<ABDataType, MPerDpp, NPerDpp, KPack>{}
static constexpr index_t KPerThread = KPerBlock / dpp_gemm.K0PerDpp
static constexpr auto a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()
static constexpr auto b_block_desc_n0_n1_n2_k = MakeBBlockDescriptor_N0_N1_N2_K()

Protected Types

using AThreadCopy
using BThreadCopy

Protected Attributes

AThreadCopy a_thread_copy_ {CalculateAThreadOriginDataIndex_M0_M1_M2_K()}
BThreadCopy b_thread_copy_ {CalculateBThreadOriginDataIndex_N0_N1_N2_K()}

Static Protected Attributes

static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Detailed Description

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
struct ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >

Blockwise GEMM that uses the DPP instruction modifier to limit the amount of data loaded by each thread by sharing data between the threads in a lanegroup.

In every iteration, each wave calculates a C tile of size MPerDpp * NPerDpp; there are MRepeat iterations along the M dimension and NRepeat iterations along the N dimension. In total, the algorithm runs using (MPerBlock / (MRepeat * MPerDpp)) * (NPerBlock / (NRepeat * NPerDpp)) waves.

Member Typedef Documentation

◆ AThreadCopy

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::AThreadCopy
protected
Initial value:
ABDataType,
decltype(a_thread_desc_),
3,
static constexpr auto a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_dpp.hpp:254
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260

◆ BThreadCopy

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::BThreadCopy
protected
Initial value:
ABDataType,
decltype(b_thread_desc_),
3,
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr auto b_block_desc_n0_n1_n2_k
Definition blockwise_gemm_dpp.hpp:255

◆ ThisThreadBlock

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::ThisThreadBlock = ThisThreadBlock<BlockSize>

Constructor & Destructor Documentation

◆ BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ __device__ ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2 ( )
inline

Member Function Documentation

◆ CalculateAThreadOriginDataIndex_M0_M1_M2_K()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::CalculateAThreadOriginDataIndex_M0_M1_M2_K ( )
inline static

◆ CalculateBThreadOriginDataIndex_N0_N1_N2_K()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::CalculateBThreadOriginDataIndex_N0_N1_N2_K ( )
inline static

◆ CalculateCThreadOriginDataIndex()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
template<index_t m0, index_t n0>
__device__ auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 >  )
inline static

◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_N2 ( )
inline static constexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_M0_N0_M1_N1_M2_N2 ( )
inline static constexpr

◆ GetCThreadBuffer()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ __device__ constexpr auto & ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetCThreadBuffer ( )
inline constexpr

◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2 ( )
inline static constexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_M0_N0_M1_N1_M2_N2 ( )
inline static constexpr

◆ GetWaveIdx()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::GetWaveIdx ( )
inline static

◆ MakeABlockDescriptor_M0_M1_M2_K()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MakeABlockDescriptor_M0_M1_M2_K ( )
inline static constexpr

◆ MakeBBlockDescriptor_N0_N1_N2_K()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MakeBBlockDescriptor_N0_N1_N2_K ( )
inline static constexpr

◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_N2 ( const CGridDesc_G_M_N & c_grid_desc_g_m_n)
inline static constexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_N2()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_M0_N0_M1_N1_M2_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inline static constexpr

◆ Run()

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::Run ( const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_block_buf,
CThreadBuffer & c_thread_buf ) const
inline

Member Data Documentation

◆ a_block_desc_m0_m1_m2_k

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()
static constexpr

◆ A_K0

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr

◆ A_K1

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr

◆ a_thread_copy_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
AThreadCopy ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::a_thread_copy_ {CalculateAThreadOriginDataIndex_M0_M1_M2_K()}
protected

◆ a_thread_desc_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::a_thread_desc_
static constexpr protected
Initial value:
=
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I1
Definition blockwise_gemm_dpp.hpp:35

◆ b_block_desc_n0_n1_n2_k

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::b_block_desc_n0_n1_n2_k = MakeBBlockDescriptor_N0_N1_N2_K()
static constexpr

◆ B_K0

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::B_K0 = BK0NK1BlockDesc{}.GetLength(I0)
static constexpr

◆ B_K1

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::B_K1 = BK0NK1BlockDesc{}.GetLength(I2)
static constexpr

◆ b_thread_copy_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
BThreadCopy ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::b_thread_copy_ {CalculateBThreadOriginDataIndex_N0_N1_N2_K()}
protected

◆ b_thread_desc_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::b_thread_desc_
static constexpr protected

◆ c_thread_buf_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, AccDataType, MRepeat * NRepeat, dpp_gemm.GetRegSizePerDpp(), true> ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::c_thread_buf_

◆ c_thread_desc_

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::c_thread_desc_
static constexpr protected
Initial value:
static constexpr auto dpp_gemm
Definition blockwise_gemm_dpp.hpp:55

◆ dpp_gemm

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::dpp_gemm = DppGemm<ABDataType, MPerDpp, NPerDpp, KPack>{}
static constexpr

◆ I0

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::I3 = Number<3>{}
static constexpr

◆ KPerBlock

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::KPerBlock
static constexpr
Initial value:
=
BK0NK1BlockDesc{}.GetLength(I0) * BK0NK1BlockDesc{}.GetLength(I2)
static constexpr auto I2
Definition blockwise_gemm_dpp.hpp:36
static constexpr auto I0
Definition blockwise_gemm_dpp.hpp:34

◆ KPerThread

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::KPerThread = KPerBlock / dpp_gemm.K0PerDpp
static constexpr

◆ MPerBlock

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MPerBlock = AK0MK1BlockDesc{}.GetLength(I1)
static constexpr

◆ MWaves

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::MWaves = MPerBlock / (MRepeat * MPerDpp)
static constexpr

◆ NPerBlock

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::NPerBlock = BK0NK1BlockDesc{}.GetLength(I1)
static constexpr

◆ NWaves

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::NWaves = NPerBlock / (NRepeat * NPerDpp)
static constexpr

◆ WaveSize

template<index_t BlockSize, typename ABDataType, typename AccDataType, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerDpp, index_t NPerDpp, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmDpp_ak0mak1_bk0nbk1_m0n0m1n1m2n2< BlockSize, ABDataType, AccDataType, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerDpp, NPerDpp, MRepeat, NRepeat, KPack >::WaveSize = BlockSize / MWaves / NWaves
static constexpr

The documentation for this struct was generated from the following file: