ComputeBasePtrOfStridedBatch Struct Reference#
ck::tensor_operation::device::DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle< NumDimG, NumDimM, NumDimN, NumDimK, NumDimO, ADataType, BDataType, B1DataType, CDataType, Acc0BiasDataType, Acc1BiasDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, ASpec, BSpec, B1Spec, CSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpec, LoopSched >::ComputeBasePtrOfStridedBatch Struct Reference
#include <device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp>
Public Member Functions | |
| ComputeBasePtrOfStridedBatch (const AGridDesc_G_M_K &a_grid_desc_g_m_k, const BGridDesc_G_N_K &b_grid_desc_g_n_k, const B1GridDesc_G_N_K &b1_grid_desc_g_n_k, const CGridDesc_G_M_N &c_grid_desc_g_m_n) | |
| __host__ __device__ constexpr long_index_t | GetABasePtr (index_t g_idx) const |
| __host__ __device__ constexpr long_index_t | GetBBasePtr (index_t g_idx) const |
| __host__ __device__ constexpr long_index_t | GetB1BasePtr (index_t g_idx) const |
| __host__ __device__ constexpr long_index_t | GetCBasePtr (index_t g_idx) const |
Constructor & Destructor Documentation
◆ ComputeBasePtrOfStridedBatch()
template<index_t NumDimG, index_t NumDimM, index_t NumDimN, index_t NumDimK, index_t NumDimO, typename ADataType, typename BDataType, typename B1DataType, typename CDataType, typename Acc0BiasDataType, typename Acc1BiasDataType, typename GemmAccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization BSpec, TensorSpecialization B1Spec, TensorSpecialization CSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1, index_t BK1, index_t B1K1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, LoopScheduler LoopSched = LoopScheduler::Default>
|
inline |
Member Function Documentation
◆ GetABasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimN, index_t NumDimK, index_t NumDimO, typename ADataType, typename BDataType, typename B1DataType, typename CDataType, typename Acc0BiasDataType, typename Acc1BiasDataType, typename GemmAccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization BSpec, TensorSpecialization B1Spec, TensorSpecialization CSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1, index_t BK1, index_t B1K1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetB1BasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimN, index_t NumDimK, index_t NumDimO, typename ADataType, typename BDataType, typename B1DataType, typename CDataType, typename Acc0BiasDataType, typename Acc1BiasDataType, typename GemmAccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization BSpec, TensorSpecialization B1Spec, TensorSpecialization CSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1, index_t BK1, index_t B1K1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetBBasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimN, index_t NumDimK, index_t NumDimO, typename ADataType, typename BDataType, typename B1DataType, typename CDataType, typename Acc0BiasDataType, typename Acc1BiasDataType, typename GemmAccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization BSpec, TensorSpecialization B1Spec, TensorSpecialization CSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1, index_t BK1, index_t B1K1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetCBasePtr()
template<index_t NumDimG, index_t NumDimM, index_t NumDimN, index_t NumDimK, index_t NumDimO, typename ADataType, typename BDataType, typename B1DataType, typename CDataType, typename Acc0BiasDataType, typename Acc1BiasDataType, typename GemmAccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, TensorSpecialization ASpec, TensorSpecialization BSpec, TensorSpecialization B1Spec, TensorSpecialization CSpec, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1, index_t BK1, index_t B1K1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, MaskingSpecialization MaskingSpec, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
The documentation for this struct was generated from the following file: