BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference

BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference

Composable Kernel: ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference
ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference

Blockwise softmax. More...

#include <blockwise_softmax.hpp>

Public Types

using ThreadSliceDesc_M
using ThreadwiseMaxReduce
using ThreadwiseSumReduce
using ThreadClusterLengths_M_K = decltype(ThreadClusterDesc_M_K{}.GetLengths())
using BlockwiseMaxReduce
using BlockwiseSumReduce
using BufferType = StaticBuffer<AddressSpaceEnum::Vgpr, AccDataType, MRepeat, true>

Public Member Functions

template<typename CThreadBuffer, typename WorkspaceBuffer>
__host__ __device__ void Run (CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf)

Public Attributes

BufferType max_value_buf
BufferType sum_value_buf

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr index_t MRepeat = ThreadSliceDesc_M_K{}.GetLength(I0)
static constexpr index_t KRepeat = ThreadSliceDesc_M_K{}.GetLength(I1)

Detailed Description

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
struct ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >

Blockwise softmax.

Template Parameters
BlockSize: Block size
AccDataType: Accumulator data type
ThreadMap_M_K: Thread id to m_k
ThreadClusterDesc_M_K: Threadwise cluster descriptor
ThreadSliceDesc_M_K: Threadwise slices descriptor
IgnoreNaN: Flag to ignore NaN, false by default

Member Typedef Documentation

◆ BlockwiseMaxReduce

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BlockwiseMaxReduce
Initial value:
BlockSize,
ThreadMap_M_K,
false>
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
Definition reduction_functions_blockwise.hpp:101
Definition reduction_operator.hpp:163

◆ BlockwiseSumReduce

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BlockwiseSumReduce
Initial value:
BlockSize,
ThreadMap_M_K,
false>
Definition reduction_operator.hpp:37

◆ BufferType

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BufferType = StaticBuffer<AddressSpaceEnum::Vgpr, AccDataType, MRepeat, true>

◆ ThreadClusterLengths_M_K

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadClusterLengths_M_K = decltype(ThreadClusterDesc_M_K{}.GetLengths())

◆ ThreadSliceDesc_M

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadSliceDesc_M
Initial value:
decltype(make_naive_tensor_descriptor_packed(
make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0))))
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I0
Definition blockwise_softmax.hpp:33

◆ ThreadwiseMaxReduce

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadwiseMaxReduce
Initial value:
typename conditional<
IgnoreNaN,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
false,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
false>>::type
decltype(make_naive_tensor_descriptor_packed( make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0)))) ThreadSliceDesc_M
Definition blockwise_softmax.hpp:38
Definition reduction_functions_threadwise.hpp:23
Definition utility/functional.hpp:100
Definition reduction_functions_accumulate.hpp:17

◆ ThreadwiseSumReduce

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadwiseSumReduce
Initial value:
typename conditional<
IgnoreNaN,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
false,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
false>>::type

Member Function Documentation

◆ Run()

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
template<typename CThreadBuffer, typename WorkspaceBuffer>
__host__ __device__ void ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::Run (CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf)
inline

Member Data Documentation

◆ I0

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
auto ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
auto ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::I1 = Number<1>{}
static constexpr

◆ KRepeat

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
index_t ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::KRepeat = ThreadSliceDesc_M_K{}.GetLength(I1)
static constexpr

◆ max_value_buf

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
BufferType ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::max_value_buf

◆ MRepeat

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
index_t ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::MRepeat = ThreadSliceDesc_M_K{}.GetLength(I0)
static constexpr

◆ sum_value_buf

template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
BufferType ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::sum_value_buf

The documentation for this struct was generated from the following file: