ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch > Struct Template Reference

ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch > Struct Template Reference

Composable Kernel: ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch > Struct Template Reference
ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch > Struct Template Reference

#include <threadwise_tensor_slice_transfer_v7r3_scatter.hpp>

Public Types

using Index = MultiIndex<nDim>
using SrcCoords = decltype(MakeCoordinates(SrcDescs{}, StaticallyIndexedArray<Index, nSrc>{}))
using DstCoords = decltype(MakeCoordinates(DstDescs{}, StaticallyIndexedArray<Index, nDst>{}))
using SrcSpaceFillingCurve
using DstSpaceFillingCurve

Public Member Functions

__device__ constexpr ThreadwiseTensorSliceTransfer_v7r3_scatter (const SrcDescs &src_descs, const StaticallyIndexedArray< Index, nSrc > &src_slice_origins, const DstDescs &dst_descs, const StaticallyIndexedArray< Index, nDst > &dst_slice_origins, const ElementwiseOperation &element_op)
template<typename Indices, enable_if_t< SrcDescs::Size()==Indices::Size(), bool > = false>
__device__ void SetSrcSliceOrigins (const SrcDescs &src_descs, const Indices &src_slice_origin_idxs)
template<typename Indices, enable_if_t< DstDescs::Size()==Indices::Size(), bool > = false>
__device__ void SetDstSliceOrigins (const DstDescs &dst_descs, const Indices &dst_slice_origin_idxs)
template<typename SrcBuffers, index_t ThreadScratchId = 0, enable_if_t< SrcDescs::Size()==SrcBuffers::Size(), bool > = false>
__device__ void RunRead (const SrcDescs &src_descs, const SrcBuffers &src_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<index_t ThreadScratchId = 0>
__device__ void OOBCheck (Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<index_t ThreadScratchId = 0>
__device__ void TransposeFromElmToDst (Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<typename DstBuffers, index_t ThreadScratchId = 0, enable_if_t< DstDescs::Size()==1 &&DstBuffers::Size()==1, bool > = false>
__device__ void RunWrite (const DstDescs &dst_descs, DstBuffers dst_bufs, StaticallyIndexedArray< IndexType, scatter_num > &scatter_offsets, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<typename SrcBuffers, typename DstBuffers, enable_if_t< SrcDescs::Size()==SrcBuffers::Size() &&DstDescs::Size()==DstBuffers::Size(), bool > = false>
__device__ void Run (const SrcDescs &src_descs, const SrcBuffers &src_bufs, const DstDescs &dst_descs, DstBuffers dst_bufs, StaticallyIndexedArray< IndexType, scatter_num > &scatter_offsets)
template<index_t ISrc>
__device__ void MoveSrcSliceWindow (const SrcDescs &src_descs, Number< ISrc > iSrc, const Index &src_slice_origin_step_idx)
template<index_t IDst>
__device__ void MoveDstSliceWindow (const DstDescs &dst_descs, Number< IDst > iDst, const Index &dst_slice_origin_step_idx)

Static Public Member Functions

template<typename Descs, typename Indices, enable_if_t< Descs::Size()==Indices::Size(), bool > = false>
static constexpr auto MakeCoordinates (const Descs &descs, const Indices &indices)
template<typename DataTypes, index_t ScalarPerVector>
static __device__ auto generate_vectors ()
static __device__ constexpr auto GetSrcCoordinateResetStep ()
static __device__ constexpr auto GetDstCoordinateResetStep ()
static __device__ constexpr auto GetSrcThreadScratchDescriptor ()
static __device__ constexpr auto GetDstThreadScratchDescriptor ()

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto SrcScalarPerVector = SrcScalarPerVectors{}[I0]
static constexpr index_t nDim = SliceLengths::Size()
static constexpr index_t nSrc = SrcDescs::Size()
static constexpr index_t nDst = DstDescs::Size()
static constexpr index_t scatter_num = SliceLengths{}.At(Number<ScatterDim>{})
static constexpr auto src_scalar_per_access
static constexpr auto dst_scalar_per_access

Member Typedef Documentation

◆ DstCoords

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::DstCoords = decltype(MakeCoordinates(DstDescs{}, StaticallyIndexedArray<Index, nDst>{}))

◆ DstSpaceFillingCurve

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::DstSpaceFillingCurve
Initial value:
SpaceFillingCurve<SliceLengths,
DstDimAccessOrder,
remove_cv_t<decltype(dst_scalar_per_access)>,
false>

References:
remove_cv_t: typename remove_cv< T >::type remove_cv_t (Definition type.hpp:295)
SpaceFillingCurve: Definition tensor_space_filling_curve.hpp:20
dst_scalar_per_access: static constexpr auto dst_scalar_per_access (Definition threadwise_tensor_slice_transfer_v7r2.hpp:76)

◆ Index

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::Index = MultiIndex<nDim>

◆ SrcCoords

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::SrcCoords = decltype(MakeCoordinates(SrcDescs{}, StaticallyIndexedArray<Index, nSrc>{}))

◆ SrcSpaceFillingCurve

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::SrcSpaceFillingCurve
Initial value:
SpaceFillingCurve<SliceLengths,
SrcDimAccessOrder,
remove_cv_t<decltype(src_scalar_per_access)>,
false>

References:
src_scalar_per_access: static constexpr auto src_scalar_per_access (Definition threadwise_tensor_slice_transfer_v7r2.hpp:73)

Constructor & Destructor Documentation

◆ ThreadwiseTensorSliceTransfer_v7r3_scatter()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
__device__ constexpr ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::ThreadwiseTensorSliceTransfer_v7r3_scatter ( const SrcDescs & src_descs,
const StaticallyIndexedArray< Index, nSrc > & src_slice_origins,
const DstDescs & dst_descs,
const StaticallyIndexedArray< Index, nDst > & dst_slice_origins,
const ElementwiseOperation & element_op )
inline constexpr

Member Function Documentation

◆ generate_vectors()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename DataTypes, index_t ScalarPerVector>
__device__ auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::generate_vectors ( )
inline static

◆ GetDstCoordinateResetStep()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::GetDstCoordinateResetStep ( )
inline static constexpr

◆ GetDstThreadScratchDescriptor()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::GetDstThreadScratchDescriptor ( )
inline static constexpr

◆ GetSrcCoordinateResetStep()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::GetSrcCoordinateResetStep ( )
inline static constexpr

◆ GetSrcThreadScratchDescriptor()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::GetSrcThreadScratchDescriptor ( )
inline static constexpr

◆ MakeCoordinates()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename Descs, typename Indices, enable_if_t< Descs::Size()==Indices::Size(), bool > = false>
constexpr auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::MakeCoordinates ( const Descs & descs,
const Indices & indices )
inline static constexpr

◆ MoveDstSliceWindow()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<index_t IDst>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::MoveDstSliceWindow ( const DstDescs & dst_descs,
Number< IDst > iDst,
const Index & dst_slice_origin_step_idx )
inline

◆ MoveSrcSliceWindow()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<index_t ISrc>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::MoveSrcSliceWindow ( const SrcDescs & src_descs,
Number< ISrc > iSrc,
const Index & src_slice_origin_step_idx )
inline

◆ OOBCheck()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<index_t ThreadScratchId = 0>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::OOBCheck ( Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{})
inline

◆ Run()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename SrcBuffers, typename DstBuffers, enable_if_t< SrcDescs::Size()==SrcBuffers::Size() &&DstDescs::Size()==DstBuffers::Size(), bool > = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::Run ( const SrcDescs & src_descs,
const SrcBuffers & src_bufs,
const DstDescs & dst_descs,
DstBuffers dst_bufs,
StaticallyIndexedArray< IndexType, scatter_num > & scatter_offsets )
inline

◆ RunRead()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename SrcBuffers, index_t ThreadScratchId = 0, enable_if_t< SrcDescs::Size()==SrcBuffers::Size(), bool > = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::RunRead ( const SrcDescs & src_descs,
const SrcBuffers & src_bufs,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ RunWrite()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename DstBuffers, index_t ThreadScratchId = 0, enable_if_t< DstDescs::Size()==1 &&DstBuffers::Size()==1, bool > = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::RunWrite ( const DstDescs & dst_descs,
DstBuffers dst_bufs,
StaticallyIndexedArray< IndexType, scatter_num > & scatter_offsets,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ SetDstSliceOrigins()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename Indices, enable_if_t< DstDescs::Size()==Indices::Size(), bool > = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::SetDstSliceOrigins ( const DstDescs & dst_descs,
const Indices & dst_slice_origin_idxs )
inline

◆ SetSrcSliceOrigins()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<typename Indices, enable_if_t< SrcDescs::Size()==Indices::Size(), bool > = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::SetSrcSliceOrigins ( const SrcDescs & src_descs,
const Indices & src_slice_origin_idxs )
inline

◆ TransposeFromElmToDst()

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
template<index_t ThreadScratchId = 0>
__device__ void ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::TransposeFromElmToDst ( Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{})
inline

Member Data Documentation

◆ dst_scalar_per_access

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::dst_scalar_per_access
static constexpr
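Initial value:
= generate_sequence(
detail::lambda_scalar_per_access<DstVectorDim, DstScalarPerVector>{}, Number<nDim>{})

References:
Number: integral_constant< index_t, N > Number (Definition number.hpp:12)
generate_sequence: __host__ __device__ constexpr auto generate_sequence(F, Number< N >) (Definition sequence_helper.hpp:18)
detail::lambda_scalar_per_access: Definition threadwise_tensor_slice_transfer_util.hpp:20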

◆ I0

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::I3 = Number<3>{}
static constexpr

◆ nDim

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
index_t ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::nDim = SliceLengths::Size()
static constexpr

◆ nDst

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
index_t ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::nDst = DstDescs::Size()
static constexpr

◆ nSrc

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
index_t ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::nSrc = SrcDescs::Size()
static constexpr

◆ scatter_num

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
index_t ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::scatter_num = SliceLengths{}.At(Number<ScatterDim>{})
static constexpr

◆ src_scalar_per_access

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::src_scalar_per_access
static constexpr

◆ SrcScalarPerVector

template<typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename SrcResetCoordinateAfterRunFlags, typename DstResetCoordinateAfterRunFlags, typename IndexType, index_t ScatterDim = 1, bool OutputScatter = true, index_t ScatterWeightIdx = 3, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v7r3_scatter< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, SrcResetCoordinateAfterRunFlags, DstResetCoordinateAfterRunFlags, IndexType, ScatterDim, OutputScatter, ScatterWeightIdx, NumThreadScratch >::SrcScalarPerVector = SrcScalarPerVectors{}[I0]
static constexpr

The documentation for this struct was generated from the following file: