thread_group_tensor_slice_transfer_direct_load.hpp Source File
thread_group_tensor_slice_transfer_direct_load.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:560
__host__ __device__ constexpr auto generate_sequence_v2(F &&f, Number< N >)
Definition sequence_helper.hpp:25
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
__host__ __device__ constexpr auto generate_sequence(F, Number< N >)
Definition sequence_helper.hpp:18
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
__device__ void Run(const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &dst_desc, DstBuffer &dst_buf)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:225
static constexpr auto thread_single_load_size
Definition thread_group_tensor_slice_transfer_direct_load.hpp:71
static constexpr index_t nDim
Definition thread_group_tensor_slice_transfer_direct_load.hpp:56
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &step)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:315
decltype(make_tensor_coordinate(SrcDesc{}, Index{})) SrcCoord
Definition thread_group_tensor_slice_transfer_direct_load.hpp:59
static constexpr auto thread_cluster_lengths
Definition thread_group_tensor_slice_transfer_direct_load.hpp:69
static __device__ constexpr bool AreThreadClusterLengthsValid()
Definition thread_group_tensor_slice_transfer_direct_load.hpp:78
__device__ void ResetDstSliceWindow(const DstDesc &dst_desc)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:219
decltype(make_tensor_coordinate(DstDesc{}, Index{})) DstCoord
Definition thread_group_tensor_slice_transfer_direct_load.hpp:60
static constexpr auto block_slice_lengths
Definition thread_group_tensor_slice_transfer_direct_load.hpp:68
static constexpr auto thread_slice_lengths
Definition thread_group_tensor_slice_transfer_direct_load.hpp:76
static constexpr auto I0
Definition thread_group_tensor_slice_transfer_direct_load.hpp:65
__device__ constexpr ThreadGroupTensorSliceTransfer_DirectLoad(const SrcDesc &src_desc, const Index &src_block_slice_origin, const DstDesc &dst_desc, const Index &dst_block_slice_origin)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:132
decltype(make_tensor_coordinate_step(DstDesc{}, Index{})) DstCoordStep
Definition thread_group_tensor_slice_transfer_direct_load.hpp:63
__device__ void SetDstSliceOrigin(const DstDesc &dst_desc, const Index &dst_slice_origin_idx)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:213
__device__ auto generate_steps(const DescType &desc, int sign)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:322
MultiIndex< nDim > Index
Definition thread_group_tensor_slice_transfer_direct_load.hpp:57
static constexpr auto thread_steps
Definition thread_group_tensor_slice_transfer_direct_load.hpp:75
decltype(make_tensor_coordinate_step(SrcDesc{}, Index{})) SrcCoordStep
Definition thread_group_tensor_slice_transfer_direct_load.hpp:62
__device__ void SetSrcSliceOrigin(const SrcDesc &src_desc, const Index &src_slice_origin_idx)
Definition thread_group_tensor_slice_transfer_direct_load.hpp:207
static constexpr auto I1
Definition thread_group_tensor_slice_transfer_direct_load.hpp:66
Definition threadwise_tensor_slice_transfer_util.hpp:20
Definition functional2.hpp:33
Definition functional3.hpp:97