wp_pipeline_agmem_bgmem_creg_v2.hpp Source File
wp_pipeline_agmem_bgmem_creg_v2.hpp
Go to the documentation of this file.
51 template <typename Problem, typename PipelinePolicy = UniversalWeightPreshufflePipelineAgBgCrPolicy>
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_DEVICE void load_int4_tile(WarpTile &dst, const WarpWindow &src)
Definition load_interleaved_pk_type.hpp:46
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition type_traits.hpp:67
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
ck_tile::element_wise::PassThrough PassThrough
Definition grouped_convolution_utils.hpp:47
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:15
static constexpr bool UsePersistentKernel
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:19
static constexpr index_t PrefillStages
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:17
static constexpr index_t PrefetchStages
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:16
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool, TailNumber tail_number)
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:35
static CK_TILE_HOST_DEVICE constexpr auto TransposeC()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:21
static CK_TILE_HOST_DEVICE constexpr bool BlockHasHotloop(index_t num_loop)
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:23
static constexpr index_t GlobalBufferNum
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:18
static CK_TILE_HOST_DEVICE constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:28
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:54
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:130
static constexpr index_t mfma_per_wg
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:156
static constexpr bool DoubleSmemBuffer
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:191
static constexpr index_t GetVectorSizeB()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:107
static constexpr index_t Aload_rep
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:170
static constexpr index_t DsReadPreload
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:84
static constexpr index_t GetVectorSizeA()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:102
static constexpr index_t dswrite_kIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:177
remove_cvref_t< typename Problem::AsDataTypeTuple > AsDataType
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:57
static CK_TILE_HOST const std::string GetName()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:179
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:1030
remove_cvref_t< typename Problem::BsDataTypeTuple > BsDataType
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:58
remove_cvref_t< std::tuple_element_t< 0, AsLayout > > ALayout
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:69
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:295
static constexpr index_t DsWritePreIssue
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:83
static constexpr index_t NWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:135
static constexpr bool kPadN
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:118
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, TailNumber tail_number, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:1074
static constexpr index_t MWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:134
static constexpr bool kPadK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:119
static constexpr auto idxK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:129
static constexpr index_t MIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:137
static constexpr index_t m_preload
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:148
remove_cvref_t< std::tuple_element_t< 0, AsDataType > > ADataType
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:72
static constexpr index_t NIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:138
static constexpr bool kPadM
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:117
BaseWeightPreshufflePipelineAGmemBGmemCRegV2< Problem > Base
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:55
static constexpr index_t dswrite_num_perK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:167
static constexpr index_t Bload_num_perK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:171
remove_cvref_t< std::tuple_element_t< 0, BsLayout > > BLayout
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:70
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:81
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:131
static constexpr index_t mfma_perM_perK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:175
static constexpr index_t GetVectorSizeC()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:112
remove_cvref_t< typename Problem::BElementWise > BElementWise
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:62
static constexpr index_t KPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:96
static constexpr index_t KIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:139
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:197
remove_cvref_t< decltype(PipelinePolicy::template GetBlockWeightPreshuffle< Problem >())> BlockWeightPreshuffle
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:75
remove_cvref_t< typename Problem::CDataType > CDataType
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:59
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM(index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:206
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:520
static constexpr auto idxM
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:127
static constexpr index_t MPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:94
static constexpr index_t KFlatPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:141
static constexpr index_t flatKPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:98
static constexpr auto I0
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:124
static constexpr index_t NPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:95
static constexpr index_t NumWaveGroups
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:122
static constexpr auto config
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:78
static constexpr index_t kNPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:90
static constexpr index_t dswrite_rep
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:168
static constexpr index_t flatNPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:99
static constexpr index_t Aload_num_perK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:169
static constexpr index_t BlockSize
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:86
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:490
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:132
static constexpr auto I2
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:126
remove_cvref_t< std::tuple_element_t< 0, BsDataType > > BDataType
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:73
static constexpr index_t KPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:145
remove_cvref_t< typename Problem::AElementWise > AElementWise
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:61
static constexpr index_t NFlatPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:142
static constexpr index_t Bload_rep
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:173
static constexpr index_t kKPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:91
static constexpr index_t K1
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:147
static constexpr index_t MPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:144
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:63
remove_cvref_t< typename Problem::CLayout > CLayout
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:67
static constexpr index_t kMPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:89
static constexpr auto TailNum
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:151
remove_cvref_t< typename Problem::AsLayoutTuple > AsLayout
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:65
static constexpr index_t Preshuffle
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:192
static constexpr index_t dsread_num_perK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:165
static constexpr index_t kLdsAlignmentInBytes
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:121
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:1053
static constexpr index_t WaveSize
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:87
static constexpr index_t dswrite_mIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:176
static constexpr auto idxN
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:128
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:436
static constexpr index_t HalfMIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:172
static constexpr index_t dsread_per_wg
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:158
remove_cvref_t< typename Problem::BsLayoutTuple > BsLayout
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:66
static constexpr auto I1
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:125
static CK_TILE_HOST_DEVICE constexpr auto TransposeC()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:195
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43