inner_product_dpp8.hpp File Reference

inner_product_dpp8.hpp File Reference

Composable Kernel: inner_product_dpp8.hpp File Reference
inner_product_dpp8.hpp File Reference
#include "amd_gemm_dpp.hpp"
#include "data_type.hpp"
#include "type_convert.hpp"

Go to the source code of this file.

Namespaces

namespace  ck
namespace  ck::dpp8

Functions

template<int SrcLaneIdx>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 0 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 1 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 2 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 3 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 4 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 5 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 6 > (const half2_t &a, const half2_t &b, float &c)
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 7 > (const half2_t &a, const half2_t &b, float &c)
template<int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::inline_v_dot2c_dpp8 (const half2_t &a, const half2_t &b, float &c)
template<int SrcLaneIdx>
constexpr int ck::dpp8::get_dpp_sel_mask_broadcast ()
template<int SrcLaneIdx>
__device__ void ck::dpp8::intrinsic_fdot2_impl (const half2_t &a, const half2_t &b, float &c)
template<int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::intrinsic_fdot2 (const half2_t &a, const half2_t &b, float &c)
template<typename TA, typename TB, typename TC, int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::inner_product_dpp (const TA &a, const TB &b, TC &c)

Variables

constexpr index_t ck::dpp8::lane_group_size = 8
 Number of lanes that can share data using DPP8 modifiers.
constexpr std::array< int, dpp8::lane_group_size > ck::dpp8::IntrinsicMaskDpp8