arch.hpp File Reference

arch.hpp File Reference#

Composable Kernel: arch.hpp File Reference
arch.hpp File Reference

Go to the source code of this file.

Classes

struct  ck_tile::safe_underlying_type< T, true >
struct  ck_tile::safe_underlying_type< T, false >
struct  ck_tile::WaitcntLayoutGfx12
struct  ck_tile::WaitcntLayoutGfx11
struct  ck_tile::WaitcntLayoutLegacy
struct  ck_tile::waitcnt_arg
struct  ck_tile::gfx9_t
struct  ck_tile::gfx950_t
struct  ck_tile::gfx103_t
struct  ck_tile::gfx11_t
struct  ck_tile::gfx12_t
struct  ck_tile::gfx_invalid_t

Namespaces

namespace  ck_tile
namespace  ck_tile::detail

Macros

#define CK_TILE_S_CNT_MAX   0b1100'1111'0111'1111
#define CK_TILE_VMCNT(cnt)
#define CK_TILE_EXPCNT(cnt)
#define CK_TILE_LGKMCNT(cnt)
#define CK_CONSTANT_ADDRESS_SPACE

Typedefs

template<typename T>
using ck_tile::safe_underlying_type_t = typename safe_underlying_type<T, std::is_enum<T>::value>::type
using ck_tile::Waitcnt = WaitcntLayoutLegacy

Enumerations

enum struct  ck_tile::address_space_enum : std::uint16_t {
  ck_tile::generic = 0 ,
  ck_tile::global ,
  ck_tile::lds ,
  ck_tile::sgpr ,
  ck_tile::constant ,
  ck_tile::vgpr
}
enum struct  ck_tile::memory_operation_enum : std::uint16_t {
  ck_tile::set = 0 ,
  ck_tile::atomic_add ,
  ck_tile::atomic_max ,
  ck_tile::add
}
enum  ck_tile::LLVMSchedGroupMask : int32_t {
  ck_tile::NONE = 0 ,
  ck_tile::ALU = 1 << 0 ,
  ck_tile::VALU = 1 << 1 ,
  ck_tile::SALU = 1 << 2 ,
  ck_tile::MFMA = 1 << 3 ,
  ck_tile::VMEM = 1 << 4 ,
  ck_tile::VMEM_READ = 1 << 5 ,
  ck_tile::VMEM_WRITE = 1 << 6 ,
  ck_tile::DS = 1 << 7 ,
  ck_tile::DS_READ = 1 << 8 ,
  ck_tile::DS_WRITE = 1 << 9 ,
  ck_tile::ALL = (DS_WRITE << 1) - 1
}

Functions

CK_TILE_HOST_DEVICE constexpr index_t ck_tile::get_warp_size ()
CK_TILE_HOST bool ck_tile::is_wave32 ()
CK_TILE_DEVICE index_t ck_tile::get_grid_size ()
CK_TILE_DEVICE index_t ck_tile::get_block_size ()
CK_TILE_DEVICE index_t ck_tile::get_thread_local_1d_id ()
CK_TILE_DEVICE index_t ck_tile::get_thread_global_1d_id ()
CK_TILE_DEVICE index_t ck_tile::get_block_1d_id ()
CK_TILE_DEVICE index_t ck_tile::get_lane_id ()
template<bool ReturnSgpr = true>
CK_TILE_DEVICE index_t ck_tile::get_warp_id (bool_constant< ReturnSgpr >={})
CK_TILE_DEVICE index_t ck_tile::get_thread_id ()
CK_TILE_DEVICE index_t ck_tile::get_block_id ()
CK_TILE_DEVICE void ck_tile::block_sync_load_raw (index_t cnt=0)
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void ck_tile::s_waitcnt ()
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void ck_tile::s_waitcnt_barrier ()
template<index_t lgkmcnt = 0>
CK_TILE_DEVICE void ck_tile::block_sync_lds ()
template<index_t vmcnt = 0>
CK_TILE_DEVICE void ck_tile::block_sync_lds_direct_load ()
CK_TILE_DEVICE void ck_tile::s_nop (index_t cnt=0)
template<typename T>
__device__ T * ck_tile::cast_pointer_to_generic_address_space (T CK_CONSTANT_ADDRESS_SPACE *p)
template<typename T>
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACEck_tile::cast_pointer_to_constant_address_space (T *p)
CK_TILE_HOST_DEVICE constexpr index_t ck_tile::get_smem_capacity ()
CK_TILE_HOST_DEVICE constexpr const char * ck_tile::address_space_to_string (address_space_enum addr_space)
 Helper function to convert address space enum to string.

Macro Definition Documentation

◆ CK_CONSTANT_ADDRESS_SPACE

#define CK_CONSTANT_ADDRESS_SPACE
Value:
__attribute__((address_space( \
static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))

◆ CK_TILE_EXPCNT

#define CK_TILE_EXPCNT ( cnt)
Value:
([]() { static_assert(!((cnt) >> 3), "EXP only has 3 bits"); }(), ((cnt) << 4))

◆ CK_TILE_LGKMCNT

#define CK_TILE_LGKMCNT ( cnt)
Value:
([]() { static_assert(!((cnt) >> 4), "LGKM only has 4 bits"); }(), ((cnt) << 8))

◆ CK_TILE_S_CNT_MAX

#define CK_TILE_S_CNT_MAX   0b1100'1111'0111'1111

◆ CK_TILE_VMCNT

#define CK_TILE_VMCNT ( cnt)
Value:
([]() { static_assert(!((cnt) >> 6), "VMCNT only has 6 bits"); }(), \
((cnt) & 0b1111) | (((cnt) & 0b110000) << 10))