device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp Source File

device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp Source File#

Composable Kernel: device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp Source File
device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include <iostream>
7#include <sstream>
8
19
20namespace ck {
21namespace tensor_operation {
22namespace device {
23
24template <typename ALayout,
25 typename BLayout,
26 typename DsLayout,
27 typename CLayout,
28 typename ADataType,
29 typename AScaleDataType,
30 typename BDataType,
31 typename BScaleDataType,
32 typename DsDataType,
33 typename CDataType,
34 typename GemmAccDataType,
35 typename CShuffleDataType,
36 typename AElementwiseOperation,
37 typename BElementwiseOperation,
38 typename CElementwiseOperation,
39 GemmSpecialization GemmSpec,
40 index_t BlockSize,
41 index_t ScaleBlockM,
42 index_t ScaleBlockN,
43 index_t ScaleBlockK,
44 index_t MPerBlock,
45 index_t NPerBlock,
46 index_t KPerBlock,
47 index_t AK1,
48 index_t BK1,
49 index_t MPerXDL,
50 index_t NPerXDL,
51 index_t MXdlPerWave,
52 index_t NXdlPerWave,
53 typename ABlockTransferThreadClusterLengths_AK0_M_AK1,
54 typename ABlockTransferThreadClusterArrangeOrder,
55 typename ABlockTransferSrcAccessOrder,
56 index_t ABlockTransferSrcVectorDim,
57 index_t ABlockTransferSrcScalarPerVector,
58 index_t ABlockTransferDstScalarPerVector_AK1,
59 bool ABlockLdsExtraM,
60 typename BBlockTransferThreadClusterLengths_BK0_N_BK1,
61 typename BBlockTransferThreadClusterArrangeOrder,
62 typename BBlockTransferSrcAccessOrder,
63 index_t BBlockTransferSrcVectorDim,
64 index_t BBlockTransferSrcScalarPerVector,
65 index_t BBlockTransferDstScalarPerVector_BK1,
66 bool BBlockLdsExtraN,
67 index_t CShuffleMXdlPerWavePerShuffle,
68 index_t CShuffleNXdlPerWavePerShuffle,
69 typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock,
70 typename CDEShuffleBlockTransferScalarPerVectors,
73 typename ComputeTypeA = CDataType,
74 typename ComputeTypeB = ComputeTypeA,
75 typename LDSTypeA = ComputeTypeA,
76 typename LDSTypeB = ComputeTypeB>
78 : public DeviceGemmMultipleD_ABScale<ALayout,
79 BLayout,
80 DsLayout,
81 CLayout,
82 ADataType,
83 AScaleDataType,
84 BDataType,
85 BScaleDataType,
86 DsDataType,
87 CDataType,
88 ScaleBlockM,
89 ScaleBlockN,
90 ScaleBlockK,
91 AElementwiseOperation,
92 BElementwiseOperation,
93 CElementwiseOperation>
94{
96 static constexpr auto NXdlPerWave64 = GetNXdlPerWave<true>();
97 static constexpr auto NXdlPerWave32 = GetNXdlPerWave<false>();
98 static constexpr index_t NumDTensor = DsDataType::Size();
99
100 // GridwiseGemm
101 template <index_t NXdlPerWave_>
103 ALayout,
104 BLayout,
105 DsLayout,
106 CLayout,
107 ADataType,
108 BDataType,
109 GemmAccDataType,
110 CShuffleDataType,
111 DsDataType,
112 CDataType,
113 AElementwiseOperation,
114 BElementwiseOperation,
115 CElementwiseOperation,
116 GemmSpec,
117 BlockSize,
118 ScaleBlockM,
119 ScaleBlockN,
120 ScaleBlockK,
121 MPerBlock,
122 NPerBlock,
123 KPerBlock,
124 AK1,
125 BK1,
126 MPerXDL,
127 NPerXDL,
128 MXdlPerWave,
129 NXdlPerWave_,
130 ABlockTransferThreadClusterLengths_AK0_M_AK1,
131 ABlockTransferThreadClusterArrangeOrder,
132 ABlockTransferSrcAccessOrder,
133 ABlockTransferSrcVectorDim,
134 ABlockTransferSrcScalarPerVector,
135 ABlockTransferDstScalarPerVector_AK1,
136 false,
137 ABlockLdsExtraM,
138 BBlockTransferThreadClusterLengths_BK0_N_BK1,
139 BBlockTransferThreadClusterArrangeOrder,
140 BBlockTransferSrcAccessOrder,
141 BBlockTransferSrcVectorDim,
142 BBlockTransferSrcScalarPerVector,
143 BBlockTransferDstScalarPerVector_BK1,
144 false,
145 BBlockLdsExtraN,
146 CShuffleMXdlPerWavePerShuffle,
147 math::min(CShuffleNXdlPerWavePerShuffle, NXdlPerWave_),
148 CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock,
149 CDEShuffleBlockTransferScalarPerVectors,
150 BlkGemmPipeSched,
151 BlkGemmPipelineVer,
152 ComputeTypeA,
153 ComputeTypeB,
154 LDSTypeA,
155 LDSTypeB>;
158
159 using Argument = typename GridwiseGemm64::Argument;
160
161 // Invoker
162 struct Invoker : public BaseInvoker
163 {
164 template <typename GridwiseGemm>
165 float RunImp(const typename GridwiseGemm::Argument& arg,
166 const StreamConfig& stream_config = StreamConfig{})
167 {
168 if(stream_config.log_level_ > 0)
169 {
170 arg.Print();
171 }
172
173 if(!GridwiseGemm::CheckValidity(arg))
174 {
175 throw std::runtime_error("wrong! GridwiseGemm has invalid setting");
176 }
177
178 index_t gdx, gdy, gdz;
179 std::tie(gdx, gdy, gdz) = GridwiseGemm::CalculateGridSize(arg.M, arg.N, arg.KBatch);
180
181 float ave_time = 0;
182
183 index_t k_grain = arg.KBatch * KPerBlock;
184 index_t K_split = (arg.K + k_grain - 1) / k_grain * KPerBlock;
185
186 const bool has_main_k_block_loop = GridwiseGemm::CalculateHasMainKBlockLoop(K_split);
187
188 const auto Run = [&](const auto& kernel) {
189 if(stream_config.flush_cache)
190 {
191 auto arg_ = arg;
192
193 const auto a_grid_desc_ak0_m_ak1 = GridwiseGemm::MakeAGridDescriptor_AK0_M_AK1(
194 arg_.M, arg_.MPadded, arg_.K, arg_.KPadded, arg_.StrideA, arg_.AK0);
195 const auto b_grid_desc_bk0_n_bk1 = GridwiseGemm::MakeBGridDescriptor_BK0_N_BK1(
196 arg_.K, arg_.KPadded, arg_.N, arg_.NPadded, arg_.StrideB, arg_.BK0);
197
198 auto size_a_buffer =
199 a_grid_desc_ak0_m_ak1.GetElementSpaceSize() * sizeof(ADataType);
200 auto size_b_buffer =
201 b_grid_desc_bk0_n_bk1.GetElementSpaceSize() * sizeof(BDataType);
202
204 arg_, stream_config.rotating_count, size_a_buffer, size_b_buffer);
205 rotating_mem.Print();
206
207 auto run_flush_cache = [&]() {
208 // flush icache
210 // rotating mem
211 rotating_mem.Next();
212 // clear c mem
213 if(arg_.KBatch > 1)
214 hipGetErrorString(hipMemsetAsync(arg_.p_c_grid,
215 0,
216 arg_.M * arg_.N * sizeof(CDataType),
217 stream_config.stream_id_));
218 };
219
221 stream_config,
222 run_flush_cache,
223 kernel,
224 dim3(gdx, gdy, gdz),
225 dim3(BlockSize),
226 0,
227 arg_);
228 }
229 else
230 {
231 if(arg.KBatch > 1)
232 hipGetErrorString(hipMemsetAsync(arg.p_c_grid,
233 0,
234 arg.M * arg.N * sizeof(CDataType),
235 stream_config.stream_id_));
236
237 ave_time = launch_and_time_kernel(
238 stream_config, kernel, dim3(gdx, gdy, gdz), dim3(BlockSize), 0, arg);
239 }
240 };
241
242 constexpr index_t minimum_occupancy = [&]() {
245 {
246 // FIXME: many instances have many spills with occupancy > 1, a better solution
247 // needed to get best performance
248 return 1;
249 }
250 else
251 {
252 return (BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave &&
253 MPerBlock * NPerBlock / BlockSize > 64)
254 ? 1
255 : 2;
256 }
257 }();
258
259 if(has_main_k_block_loop)
260 {
261 // Tail number always full
262 if constexpr(BlkGemmPipelineVer == BlockGemmPipelineVersion::v1 ||
263 BlkGemmPipelineVer == BlockGemmPipelineVersion::v3)
264 {
265 {
266 const auto kernel =
267 kernel_gemm_xdl_cshuffle_v3<GridwiseGemm,
268 true,
270 minimum_occupancy>;
271 Run(kernel);
272 }
273 }
274 }
275 else
276 {
277 // Tail number always 1
278 if constexpr(BlkGemmPipelineVer == BlockGemmPipelineVersion::v1)
279 {
280 if(GridwiseGemm::CalculateKBlockLoopTailNum(K_split) == TailNumber::Full)
281 {
282 const auto kernel =
283 kernel_gemm_xdl_cshuffle_v3<GridwiseGemm,
284 false,
286 minimum_occupancy>;
287 Run(kernel);
288 }
289 else if(GridwiseGemm::CalculateKBlockLoopTailNum(K_split) == TailNumber::Odd)
290 {
291 const auto kernel =
292 kernel_gemm_xdl_cshuffle_v3<GridwiseGemm,
293 false,
295 minimum_occupancy,
297 Run(kernel);
298 }
299 }
300 }
301 return ave_time;
302 }
303
305
306 // polymorphic
307 float Run(const BaseArgument* p_arg,
308 const StreamConfig& stream_config = StreamConfig{}) override
309 {
310 return Run(*dynamic_cast<const Argument*>(p_arg), stream_config);
311 }
312 };
313
314 void SetKBatch(BaseArgument* base_arg, int KBatch) const override
315 {
316 auto& arg = *dynamic_cast<Argument*>(base_arg);
317 arg.KBatch = KBatch;
318 }
319
320 static constexpr bool IsValidCompilationParameter()
321 {
322 // TODO: properly implement this check
323 return true;
324 }
325
326 static bool IsSupportedArgument(const Argument& arg)
327 {
329 {
330 return false;
331 }
332
333 // if(ScaleBlockM % MPerBlock != 0 || ScaleBlockN % NPerBlock != 0 || ScaleBlockK !=
334 // KPerBlock)
335 // {
336 // return false;
337 // }
338
339 if((arg.K % AK1 != 0 || arg.K % BK1 != 0) && !(GemmSpec == GemmSpecialization::MKPadding ||
340 GemmSpec == GemmSpecialization::NKPadding ||
341 GemmSpec == GemmSpecialization::MNKPadding ||
342 GemmSpec == GemmSpecialization::KPadding))
343 {
344 return false;
345 }
346
347 if(get_warp_size() == 64)
348 {
349 if constexpr(NXdlPerWave64 > 0)
350 {
352 }
353 }
354 else
355 {
356 if constexpr(NXdlPerWave32 > 0)
357 {
359 reinterpret_cast<const typename GridwiseGemm32::Argument&>(arg));
360 }
361 }
362 return false;
363 }
364
365 // polymorphic
366 bool IsSupportedArgument(const BaseArgument* p_arg) override
367 {
368 return IsSupportedArgument(*dynamic_cast<const Argument*>(p_arg));
369 }
370
371 static auto MakeArgument(const void* p_a,
372 const void* p_b,
373 std::array<const void*, NumDTensor> p_ds,
374 void* p_c,
375 const index_t M,
376 const index_t N,
377 const index_t K,
378 const index_t StrideA,
379 const index_t StrideB,
380 const std::array<index_t, NumDTensor> StrideDs,
381 const index_t StrideC,
382 const void* p_a_scale,
383 const void* p_b_scale,
384 AElementwiseOperation a_element_op,
385 BElementwiseOperation b_element_op,
386 CElementwiseOperation c_element_op)
387 {
388 return Argument{static_cast<const ADataType*>(p_a),
389 static_cast<const BDataType*>(p_b),
390 p_ds,
391 static_cast<CDataType*>(p_c),
392 M,
393 N,
394 K,
395 StrideA,
396 StrideB,
397 StrideDs,
398 StrideC,
399 static_cast<const AScaleDataType*>(p_a_scale),
400 static_cast<const BScaleDataType*>(p_b_scale),
401 1,
402 a_element_op,
403 b_element_op,
404 c_element_op};
405 }
406
407 static auto MakeInvoker() { return Invoker{}; }
408
409 // polymorphic
410 std::unique_ptr<BaseArgument>
411 MakeArgumentPointer(const void* p_a,
412 const void* p_b,
413 std::array<const void*, NumDTensor> p_ds,
414 void* p_c,
415 const index_t M,
416 const index_t N,
417 const index_t K,
418 const index_t StrideA,
419 const index_t StrideB,
420 const std::array<ck::index_t, NumDTensor> StrideDs,
421 const index_t StrideC,
422 const void* p_a_scale,
423 const void* p_b_scale,
424 AElementwiseOperation a_element_op,
425 BElementwiseOperation b_element_op,
426 CElementwiseOperation c_element_op) override
427 {
428 return std::make_unique<Argument>(static_cast<const ADataType*>(p_a),
429 static_cast<const BDataType*>(p_b),
430 p_ds,
431 static_cast<CDataType*>(p_c),
432 M,
433 N,
434 K,
435 StrideA,
436 StrideB,
437 StrideDs,
438 StrideC,
439 static_cast<const AScaleDataType*>(p_a_scale),
440 static_cast<const BScaleDataType*>(p_b_scale),
441 1,
442 a_element_op,
443 b_element_op,
444 c_element_op);
445 }
446
447 // polymorphic
448 std::unique_ptr<BaseInvoker> MakeInvokerPointer() override
449 {
450 return std::make_unique<Invoker>(Invoker{});
451 }
452
453 // polymorphic
454 std::string GetTypeString() const override
455 {
456 auto str = std::stringstream();
457
458 std::map<BlockGemmPipelineScheduler, std::string> BlkGemmPipelineSchedulerToString{
461
462 std::map<BlockGemmPipelineVersion, std::string> BlkGemmPipelineVersionToString{
466
467 // clang-format off
468 str << "DeviceGemmXdlUniversal"
469 << "<"
470 << getGemmSpecializationString(GemmSpec) << ", "
471 << std::string(ALayout::name)[0]
472 << std::string(BLayout::name)[0]
473 << std::string(CLayout::name)[0]
474 << ">"
475 << " BlkSize: "
476 << BlockSize << ", "
477 << "BlkTile: "
478 << MPerBlock<<"x"<<NPerBlock<<"x"<<KPerBlock << ", "
479 << "WaveTile: "
480 << MPerXDL<<"x"<<NPerXDL << ", "
481 << "WaveMap: "
482 << MXdlPerWave<<"x" << NXdlPerWave<<", "
483 << "VmemReadVec: "
484 << ABlockTransferSrcScalarPerVector<<"x"<<BBlockTransferSrcScalarPerVector<<", "
485 << "BlkGemmPipelineScheduler: "
486 << BlkGemmPipelineSchedulerToString[BlkGemmPipeSched] << ", "
487 << "BlkGemmPipelineVersion: "
488 << BlkGemmPipelineVersionToString[BlkGemmPipelineVer] << ", "
489 << "BlkGemmPipelinePrefetchStages: "
490 << GridwiseGemm64::BlockwiseGemmPipe::PrefetchStages;
491 // clang-format on
492
493 return str.str();
494 }
495};
496
497} // namespace device
498} // namespace tensor_operation
499} // namespace ck
#define INVOKER_RUN3_IMPL
Definition device_base.hpp:114
#define GET_NXDL_PER_WAVE_IMPL
Definition device_base.hpp:81
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
__host__ __device__ constexpr T max(T x)
Definition utility/math.hpp:84
__host__ __device__ constexpr T min(T x)
Definition utility/math.hpp:116
Definition convolution_backward_data_specialization.hpp:8
std::string getGemmSpecializationString(const GemmSpecialization &s)
Definition gemm_specialization.hpp:32
GemmSpecialization
Definition gemm_specialization.hpp:11
@ MKPadding
Definition gemm_specialization.hpp:18
@ KPadding
Definition gemm_specialization.hpp:16
@ MNKPadding
Definition gemm_specialization.hpp:20
@ NKPadding
Definition gemm_specialization.hpp:19
Definition convolution_backward_data_specialization.hpp:7
void flush_icache()
Definition flush_cache.hpp:383
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, GemmArgs &gemm_args, Args... args)
Definition flush_cache.hpp:398
Definition ck.hpp:268
int32_t index_t
Definition ck.hpp:299
@ Set
Definition ck.hpp:278
BlockGemmPipelineVersion
Definition blkgemmpipe_scheduler.hpp:12
@ v2
Definition blkgemmpipe_scheduler.hpp:15
@ v3
Definition blkgemmpipe_scheduler.hpp:16
@ v1
Definition blkgemmpipe_scheduler.hpp:14
@ Odd
Definition blkgemmpipe_scheduler.hpp:33
@ Full
Definition blkgemmpipe_scheduler.hpp:49
bool is_xdl_wmma_supported()
Definition host_utility/device_prop.hpp:76
__device__ constexpr index_t get_warp_size()
Definition get_id.hpp:10
constexpr bool is_same_v
Definition type.hpp:283
BlockGemmPipelineScheduler
Definition blkgemmpipe_scheduler.hpp:25
@ Intrawave
Definition blkgemmpipe_scheduler.hpp:26
@ Interwave
Definition blkgemmpipe_scheduler.hpp:27
__global__ void kernel_gemm_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg)
Definition gridwise_gemm_xdl_cshuffle_streamk_v3.hpp:38
Definition ck/stream_config.hpp:10
Definition gridwise_gemm_xdl_cshuffle_v3_multi_d_ab_scale.hpp:118
Definition device_base.hpp:197
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:163
float RunImp(const typename GridwiseGemm::Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:165
INVOKER_RUN3_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:307
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:94
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:448
static constexpr auto NXdlPerWave32
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:97
static auto MakeArgument(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_c, const index_t M, const index_t N, const index_t K, const index_t StrideA, const index_t StrideB, const std::array< index_t, NumDTensor > StrideDs, const index_t StrideC, const void *p_a_scale, const void *p_b_scale, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:371
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:96
static bool IsSupportedArgument(const Argument &arg)
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:326
static constexpr index_t NumDTensor
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:98
GridwiseGemmBase< math::max(NXdlPerWave32, 1)> GridwiseGemm32
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:157
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_c, const index_t M, const index_t N, const index_t K, const index_t StrideA, const index_t StrideB, const std::array< ck::index_t, NumDTensor > StrideDs, const index_t StrideC, const void *p_a_scale, const void *p_b_scale, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:411
GridwiseGemmMultiD_ABScale_xdl_cshuffle_v3< ALayout, BLayout, DsLayout, CLayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, DsDataType, CDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockM, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, math::min(CShuffleNXdlPerWavePerShuffle, NXdlPerWave_), CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, LDSTypeA, LDSTypeB > GridwiseGemmBase
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:102
std::string GetTypeString() const override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:454
static constexpr bool IsValidCompilationParameter()
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:320
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:156
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:366
static auto MakeInvoker()
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:407
void SetKBatch(BaseArgument *base_arg, int KBatch) const override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:314
typename GridwiseGemm64::Argument Argument
Definition device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp:159
Definition device_gemm_multiple_d_ab_scale.hpp:39
Definition flush_cache.hpp:299