Prefetch
constexpr int STRIDE = 4096;
for (auto i = 0u; i < arr.size(); ++i) {
arr[i] = (i + STRIDE) % arr.size();
}
// Measure only the code below this line.
int sum{0};
int p{0};
for (auto i = 0u; i < arr.size(); ++i) {
__builtin_prefetch(&arr[(p + 1 * STRIDE) % arr.size()], 0, 0);
__builtin_prefetch(&arr[(p + 2 * STRIDE) % arr.size()], 0, 0);
sum += arr[p];
p = arr[p];
}
constexpr int STRIDE = 4096;
for (auto i = 0u; i < arr.size(); ++i) {
arr[i] = (i + STRIDE) % arr.size();
}
// Measure only the code below this line.
int sum{0};
int p{0};
for (auto i = 0u; i < arr.size(); ++i) {
sum += arr[p];
p = arr[p];
}