Cache vs Memory

// Build the index chain: arr[i] holds the index STRIDE positions ahead of i,
// wrapping around at the end of the array.
constexpr int STRIDE = 1;

for (auto i = 0u; i < arr.size(); ++i) {
  arr[i] = (i + STRIDE) % arr.size();
}

// Measure the part below only.
// Starting from index 0, follow the chain for arr.size() steps. Each load uses
// the value produced by the previous load as its index, so with STRIDE = 1 the
// elements are visited in consecutive order (0, 1, 2, ...).
int sum{0};
int p{0};
for (auto i = 0u; i < arr.size(); ++i) {
  sum += arr[p];  // accumulate the visited values
  p = arr[p];     // the stored value is the next index to visit
}
// Build the index chain: arr[i] holds the index STRIDE positions ahead of i,
// wrapping around at the end of the array.
constexpr int STRIDE = 4096;

for (auto i = 0u; i < arr.size(); ++i) {
  arr[i] = (i + STRIDE) % arr.size();
}

// Measure the part below only.
// Starting from index 0, follow the chain for arr.size() steps. Each load uses
// the value produced by the previous load as its index, so with STRIDE = 4096
// every step jumps 4096 elements ahead (modulo the array size) rather than
// moving to the neighbouring element.
int sum{0};
int p{0};
for (auto i = 0u; i < arr.size(); ++i) {
  sum += arr[p];  // accumulate the visited values
  p = arr[p];     // the stored value is the next index to visit
}

* The benchmark was run on an Apple MacBook Air M2.

* For the full benchmark code, please see here.