Register Spill

// arr has 1M doubles
// Number of running sums updated on every step of the outer loop.
constexpr int N = 14;

// One accumulator per slot; the remaining initializers are elided ("...") in this snippet.
double sums[N] = {1.0, ...};
// Walk arr in strides of N elements (i is an unsigned int because of the 0u literal).
// NOTE(review): 1M is not a multiple of 14, so if arr.size() is exactly 1M the last
// stride would read arr[i + j] past the end — confirm how the full benchmark sizes/pads arr.
for (auto i = 0u; i < arr.size(); i += N) {
  // Each slot j depends only on its own previous value and on element i + j of arr.
  for (int j = 0; j < N; ++j)
    sums[j] = sums[j] * 1.01 + arr[i + j];
}
^ Is this faster?
// arr has 1M doubles
// Number of running sums updated on every step of the outer loop.
constexpr int N = 16;

// One accumulator per slot; the remaining initializers are elided ("...") in this snippet.
double sums[N] = {1.0, ...};
// Walk arr in strides of N elements (i is an unsigned int because of the 0u literal).
for (auto i = 0u; i < arr.size(); i += N) {
  // Each slot j depends only on its own previous value and on element i + j of arr.
  for (int j = 0; j < N; ++j)
    sums[j] = sums[j] * 1.01 + arr[i + j];
}
^ Is this faster?

* The benchmark is run on an AMD Ryzen 9.

* For the full benchmark code, please refer here.

* For illustration purposes only; see the FAQ for more details.