Loop Reordering

// Row-by-row traversal: the outer loop walks rows (i), the inner loop walks
// columns (j).
// ROWS x COLS matrix stored as a vector of row vectors, every element set to 1.
constexpr int ROWS = 1000, COLS = 1000;
std::vector<std::vector<int>> matrix(
    ROWS, std::vector<int>(COLS, 1));

// The inner loop advances j, so consecutive iterations read adjacent
// elements of the same inner vector matrix[i].
// NOTE(review): `sum` is not declared in this snippet; presumably it is
// defined by the surrounding benchmark code -- confirm.
for (int i = 0; i < ROWS; ++i)
  for (int j = 0; j < COLS; ++j)
    sum += matrix[i][j];
^ Is this faster?
// Column-by-column traversal: the outer loop walks columns (j), the inner
// loop walks rows (i).
// ROWS x COLS matrix stored as a vector of row vectors, every element set to 1.
constexpr int ROWS = 1000, COLS = 1000;
std::vector<std::vector<int>> matrix(
    ROWS, std::vector<int>(COLS, 1));

// The inner loop advances i, so each iteration reads element j of a
// different inner vector; each inner vector has its own storage, so
// consecutive reads are not adjacent in memory.
// NOTE(review): `sum` is not declared in this snippet; presumably it is
// defined by the surrounding benchmark code -- confirm.
for (int j = 0; j < COLS; ++j)
  for (int i = 0; i < ROWS; ++i)
    sum += matrix[i][j];
^ Is this faster?

* The benchmark was run on an AMD Ryzen 9.

* For the full benchmark code, please refer here.

* For illustration purposes only; see the FAQ for more details.