False Sharing

// Baseline layout: three ints with default alignment (typically 12 bytes,
// assuming a 4-byte int), so several consecutive elements of the array below
// can fit inside a single cache line.
struct Foo {
  int x;
  int y;
  int z;
};
// 100,000 value-initialized elements stored contiguously.
std::vector<Foo> arr(100'000);

// Run in 2 threads. Each thread starts at its own thread_idx (presumably 0 or
// 1) and advances by 2, so the two threads write interleaved, adjacent
// elements of arr.
// The size is converted to int once so the loop bound and the arithmetic below
// stay in signed int instead of mixing int with the unsigned size type.
for (int i = thread_idx, count = static_cast<int>(arr.size()); i < count;
     i += 2) {
  arr[i].x = i;
  arr[i].y = count - i;
  arr[i].z = count + i;
}
// Padded layout: alignas(64) makes every Foo start on a 64-byte boundary, which
// also rounds sizeof(Foo) up to 64, so no two array elements share a 64-byte
// block.
// NOTE(review): 64 is presumably meant to match the cache-line size of the
// benchmark machine -- confirm; some CPUs use a larger line.
// NOTE(review): the vector's heap storage only honors this over-alignment when
// the allocator supports aligned allocation (standard since C++17) -- confirm
// the build's language level.
struct alignas(64) Foo {
  int x;
  int y;
  int z;
};
// 100,000 value-initialized elements stored contiguously.
std::vector<Foo> arr(100'000);

// Run in 2 threads. Each thread starts at its own thread_idx (presumably 0 or
// 1) and advances by 2, so the two threads write interleaved, adjacent
// elements of arr.
// The size is converted to int once so the loop bound and the arithmetic below
// stay in signed int instead of mixing int with the unsigned size type.
for (int i = thread_idx, count = static_cast<int>(arr.size()); i < count;
     i += 2) {
  arr[i].x = i;
  arr[i].y = count - i;
  arr[i].z = count + i;
}

* The benchmark was run on an Apple MacBook Air (M2).

* For the full benchmark code, please refer here.