Register Balancing

// Unbalanced: stage 1 is the bottleneck
// Stage 1: multiply-accumulate (a * b + c). This is the slow path (8ns),
// so it is the stage that limits the clock period.
always @(posedge clk)
  temp <= a * b + c;

// Stage 2: one add of d onto the registered stage-1 value. Fast path (2ns),
// leaving most of the clock period unused.
always @(posedge clk)
  result <= temp + d;
^ Is this lower latency?
// Balanced: both stages ~5ns
// Stage 1: product only (5ns). The "+ c" is no longer done in this stage.
always @(posedge clk)
  temp <= a * b;

// Stage 2: both additions (5ns), applied to the registered product.
// NOTE(review): c is read here, in the same cycle as d, instead of in
// stage 1 alongside a and b. If c can change from cycle to cycle, it would
// need to be delayed by one register for the result to match a version
// that adds c in stage 1 -- confirm against the intended input timing.
always @(posedge clk)
  result <= temp + c + d;
^ Is this lower latency?

* For illustration purposes only; see the FAQ for more details.